From 851473490281f82d82560b2368284ef33cf6e8f9 Mon Sep 17 00:00:00 2001
From: lizhenghao <sculizhenghao@foxmail.com>
Date: Wed, 22 Oct 2025 10:26:34 +0800
Subject: [PATCH 1/3] Fix: Fixed a read(-1) vulnerability caused by boundary
 handling error in #264

---
 source/decoder.c      |  8 +++++++-
 tests/test_decoder.py | 22 ++++++++++++++++++++++
 2 files changed, 29 insertions(+), 1 deletion(-)

Index: cbor2-5.6.5/source/decoder.c
===================================================================
--- cbor2-5.6.5.orig/source/decoder.c
+++ cbor2-5.6.5/source/decoder.c
@@ -758,7 +758,7 @@ decode_definite_long_string(CBORDecoderO
     char *buffer = NULL;
     while (left) {
         // Read up to 65536 bytes of data from the stream
-        Py_ssize_t chunk_length = 65536 - buffer_size;
+        Py_ssize_t chunk_length = 65536 - buffer_length;
         if (left < chunk_length)
             chunk_length = left;
 
@@ -828,7 +828,13 @@ decode_definite_long_string(CBORDecoderO
                 memcpy(buffer, bytes_buffer + consumed, unconsumed);
             }
             buffer_length = unconsumed;
+        } else {
+            // All bytes consumed, reset buffer_length
+            buffer_length = 0;
         }
+
+        Py_DECREF(chunk);
+        chunk = NULL;
     }
 
     if (ret && string_namespace_add(self, ret, length) == -1)
Index: cbor2-5.6.5/tests/test_decoder.py
===================================================================
--- cbor2-5.6.5.orig/tests/test_decoder.py
+++ cbor2-5.6.5/tests/test_decoder.py
@@ -260,6 +260,28 @@ def test_string_oversized(impl) -> None:
         (impl.loads(unhexlify("aeaeaeaeaeaeaeaeae0108c29843d90100d8249f0000aeaeffc26ca799")),)
 
 
+def test_string_issue_264_multiple_chunks_utf8_boundary(impl) -> None:
+    """Test for Issue #264: UTF-8 characters split across multiple 65536-byte chunk boundaries."""
+    import struct
+
+    # String body: 65535 'a' + '€' (3 bytes) + 65533 'b' + '€' (3 bytes) + 100 'd'.
+    # Each '€' straddles a 65536-byte multiple (body offsets 65535-65537 and 131071-131073).
+    total_bytes = 65535 + 3 + 65533 + 3 + 100  # 131174 bytes of UTF-8 after the 5-byte header
+
+    payload = b"\x7a" + struct.pack(">I", total_bytes)  # major type 3, 4-byte length
+    payload += b"a" * 65535
+    payload += "€".encode()  # U+20AC: E2 82 AC
+    payload += b"b" * 65533
+    payload += "€".encode()
+    payload += b"d" * 100
+
+    expected = "a" * 65535 + "€" + "b" * 65533 + "€" + "d" * 100
+
+    result = impl.loads(payload)
+    assert result == expected
+    assert len(result) == 131170  # 65535 + 1 + 65533 + 1 + 100 characters
+
+
 @pytest.mark.parametrize(
     "payload, expected",
     [
