From f13c20f81b56108ac477213fa5ada2524b5e5c98 Mon Sep 17 00:00:00 2001
From: Natalia <124304+nessita@users.noreply.github.com>
Date: Thu, 5 Mar 2026 14:41:44 -0300
Subject: [PATCH] [4.2.x] Fixed CVE-2026-33033 -- Mitigated potential DoS in
 MultiPartParser.

When a multipart file part used `Content-Transfer-Encoding: base64` and
the non-whitespace base64 bytes did not align to a multiple of 4 within
a chunk, the parser entered a loop calling `field_stream.read()` with a
size of 1 to 3 bytes, once per whitespace byte. Each such call fetched
the entire internal buffer, sliced off 1 to 3 bytes, and pushed the
remainder back via unget(), doing an O(n) memory copy per call. A 2.5 MB
payload of mostly whitespace produced CPU amplification relative to a
normal upload of the same size.

The alignment loop now reads `self._chunk_size` bytes at a time, and
accumulates stripped parts in a list joined once at the end.

Thanks to Seokchan Yoon for the report and the fixing patch.

Backport of 7e9885f99cee771b51692fadc5592bdbf19641aa from main.
---
 django/http/multipartparser.py |  15 +++--
 docs/releases/4.2.30.txt       |  10 +++
 tests/requests_tests/tests.py  | 114 ++++++++++++++++++++++++++++++++-
 3 files changed, 131 insertions(+), 8 deletions(-)

diff --git a/django/http/multipartparser.py b/django/http/multipartparser.py
index 5ab63455efcd..856ab680594e 100644
--- a/django/http/multipartparser.py
+++ b/django/http/multipartparser.py
@@ -302,15 +302,18 @@ def _parse(self):
                                 # We should always decode base64 chunks by
                                 # multiple of 4, ignoring whitespace.
 
-                                stripped_chunk = b"".join(chunk.split())
+                                stripped_parts = [b"".join(chunk.split())]
+                                stripped_length = len(stripped_parts[0])
 
-                                remaining = len(stripped_chunk) % 4
-                                while remaining != 0:
-                                    over_chunk = field_stream.read(4 - remaining)
+                                while stripped_length % 4 != 0:
+                                    over_chunk = field_stream.read(self._chunk_size)
                                     if not over_chunk:
                                         break
-                                    stripped_chunk += b"".join(over_chunk.split())
-                                    remaining = len(stripped_chunk) % 4
+                                    over_stripped = b"".join(over_chunk.split())
+                                    stripped_parts.append(over_stripped)
+                                    stripped_length += len(over_stripped)
+
+                                stripped_chunk = b"".join(stripped_parts)
 
                                 try:
                                     chunk = base64.b64decode(stripped_chunk)
diff --git a/tests/requests_tests/tests.py b/tests/requests_tests/tests.py
index d3e9e6622461..6305c8827365 100644
--- a/tests/requests_tests/tests.py
+++ b/tests/requests_tests/tests.py
@@ -1,6 +1,7 @@
 import copy
 from io import BytesIO
 from itertools import chain
+from unittest import mock
 from urllib.parse import urlencode
 
 from django.core.exceptions import DisallowedHost
@@ -11,10 +12,10 @@
     RawPostDataException,
     UnreadablePostError,
 )
-from django.http.multipartparser import MultiPartParserError
+from django.http.multipartparser import LazyStream, MultiPartParserError
 from django.http.request import split_domain_port
 from django.test import RequestFactory, SimpleTestCase, override_settings
-from django.test.client import FakePayload
+from django.test.client import BOUNDARY, MULTIPART_CONTENT, FakePayload
 
 
 class RequestsTests(SimpleTestCase):
@@ -537,6 +538,115 @@ def test_POST_after_body_read_and_stream_read(self):
         self.assertEqual(request.read(1), b"n")
         self.assertEqual(request.POST, {"name": ["value"]})
 
+    def test_multipart_post_field_with_base64(self):
+        payload = FakePayload(
+            "\r\n".join(
+                [
+                    f"--{BOUNDARY}",
+                    'Content-Disposition: form-data; name="name"',
+                    "Content-Transfer-Encoding: base64",
+                    "",
+                    "dmFsdWU=",
+                    f"--{BOUNDARY}--",
+                    "",
+                ]
+            )
+        )
+        request = WSGIRequest(
+            {
+                "REQUEST_METHOD": "POST",
+                "CONTENT_TYPE": MULTIPART_CONTENT,
+                "CONTENT_LENGTH": len(payload),
+                "wsgi.input": payload,
+            }
+        )
+        request.body  # evaluate
+        self.assertEqual(request.POST, {"name": ["value"]})
+
+    def test_multipart_post_field_with_invalid_base64(self):
+        payload = FakePayload(
+            "\r\n".join(
+                [
+                    f"--{BOUNDARY}",
+                    'Content-Disposition: form-data; name="name"',
+                    "Content-Transfer-Encoding: base64",
+                    "",
+                    "123",
+                    f"--{BOUNDARY}--",
+                    "",
+                ]
+            )
+        )
+        request = WSGIRequest(
+            {
+                "REQUEST_METHOD": "POST",
+                "CONTENT_TYPE": MULTIPART_CONTENT,
+                "CONTENT_LENGTH": len(payload),
+                "wsgi.input": payload,
+            }
+        )
+        request.body  # evaluate
+        self.assertEqual(request.POST, {"name": ["123"]})
+
+    def test_multipart_file_upload_base64_whitespace_heavy(self):
+        # Fake a file upload whose base64-encoded content is mostly
+        # whitespace that spans a chunk boundary.
+        payload = FakePayload(
+            "\r\n".join(
+                [
+                    f"--{BOUNDARY}",
+                    'Content-Disposition: form-data; name="file"; filename="test.txt"',
+                    "Content-Type: application/octet-stream",
+                    "Content-Transfer-Encoding: base64",
+                    "",
+                ]
+            )
+        )
+        # "AAAA" decodes to b"\x00\x00\x00". Whitespace (70000 bytes) spans the
+        # default 64KB chunk boundary, hence the alignment loop is exercised.
+        payload.write(b"\r\n" + b"AAA" + b" " * 70000 + b"A" + b"\r\n")
+        payload.write("--" + BOUNDARY + "--\r\n")
+        request = WSGIRequest(
+            {
+                "REQUEST_METHOD": "POST",
+                "CONTENT_TYPE": MULTIPART_CONTENT,
+                "CONTENT_LENGTH": len(payload),
+                "wsgi.input": payload,
+            }
+        )
+        reads = []
+        original_read = LazyStream.read
+
+        def counting_read(self_stream, size=None):
+            reads.append(size)
+            return original_read(self_stream, size)
+
+        with mock.patch.object(LazyStream, "read", counting_read):
+            files = request.FILES
+
+        self.assertEqual(len(files), 1)
+        self.assertEqual(files["file"].read(), b"\x00\x00\x00")
+
+        # The alignment loop must read in chunk-sized units rather than one
+        # byte at a time, otherwise each whitespace byte triggers a separate
+        # read() call with a costly internal unget() cycle.
+        # Parsing this payload should issue exactly 8 LazyStream.read() calls:
+        # 1. main_stream.read(1)     -- BoundaryIter.__init__ probe, preamble
+        # 2. sub_stream.read(1024)   -- parse_boundary_stream, preamble headers
+        # 3. main_stream.read(1)     -- BoundaryIter.__init__ probe, file field
+        # 4. field_stream.read(1024) -- parse_boundary_stream, file headers
+        # 5. field_stream.read(65536)-- base64 alignment loop: one chunk-sized
+        #                               read to find the non-whitespace bytes
+        #                               needed to complete the 4-byte base64
+        #                               group that spans the chunk boundary
+        # 6. main_stream.read(1)     -- BoundaryIter.__init__ probe, epilogue
+        # 7. sub_stream.read(1024)   -- parse_boundary_stream, epilogue headers
+        # 8. main_stream.read(1)     -- BoundaryIter.__init__ probe, exhausted
+        #                               stream; returns b"" and stops iteration
+        # A byte-at-a-time alignment loop would instead issue, in step 5,
+        # one read(1) per whitespace byte past the chunk boundary (4488 calls).
+        self.assertEqual(reads, [1, 1024, 1, 1024, 65536, 1, 1024, 1])
+
     def test_POST_after_body_read_and_stream_read_multipart(self):
         """
         POST should be populated even if body is read first, and then
