From bd4ab523ba664863d40470cc718c566158adfa31 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Tue, 24 Mar 2026 01:20:26 +0200
Subject: [PATCH] [3.14] gh-145264: Do not ignore excess Base64 data after the
 first padded quad (GH-145267) (GH-146326)

Base64 decoder (see binascii.a2b_base64(), base64.b64decode(), etc)
no longer ignores excess data after the first padded quad in non-strict
(default) mode.  Instead, in conformance with RFC 4648, it ignores the
pad character, "=", if it is present before the end of the encoded data.
(cherry picked from commit 4561f6418a691b3e89aef0901f53fe0dfb7f7c0e)
(cherry picked from commit e31c55121620189a0d1a07b689762d8ca9c1b7fa)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
---
 Lib/test/test_binascii.py                                               |   73 +++++++
 Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst |    4 
 Modules/binascii.c                                                      |   93 +++++++---
 Modules/clinic/binascii.c.h                                             |   35 +++
 4 files changed, 174 insertions(+), 31 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst

Index: Python-3.9.25/Lib/test/test_binascii.py
===================================================================
--- Python-3.9.25.orig/Lib/test/test_binascii.py	2026-04-17 22:38:56.575372532 +0200
+++ Python-3.9.25/Lib/test/test_binascii.py	2026-04-17 22:39:09.320468684 +0200
@@ -15,7 +15,6 @@
                  'unhexlify', 'rledecode_hqx']
 all_functions = a2b_functions + b2a_functions + ['crc32', 'crc_hqx']
 
-
 class BinASCIITest(unittest.TestCase):
 
     type2test = bytes
@@ -115,6 +114,78 @@
         # empty strings. TBD: shouldn't it raise an exception instead ?
         self.assertEqual(binascii.a2b_base64(self.type2test(fillers)), b'')
 
+    def test_base64_strict_mode(self):
+        # Test base64 with strict mode on
+        def _assertRegexTemplate(assert_regex: str, data: bytes, non_strict_mode_expected_result: bytes):
+            with self.assertRaisesRegex(binascii.Error, assert_regex):
+                binascii.a2b_base64(self.type2test(data), strict_mode=True)
+            self.assertEqual(binascii.a2b_base64(self.type2test(data), strict_mode=False),
+                             non_strict_mode_expected_result)
+            self.assertEqual(binascii.a2b_base64(self.type2test(data)),
+                             non_strict_mode_expected_result)
+
+        def assertExcessData(data, non_strict_mode_expected_result: bytes):
+            _assertRegexTemplate(r'(?i)Excess data', data, non_strict_mode_expected_result)
+
+        def assertNonBase64Data(data, non_strict_mode_expected_result: bytes):
+            _assertRegexTemplate(r'(?i)Only base64 data', data, non_strict_mode_expected_result)
+
+        def assertLeadingPadding(data, non_strict_mode_expected_result: bytes):
+            _assertRegexTemplate(r'(?i)Leading padding', data, non_strict_mode_expected_result)
+
+        def assertDiscontinuousPadding(data, non_strict_mode_expected_result: bytes):
+            _assertRegexTemplate(r'(?i)Discontinuous padding', data, non_strict_mode_expected_result)
+
+        def assertExcessPadding(data, non_strict_mode_expected_result: bytes):
+            _assertRegexTemplate(r'(?i)Excess padding', data, non_strict_mode_expected_result)
+
+        # Test excess data exceptions
+        assertExcessPadding(b'ab===', b'i')
+        assertExcessPadding(b'ab====', b'i')
+        assertNonBase64Data(b'ab==:', b'i')
+        assertExcessData(b'abc=a', b'i\xb7\x1a')
+        assertNonBase64Data(b'abc=:', b'i\xb7')
+        assertNonBase64Data(b'ab==\n', b'i')
+        assertExcessPadding(b'abc==', b'i\xb7')
+        assertExcessPadding(b'abc===', b'i\xb7')
+        assertExcessPadding(b'abc====', b'i\xb7')
+        assertExcessPadding(b'abc=====', b'i\xb7')
+
+        # Test non-base64 data exceptions
+        assertNonBase64Data(b'\nab==', b'i')
+        assertNonBase64Data(b'ab:(){:|:&};:==', b'i')
+        assertNonBase64Data(b'a\nb==', b'i')
+        assertNonBase64Data(b'a\x00b==', b'i')
+
+        # Test malformed padding
+        assertLeadingPadding(b'=', b'')
+        assertLeadingPadding(b'==', b'')
+        assertLeadingPadding(b'===', b'')
+        assertLeadingPadding(b'====', b'')
+        assertLeadingPadding(b'=====', b'')
+        assertDiscontinuousPadding(b'ab=c=', b'i\xb7')
+        assertDiscontinuousPadding(b'ab=ab==', b'i\xb6\x9b')
+        assertExcessPadding(b'abcd=', b'i\xb7\x1d')
+        assertExcessPadding(b'abcd==', b'i\xb7\x1d')
+        assertExcessPadding(b'abcd===', b'i\xb7\x1d')
+        assertExcessPadding(b'abcd====', b'i\xb7\x1d')
+        assertExcessPadding(b'abcd=====', b'i\xb7\x1d')
+
+    def test_base64_excess_data(self):
+        # Test excess data exceptions
+        def assertExcessData(data, expected):
+            assert_regex = r'(?i)Excess data'
+            data = self.type2test(data)
+            with self.assertRaisesRegex(binascii.Error, assert_regex):
+                binascii.a2b_base64(data, strict_mode=True)
+            self.assertEqual(binascii.a2b_base64(data, strict_mode=False),
+                             expected)
+            self.assertEqual(binascii.a2b_base64(data), expected)
+
+        assertExcessData(b'ab==c=', b'i\xb7')
+        assertExcessData(b'ab==cd', b'i\xb7\x1d')
+        assertExcessData(b'abc=d', b'i\xb7\x1d')
+
     def test_base64errors(self):
         # Test base64 with invalid padding
         def assertIncorrectPadding(data):
Index: Python-3.9.25/Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ Python-3.9.25/Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst	2026-04-17 22:39:09.320901249 +0200
@@ -0,0 +1,4 @@
+Base64 decoder (see :func:`binascii.a2b_base64`, :func:`base64.b64decode`, etc) no
+longer ignores excess data after the first padded quad in non-strict
+(default) mode.  Instead, in conformance with :rfc:`4648`, section 3.3, it now ignores
+the pad character, "=", if it is present before the end of the encoded data.
Index: Python-3.9.25/Modules/binascii.c
===================================================================
--- Python-3.9.25.orig/Modules/binascii.c	2025-10-31 19:40:52.000000000 +0100
+++ Python-3.9.25/Modules/binascii.c	2026-04-17 22:39:09.321266226 +0200
@@ -433,18 +433,25 @@
 
     data: ascii_buffer
     /
+    *
+    strict_mode: bool(accept={int}) = False
 
 Decode a line of base64 data.
+
+strict_mode
+  When set to True, bytes that are not part of the base64 standard are not allowed.
+  The same applies to excess data after padding (= / ==).
 [clinic start generated code]*/
 
 static PyObject *
-binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
-/*[clinic end generated code: output=0628223f19fd3f9b input=5872acf6e1cac243]*/
+binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
+/*[clinic end generated code: output=5409557788d4f975 input=c0c15fd0f8f9a62d]*/
 {
     assert(data->len >= 0);
 
     const unsigned char *ascii_data = data->buf;
     size_t ascii_len = data->len;
+    binascii_state *state = NULL;
 
     /* Allocate the buffer */
     Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
@@ -465,20 +472,55 @@
         ** the invalid ones.
         */
         if (this_ch == BASE64_PAD) {
-            if (quad_pos >= 2 && quad_pos + ++pads >= 4) {
-                /* A pad sequence means no more input.
-                ** We've already interpreted the data
-                ** from the quad at this point.
-                */
-                goto done;
+            pads++;
+            if (quad_pos >= 2 && quad_pos + pads <= 4) {
+                continue;
             }
-            continue;
+            // See RFC 4648, section-3.3: "specifications MAY ignore the
+            // pad character, "=", treating it as non-alphabet data, if
+            // it is present before the end of the encoded data" and
+            // "the excess pad characters MAY also be ignored."
+            if (!strict_mode) {
+                continue;
+            }
+            if (quad_pos == 1) {
+                /* Set an error below. */
+                break;
+            }
+            state = get_binascii_state(module);
+            if (state) {
+                PyErr_SetString(state->Error,
+                                (quad_pos == 0 && i == 0)
+                                ? "Leading padding not allowed"
+                                : "Excess padding not allowed");
+            }
+            goto error_end;
         }
 
         this_ch = table_a2b_base64[this_ch];
         if (this_ch >= 64) {
+            // See RFC 4648, section-3.3.
+            if (strict_mode) {
+                state = get_binascii_state(module);
+                if (state) {
+                    PyErr_SetString(state->Error, "Only base64 data is allowed");
+                }
+                goto error_end;
+            }
             continue;
         }
+
+        // Characters that are not '=', in the middle of the padding, are
+        // not allowed (except when they are). See RFC 4648, section-3.3.
+        if (pads && strict_mode) {
+            state = get_binascii_state(module);
+            if (state) {
+                PyErr_SetString(state->Error, (quad_pos + pads == 4)
+                    ? "Excess data after padding"
+                    : "Discontinuous padding not allowed");
+            }
+            goto error_end;
+        }
         pads = 0;
 
         switch (quad_pos) {
@@ -504,30 +546,35 @@
         }
     }
 
-    if (quad_pos != 0) {
-        binascii_state *state = PyModule_GetState(module);
-        if (state == NULL) {
-            /* error already set, from PyModule_GetState */
-        } else if (quad_pos == 1) {
-            /*
-            ** There is exactly one extra valid, non-padding, base64 character.
-            ** This is an invalid length, as there is no possible input that
-            ** could encoded into such a base64 string.
-            */
+    if (quad_pos == 1) {
+        /* There is exactly one extra valid, non-padding, base64 character.
+         * * This is an invalid length, as there is no possible input that
+         ** could encoded into such a base64 string.
+         */
+        state = get_binascii_state(module);
+        if (state) {
             PyErr_Format(state->Error,
                          "Invalid base64-encoded string: "
                          "number of data characters (%zd) cannot be 1 more "
                          "than a multiple of 4",
                          (bin_data - bin_data_start) / 3 * 4 + 1);
-        } else {
+        }
+        goto error_end;
+    }
+
+    if (quad_pos != 0 && quad_pos + pads < 4) {
+        state = get_binascii_state(module);
+        if (state) {
             PyErr_SetString(state->Error, "Incorrect padding");
         }
-        _PyBytesWriter_Dealloc(&writer);
-        return NULL;
+        goto error_end;
     }
 
-done:
     return _PyBytesWriter_Finish(&writer, bin_data);
+
+error_end:
+    _PyBytesWriter_Dealloc(&writer);
+    return NULL;
 }
 
 
Index: Python-3.9.25/Modules/clinic/binascii.c.h
===================================================================
--- Python-3.9.25.orig/Modules/clinic/binascii.c.h	2025-10-31 19:40:52.000000000 +0100
+++ Python-3.9.25/Modules/clinic/binascii.c.h	2026-04-17 22:39:09.321666090 +0200
@@ -92,27 +92,48 @@
 }
 
 PyDoc_STRVAR(binascii_a2b_base64__doc__,
-"a2b_base64($module, data, /)\n"
+"a2b_base64($module, data, /, *, strict_mode=False)\n"
 "--\n"
 "\n"
-"Decode a line of base64 data.");
+"Decode a line of base64 data.\n"
+"\n"
+"strict_mode\n"
+"  When set to True, bytes that are not part of the base64 standard are not allowed.\n"
+"  The same applies to excess data after padding (= / ==).");
 
 #define BINASCII_A2B_BASE64_METHODDEF    \
-    {"a2b_base64", (PyCFunction)binascii_a2b_base64, METH_O, binascii_a2b_base64__doc__},
+    {"a2b_base64", (PyCFunction)(void(*)(void))binascii_a2b_base64, METH_FASTCALL|METH_KEYWORDS, binascii_a2b_base64__doc__},
 
 static PyObject *
-binascii_a2b_base64_impl(PyObject *module, Py_buffer *data);
+binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode);
 
 static PyObject *
-binascii_a2b_base64(PyObject *module, PyObject *arg)
+binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
 {
     PyObject *return_value = NULL;
+    static const char * const _keywords[] = {"", "strict_mode", NULL};
+    static _PyArg_Parser _parser = {NULL, _keywords, "a2b_base64", 0};
+    PyObject *argsbuf[2];
+    Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1;
     Py_buffer data = {NULL, NULL};
+    int strict_mode = 0;
 
-    if (!ascii_buffer_converter(arg, &data)) {
+    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf);
+    if (!args) {
+        goto exit;
+    }
+    if (!ascii_buffer_converter(args[0], &data)) {
+        goto exit;
+    }
+    if (!noptargs) {
+        goto skip_optional_kwonly;
+    }
+    strict_mode = _PyLong_AsInt(args[1]);
+    if (strict_mode == -1 && PyErr_Occurred()) {
         goto exit;
     }
-    return_value = binascii_a2b_base64_impl(module, &data);
+skip_optional_kwonly:
+    return_value = binascii_a2b_base64_impl(module, &data, strict_mode);
 
 exit:
     /* Cleanup for data */
