From bd4ab523ba664863d40470cc718c566158adfa31 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Tue, 24 Mar 2026 01:20:26 +0200
Subject: [PATCH] [3.14] gh-145264: Do not ignore excess Base64 data after the
 first padded quad (GH-145267) (GH-146326)

Base64 decoder (see binascii.a2b_base64(), base64.b64decode(), etc)
no longer ignores excess data after the first padded quad in non-strict
(default) mode.  Instead, in conformance with RFC 4648, it ignores the
pad character, "=", if it is present before the end of the encoded data.
(cherry picked from commit 4561f6418a691b3e89aef0901f53fe0dfb7f7c0e)
(cherry picked from commit e31c55121620189a0d1a07b689762d8ca9c1b7fa)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
---
 Lib/test/test_binascii.py                                               |  104 ++++++-
 Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst |    4 
 Modules/binascii.c                                                      |  148 +++++-----
 Modules/clinic/binascii.c.h                                             |   24 +
 4 files changed, 209 insertions(+), 71 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst

Index: Python-3.6.15/Lib/test/test_binascii.py
===================================================================
--- Python-3.6.15.orig/Lib/test/test_binascii.py	2026-04-18 03:10:40.229540959 +0200
+++ Python-3.6.15/Lib/test/test_binascii.py	2026-04-18 03:19:06.878075064 +0200
@@ -3,6 +3,7 @@
 import unittest
 import binascii
 import array
+import re
 
 # Note: "*_hex" functions are aliases for "(un)hexlify"
 b2a_functions = ['b2a_base64', 'b2a_hex', 'b2a_hqx', 'b2a_qp', 'b2a_uu',
@@ -11,7 +12,6 @@
                  'unhexlify', 'rledecode_hqx']
 all_functions = a2b_functions + b2a_functions + ['crc32', 'crc_hqx']
 
-
 class BinASCIITest(unittest.TestCase):
 
     type2test = bytes
@@ -110,6 +110,108 @@
         # empty strings. TBD: shouldn't it raise an exception instead ?
         self.assertEqual(binascii.a2b_base64(self.type2test(fillers)), b'')
 
+    def test_base64_strict_mode(self):  # strict vs. default decoding
+        self.assertEqual(binascii.a2b_base64(self.type2test(b'ab=='),
+                                             strict_mode=True), b'i')
+        # Helper: strict mode must raise; default mode must still decode.
+        def assert_regex_template(assert_regex, data,
+                                  non_strict_mode_expected_result):
+            data = self.type2test(data)
+            with self.assertRaisesRegex(binascii.Error, assert_regex):
+                binascii.a2b_base64(data, strict_mode=True)
+            self.assertEqual(binascii.a2b_base64(data, strict_mode=False),
+                             non_strict_mode_expected_result)
+            self.assertEqual(binascii.a2b_base64(data),
+                             non_strict_mode_expected_result)
+        # One wrapper per strict-mode error message (matched ignoring case).
+        def assert_excess_data(data, expected):
+            assert_regex_template(r'(?i)Excess data', data, expected)
+
+        def assert_non_base64_data(data, expected):
+            assert_regex_template(r'(?i)Only base64 data', data, expected)
+
+        def assert_leading_padding(data, expected):
+            assert_regex_template(r'(?i)Leading padding', data, expected)
+
+        def assert_discontinuous_padding(data, expected):
+            assert_regex_template(r'(?i)Discontinuous padding', data, expected)
+
+        def assert_excess_padding(data, expected):
+            assert_regex_template(r'(?i)Excess padding', data, expected)
+        # Padding beyond a full quad, or junk / data following the padding.
+        assert_excess_padding(b'ab===', b'i')
+        assert_excess_padding(b'ab====', b'i')
+        assert_non_base64_data(b'ab==:', b'i')
+        assert_excess_data(b'abc=a', b'i\xb7\x1a')
+        assert_non_base64_data(b'abc=:', b'i\xb7')
+        assert_non_base64_data(b'ab==\n', b'i')
+        assert_excess_padding(b'abc==', b'i\xb7')
+        assert_excess_padding(b'abc===', b'i\xb7')
+        assert_excess_padding(b'abc====', b'i\xb7')
+        assert_excess_padding(b'abc=====', b'i\xb7')
+        # Non-alphabet bytes located before the padding.
+        assert_non_base64_data(b'\nab==', b'i')
+        assert_non_base64_data(b'ab:(){:|:&};:==', b'i')
+        assert_non_base64_data(b'a\nb==', b'i')
+        assert_non_base64_data(b'a\x00b==', b'i')
+        # Padding at the start, inside the data, or after a complete quad.
+        assert_leading_padding(b'=', b'')
+        assert_leading_padding(b'==', b'')
+        assert_leading_padding(b'===', b'')
+        assert_leading_padding(b'====', b'')
+        assert_leading_padding(b'=====', b'')
+        assert_discontinuous_padding(b'ab=c=', b'i\xb7')
+        assert_discontinuous_padding(b'ab=ab==', b'i\xb6\x9b')
+        assert_excess_padding(b'abcd=', b'i\xb7\x1d')
+        assert_excess_padding(b'abcd==', b'i\xb7\x1d')
+        assert_excess_padding(b'abcd===', b'i\xb7\x1d')
+        assert_excess_padding(b'abcd====', b'i\xb7\x1d')
+        assert_excess_padding(b'abcd=====', b'i\xb7\x1d')
+
+    def test_base64_excess_data(self):  # data after a padded quad
+        def assert_excess_data(data, expected):
+            data = self.type2test(data)
+            with self.assertRaisesRegex(binascii.Error, r'(?i)Excess data'):
+                binascii.a2b_base64(data, strict_mode=True)
+            self.assertEqual(binascii.a2b_base64(data, strict_mode=False),
+                             expected)
+            self.assertEqual(binascii.a2b_base64(data), expected)
+        # Default mode skips the "=" and keeps decoding what follows it.
+        assert_excess_data(b'ab==c=', b'i\xb7')
+        assert_excess_data(b'ab==cd', b'i\xb7\x1d')
+        assert_excess_data(b'abc=d', b'i\xb7\x1d')
+
+    def test_base64errors(self):  # errors raised even in default mode
+        def assert_incorrect_padding(data):
+            with self.assertRaisesRegex(binascii.Error,
+                                        r'(?i)Incorrect padding'):
+                binascii.a2b_base64(self.type2test(data))
+        # Data chars leave a partial quad that the "=" count does not fill.
+        assert_incorrect_padding(b'ab')
+        assert_incorrect_padding(b'ab=')
+        assert_incorrect_padding(b'abc')
+        assert_incorrect_padding(b'abcdef')
+        assert_incorrect_padding(b'abcdef=')
+        assert_incorrect_padding(b'abcdefg')
+        assert_incorrect_padding(b'a=b=')
+        assert_incorrect_padding(b'a\nb=')
+        # The message must report how many base64 alphabet chars were seen.
+        def assert_invalid_length(data):
+            n_data_chars = len(re.sub(br'[^A-Za-z0-9/+]', b'', data))
+            expected_errmsg_re = (
+                r'(?i)Invalid.+number of data characters.+'
+                + str(n_data_chars))
+            with self.assertRaisesRegex(binascii.Error, expected_errmsg_re):
+                binascii.a2b_base64(self.type2test(data))
+        # One leftover data char is rejected whatever padding follows it.
+        assert_invalid_length(b'a')
+        assert_invalid_length(b'a=')
+        assert_invalid_length(b'a==')
+        assert_invalid_length(b'a===')
+        assert_invalid_length(b'a' * 5)
+        assert_invalid_length(b'a' * (4 * 87 + 1))
+        assert_invalid_length(b'A\tB\nC ??DE')
+
     def test_uu(self):
         MAX_UU = 45
         lines = []
Index: Python-3.6.15/Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ Python-3.6.15/Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst	2026-04-18 03:10:57.077147195 +0200
@@ -0,0 +1,4 @@
+Base64 decoder (see :func:`binascii.a2b_base64`, :func:`base64.b64decode`, etc) no
+longer ignores excess data after the first padded quad in non-strict
+(default) mode.  Instead, in conformance with :rfc:`4648`, section 3.3, it now ignores
+the pad character, "=", if it is present before the end of the encoded data.
Index: Python-3.6.15/Modules/binascii.c
===================================================================
--- Python-3.6.15.orig/Modules/binascii.c	2021-09-04 05:49:41.000000000 +0200
+++ Python-3.6.15/Modules/binascii.c	2026-04-18 03:20:40.560664222 +0200
@@ -390,52 +390,35 @@
 }
 
 
-static int
-binascii_find_valid(const unsigned char *s, Py_ssize_t slen, int num)
-{
-    /* Finds & returns the (num+1)th
-    ** valid character for base64, or -1 if none.
-    */
-
-    int ret = -1;
-    unsigned char c, b64val;
-
-    while ((slen > 0) && (ret == -1)) {
-        c = *s;
-        b64val = table_a2b_base64[c & 0x7f];
-        if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
-            if (num == 0)
-                ret = *s;
-            num--;
-        }
-
-        s++;
-        slen--;
-    }
-    return ret;
-}
-
 /*[clinic input]
 binascii.a2b_base64
 
     data: ascii_buffer
     /
+    *
+    strict_mode: bool = False
 
 Decode a line of base64 data.
+
+strict_mode
+  When set to True, bytes that are not part of the base64 standard are not allowed.
+  The same applies to excess data after padding (= / ==).
 [clinic start generated code]*/
 
 static PyObject *
-binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
-/*[clinic end generated code: output=0628223f19fd3f9b input=5872acf6e1cac243]*/
+binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
+/*[clinic end generated code: output=5409557788d4f975 input=8c06d486533af0fb]*/
 {
     const unsigned char *ascii_data;
     unsigned char *bin_data;
-    int leftbits = 0;
     unsigned char this_ch;
-    unsigned int leftchar = 0;
+    unsigned char leftchar = 0;  /* bits carried over to the next output byte */
     Py_ssize_t ascii_len, bin_len;
     int quad_pos = 0;
+    int pads = 0;  /* '=' characters seen since the last data character */
+    int b64val;  /* table lookup result; values >= 64 are not base64 data */
     _PyBytesWriter writer;
+    unsigned char *bin_data_start;  /* lets us measure the output so far */
 
     ascii_data = data->buf;
     ascii_len = data->len;
@@ -453,61 +436,102 @@
     bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
     if (bin_data == NULL)
         return NULL;
+    bin_data_start = bin_data;
 
     for( ; ascii_len > 0; ascii_len--, ascii_data++) {
         this_ch = *ascii_data;
 
-        if (this_ch > 0x7f ||
-            this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
+        if (this_ch >= 128) {  /* non-ASCII bytes are never base64 data */
+            if (strict_mode) {
+                PyErr_SetString(Error, "Only base64 data is allowed");
+                goto error_end;
+            }
             continue;
+        }
 
-        /* Check for pad sequences and ignore
-        ** the invalid ones.
-        */
         if (this_ch == BASE64_PAD) {
-            if ( (quad_pos < 2) ||
-                 ((quad_pos == 2) &&
-                  (binascii_find_valid(ascii_data, ascii_len, 1)
-                   != BASE64_PAD)) )
-            {
+            pads++;
+            if (quad_pos >= 2 && quad_pos + pads <= 4) {
+                continue;  /* pad is in a valid position for this quad */
+            }
+            /* Misplaced pad: skipped by default, an error in strict mode. */
+            if (!strict_mode) {
                 continue;
             }
-            else {
-                /* A pad sequence means no more input.
-                ** We've already interpreted the data
-                ** from the quad at this point.
-                */
-                leftbits = 0;
+            /* Lone data char: leave the loop, the length check reports it. */
+            if (quad_pos == 1) {
                 break;
             }
+
+            PyErr_SetString(Error,
+                            (quad_pos == 0 && bin_data == bin_data_start)
+                            ? "Leading padding not allowed"
+                            : "Excess padding not allowed");
+            goto error_end;
         }
 
-        this_ch = table_a2b_base64[*ascii_data];
-        if ( this_ch == (unsigned char) -1 )
+        b64val = (unsigned char)table_a2b_base64[this_ch];
+        if (b64val >= 64) {
+            if (strict_mode) {
+                PyErr_SetString(Error, "Only base64 data is allowed");
+                goto error_end;
+            }
             continue;
+        }
 
-        /*
-        ** Shift it in on the low end, and see if there's
-        ** a byte ready for output.
-        */
-        quad_pos = (quad_pos + 1) & 0x03;
-        leftchar = (leftchar << 6) | (this_ch);
-        leftbits += 6;
-
-        if ( leftbits >= 8 ) {
-            leftbits -= 8;
-            *bin_data++ = (leftchar >> leftbits) & 0xff;
-            leftchar &= ((1 << leftbits) - 1);
+        if (pads && strict_mode) {  /* data character after a pad */
+            PyErr_SetString(Error,
+                            (quad_pos + pads == 4)
+                            ? "Excess data after padding"
+                            : "Discontinuous padding not allowed");
+            goto error_end;
         }
+        pads = 0;  /* a data character discards the pads seen before it */
+
+        this_ch = (unsigned char)b64val;
+        /* Combine with the carried bits; emit a byte once 8 are available. */
+        switch (quad_pos) {
+        case 0:
+            quad_pos = 1;
+            leftchar = this_ch;
+            break;
+        case 1:
+            quad_pos = 2;
+            *bin_data++ = (leftchar << 2) | (this_ch >> 4);
+            leftchar = this_ch & 0x0f;
+            break;
+        case 2:
+            quad_pos = 3;
+            *bin_data++ = (leftchar << 4) | (this_ch >> 2);
+            leftchar = this_ch & 0x03;
+            break;
+        case 3:
+            quad_pos = 0;
+            *bin_data++ = (leftchar << 6) | this_ch;
+            leftchar = 0;
+            break;
+        }
+    }
+    /* Full quads yield 3 bytes each, so recover the data character count. */
+    if (quad_pos == 1) {
+        PyErr_Format(Error,
+                     "Invalid base64-encoded string: "
+                     "number of data characters (%zd) cannot be 1 more "
+                     "than a multiple of 4",
+                     (bin_data - bin_data_start) / 3 * 4 + 1);
+        goto error_end;
     }
 
-    if (leftbits != 0) {
+    if (quad_pos != 0 && quad_pos + pads < 4) {  /* unfinished quad */
         PyErr_SetString(Error, "Incorrect padding");
-        _PyBytesWriter_Dealloc(&writer);
-        return NULL;
+        goto error_end;
     }
 
     return _PyBytesWriter_Finish(&writer, bin_data);
+
+error_end:
+    _PyBytesWriter_Dealloc(&writer);
+    return NULL;
 }
 
 
Index: Python-3.6.15/Modules/clinic/binascii.c.h
===================================================================
--- Python-3.6.15.orig/Modules/clinic/binascii.c.h	2021-09-04 05:49:41.000000000 +0200
+++ Python-3.6.15/Modules/clinic/binascii.c.h	2026-04-18 03:17:43.401416366 +0200
@@ -66,27 +66,35 @@
 }
 
 PyDoc_STRVAR(binascii_a2b_base64__doc__,
-"a2b_base64($module, data, /)\n"
+"a2b_base64($module, data, /, *, strict_mode=False)\n"
 "--\n"
 "\n"
-"Decode a line of base64 data.");
+"Decode a line of base64 data.\n"
+"\n"
+"strict_mode\n"
+"  When set to True, bytes that are not part of the base64 standard are not allowed.\n"
+"  The same applies to excess data after padding (= / ==).");
 
 #define BINASCII_A2B_BASE64_METHODDEF    \
-    {"a2b_base64", (PyCFunction)binascii_a2b_base64, METH_O, binascii_a2b_base64__doc__},
+    {"a2b_base64", (PyCFunction)binascii_a2b_base64, METH_FASTCALL, binascii_a2b_base64__doc__},
 
 static PyObject *
-binascii_a2b_base64_impl(PyObject *module, Py_buffer *data);
+binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode);
 
 static PyObject *
-binascii_a2b_base64(PyObject *module, PyObject *arg)
+binascii_a2b_base64(PyObject *module, PyObject **args, Py_ssize_t nargs, PyObject *kwnames)
 {
     PyObject *return_value = NULL;
+    static const char * const _keywords[] = {"", "strict_mode", NULL};  /* "" keeps data positional-only, as the "/" in the clinic input declares */
+    static _PyArg_Parser _parser = {"O&|$p:a2b_base64", _keywords, 0};
     Py_buffer data = {NULL, NULL};
+    int strict_mode = 0;
 
-    if (!PyArg_Parse(arg, "O&:a2b_base64", ascii_buffer_converter, &data)) {
+    if (!_PyArg_ParseStack(args, nargs, kwnames, &_parser,
+        ascii_buffer_converter, &data, &strict_mode)) {
         goto exit;
     }
-    return_value = binascii_a2b_base64_impl(module, &data);
+    return_value = binascii_a2b_base64_impl(module, &data, strict_mode);
 
 exit:
     /* Cleanup for data */
@@ -550,4 +558,4 @@
 
     return return_value;
 }
-/*[clinic end generated code: output=458eb09731cb7877 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=03ff6186029db42d input=a9049054013a1b77]*/
