From aba9f18eba870d14fb52c5ba5d73451349e339c3 Mon Sep 17 00:00:00 2001
From: Cosmin Truta <ctruta@gmail.com>
Date: Sat, 21 Mar 2026 23:48:49 +0200
Subject: [PATCH] fix(arm): Resolve out-of-bounds read/write in NEON palette
 expansion

Both `png_do_expand_palette_rgba8_neon` and
`png_do_expand_palette_rgb8_neon` advanced in fixed-size chunks without
guarding the final iteration, allowing out-of-bounds reads and writes
when the row width is not a multiple of the chunk size.

Restrict the NEON loop to full chunks only, remove the now-unnecessary
post-loop adjustment, and undo the `*ddp` pre-adjustment before the
pointer handoff to the scalar fallback.

Reported-by: Amemoyoi <Amemoyoi@users.noreply.github.com>
Co-authored-by: Amemoyoi <Amemoyoi@users.noreply.github.com>
Signed-off-by: Cosmin Truta <ctruta@gmail.com>
---
 arm/palette_neon_intrinsics.c | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

Index: libpng-1.6.44/arm/palette_neon_intrinsics.c
===================================================================
--- libpng-1.6.44.orig/arm/palette_neon_intrinsics.c
+++ libpng-1.6.44/arm/palette_neon_intrinsics.c
@@ -80,7 +80,7 @@ png_do_expand_palette_rgba8_neon(png_str
     */
    *ddp = *ddp - ((pixels_per_chunk * sizeof(png_uint_32)) - 1);
 
-   for (i = 0; i < row_width; i += pixels_per_chunk)
+   for (i = 0; i + pixels_per_chunk <= row_width; i += pixels_per_chunk)
    {
       uint32x4_t cur;
       png_bytep sp = *ssp - i, dp = *ddp - (i << 2);
@@ -90,13 +90,12 @@ png_do_expand_palette_rgba8_neon(png_str
       cur = vld1q_lane_u32(riffled_palette + *(sp - 0), cur, 3);
       vst1q_u32((void *)dp, cur);
    }
-   if (i != row_width)
-   {
-      /* Remove the amount that wasn't processed. */
-      i -= pixels_per_chunk;
-   }
 
-   /* Decrement output pointers. */
+   /* Undo the pre-adjustment of *ddp before the pointer handoff,
+    * so the scalar fallback in pngrtran.c receives a dp that points
+    * to the correct position.
+    */
+   *ddp = *ddp + (pixels_per_chunk * 4 - 1);
    *ssp = *ssp - i;
    *ddp = *ddp - (i << 2);
    return i;
@@ -121,7 +120,7 @@ png_do_expand_palette_rgb8_neon(png_stru
    /* Seeking this back by 8 pixels x 3 bytes. */
    *ddp = *ddp - ((pixels_per_chunk * sizeof(png_color)) - 1);
 
-   for (i = 0; i < row_width; i += pixels_per_chunk)
+   for (i = 0; i + pixels_per_chunk <= row_width; i += pixels_per_chunk)
    {
       uint8x8x3_t cur;
       png_bytep sp = *ssp - i, dp = *ddp - ((i << 1) + i);
@@ -136,13 +135,11 @@ png_do_expand_palette_rgb8_neon(png_stru
       vst3_u8((void *)dp, cur);
    }
 
-   if (i != row_width)
-   {
-      /* Remove the amount that wasn't processed. */
-      i -= pixels_per_chunk;
-   }
-
-   /* Decrement output pointers. */
+   /* Undo the pre-adjustment of *ddp before the pointer handoff,
+    * so the scalar fallback in pngrtran.c receives a dp that points
+    * to the correct position.
+    */
+   *ddp = *ddp + (pixels_per_chunk * 3 - 1);
    *ssp = *ssp - i;
    *ddp = *ddp - ((i << 1) + i);
    return i;
