From da659fee4898a73dbc75070f3e82d49f745e4628 Mon Sep 17 00:00:00 2001
From: William Woodruff <william@astral.sh>
Date: Wed, 29 Oct 2025 11:11:06 -0400
Subject: [PATCH] Merge commit from fork

* feat: reject ZIP archives with improbable filenames

Signed-off-by: William Woodruff <william@astral.sh>

* use my PR for async_zip temporarily

Signed-off-by: William Woodruff <william@astral.sh>

* update snapshot

Signed-off-by: William Woodruff <william@astral.sh>

* two more tests

Signed-off-by: William Woodruff <william@astral.sh>

* update rev

Signed-off-by: William Woodruff <william@astral.sh>

---------

Signed-off-by: William Woodruff <william@astral.sh>
---
 Cargo.lock                        |  3 +-
 Cargo.toml                        |  2 +-
 crates/uv-extract/Cargo.toml      |  1 +
 crates/uv-extract/src/error.rs    |  4 ++
 crates/uv-extract/src/lib.rs      | 93 +++++++++++++++++++++++++++++++
 crates/uv-extract/src/stream.rs   | 32 +++++------
 crates/uv-extract/src/sync.rs     |  9 ++-
 crates/uv/tests/it/pip_install.rs | 19 +++++++
 8 files changed, 143 insertions(+), 20 deletions(-)

Index: uv-0.7.18/Cargo.toml
===================================================================
--- uv-0.7.18.orig/Cargo.toml
+++ uv-0.7.18/Cargo.toml
@@ -80,7 +80,7 @@ async-channel = { version = "2.3.1" }
 async-compression = { version = "0.4.12", features = ["bzip2", "gzip", "xz", "zstd"] }
 async-trait = { version = "0.1.82" }
 async_http_range_reader = { version = "0.9.1" }
-async_zip = { git = "https://github.com/astral-sh/rs-async-zip", rev = "285e48742b74ab109887d62e1ae79e7c15fd4878", features = ["bzip2", "deflate", "lzma", "tokio", "xz", "zstd"] }
+async_zip = { git = "https://github.com/astral-sh/rs-async-zip", rev = "f6a41d32866003c868d03ed791a89c794f61b703", features = ["bzip2", "deflate", "lzma", "tokio", "xz", "zstd"] }
 axoupdater = { version = "0.9.0", default-features = false }
 backon = { version = "1.3.0" }
 base64 = { version = "0.22.1" }
@@ -88,7 +88,8 @@ bitflags = { version = "2.6.0" }
 blake2 = { version = "0.10.6" }
 boxcar = { version = "0.2.5" }
 bytecheck = { version = "0.8.0" }
-cargo-util = { version = "0.2.14" }
+# Pinned exactly: 0.2.23 is the last cargo-util release that supports Rust 1.88
+cargo-util = { version = "=0.2.23" }
 clap = { version = "4.5.17", features = ["derive", "env", "string", "wrap_help"] }
 clap_complete_command = { version = "0.6.1" }
 configparser = { version = "3.1.0" }
Index: uv-0.7.18/crates/uv-extract/Cargo.toml
===================================================================
--- uv-0.7.18.orig/crates/uv-extract/Cargo.toml
+++ uv-0.7.18/crates/uv-extract/Cargo.toml
@@ -29,6 +29,7 @@ fs-err = { workspace = true, features =
 futures = { workspace = true }
 md-5 = { workspace = true }
 rayon = { workspace = true }
+regex = { workspace = true }
 reqwest = { workspace = true }
 rustc-hash = { workspace = true }
 sha2 = { workspace = true }
Index: uv-0.7.18/crates/uv-extract/src/error.rs
===================================================================
--- uv-0.7.18.orig/crates/uv-extract/src/error.rs
+++ uv-0.7.18/crates/uv-extract/src/error.rs
@@ -87,6 +87,10 @@ pub enum Error {
     ExtensibleData,
     #[error("ZIP file end-of-central-directory record contains multiple entries with the same path, but conflicting modes: {}", path.display())]
     DuplicateExecutableFileHeader { path: PathBuf },
+    #[error("Archive contains a file with an empty filename")]
+    EmptyFilename,
+    #[error("Archive contains unacceptable filename: {filename}")]
+    UnacceptableFilename { filename: String },
 }
 
 impl Error {
Index: uv-0.7.18/crates/uv-extract/src/lib.rs
===================================================================
--- uv-0.7.18.orig/crates/uv-extract/src/lib.rs
+++ uv-0.7.18/crates/uv-extract/src/lib.rs
@@ -1,8 +1,101 @@
+use std::sync::LazyLock;
+
 pub use error::Error;
+use regex::Regex;
 pub use sync::*;
+use uv_static::EnvVars;
 
 mod error;
 pub mod hash;
 pub mod stream;
 mod sync;
 mod vendor;
+
+static CONTROL_CHARACTERS_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\p{C}").unwrap());
+static REPLACEMENT_CHARACTER: &str = "\u{FFFD}"; // substituted for each character matched above
+
+/// Validate that a given filename (e.g. reported by a ZIP archive's
+/// local file entries or central directory entries) is "safe" to use.
+///
+/// "Safe" in this context doesn't refer to directory traversal
+/// risk, but whether we believe that other ZIP implementations
+/// handle the name correctly and consistently.
+///
+/// Returns [`Error::EmptyFilename`] for an empty name, and
+/// [`Error::UnacceptableFilename`] for a name containing any character in
+/// Unicode's "Other" general category (`\p{C}`: control, format, private-use
+/// and unassigned code points). The latter error carries the name with each
+/// such character replaced by U+FFFD.
+///
+/// Leading/trailing whitespace and excessive length are not checked (yet).
+pub(crate) fn validate_archive_member_name(name: &str) -> Result<(), Error> {
+    if name.is_empty() {
+        return Err(Error::EmptyFilename);
+    }
+
+    match CONTROL_CHARACTERS_RE.replace_all(name, REPLACEMENT_CHARACTER) {
+        // `replace_all` hands back the borrowed input when nothing matched.
+        std::borrow::Cow::Borrowed(_) => Ok(()),
+        std::borrow::Cow::Owned(sanitized) => Err(Error::UnacceptableFilename {
+            filename: sanitized,
+        }),
+    }
+}
+
+/// Returns `true` if ZIP validation is disabled via [`EnvVars::UV_INSECURE_NO_ZIP_VALIDATION`].
+pub(crate) fn insecure_no_validate() -> bool {
+    // TODO(charlie) Parse this in `EnvironmentOptions`.
+    let Some(value) = std::env::var_os(EnvVars::UV_INSECURE_NO_ZIP_VALIDATION) else {
+        return false; // variable is not set
+    };
+    let Some(value) = value.to_str() else {
+        return false; // value is not valid Unicode
+    };
+    matches!(
+        value.to_lowercase().as_str(),
+        "y" | "yes" | "t" | "true" | "on" | "1"
+    )
+}
+
+#[cfg(test)]
+mod tests {
+    #[test]
+    fn test_validate_archive_member_name() {
+        for (testcase, ok) in &[
+            // Accepted: printable names, including spaces and non-ASCII characters.
+            ("normal.txt", true),
+            ("__init__.py", true),
+            ("fine i guess.py", true),
+            ("🌈.py", true),
+            // Rejected: the empty name and names containing control characters.
+            ("", false),
+            ("new\nline.py", false),
+            ("carriage\rreturn.py", false),
+            ("tab\tcharacter.py", false),
+            ("null\0byte.py", false),
+            ("control\x01code.py", false),
+            ("control\x02code.py", false),
+            ("control\x03code.py", false),
+            ("control\x04code.py", false),
+            ("backspace\x08code.py", false),
+            ("delete\x7fcode.py", false),
+        ] {
+            assert_eq!(
+                super::validate_archive_member_name(testcase).is_ok(),
+                *ok,
+                "testcase: {testcase}"
+            );
+        }
+    }
+
+    #[test]
+    fn test_unacceptable_filename_error_replaces_control_characters() {
+        let err = super::validate_archive_member_name("bad\nname").unwrap_err();
+        match err {
+            super::Error::UnacceptableFilename { filename } => {
+                assert_eq!(filename, "bad\u{FFFD}name");
+            }
+            _ => panic!("expected UnacceptableFilename error"),
+        }
+    }
+}
Index: uv-0.7.18/crates/uv-extract/src/stream.rs
===================================================================
--- uv-0.7.18.orig/crates/uv-extract/src/stream.rs
+++ uv-0.7.18/crates/uv-extract/src/stream.rs
@@ -9,9 +9,8 @@ use tokio_util::compat::{FuturesAsyncRea
 use tracing::{debug, warn};
 
 use uv_distribution_filename::SourceDistExtension;
-use uv_static::EnvVars;
 
-use crate::Error;
+use crate::{Error, insecure_no_validate, validate_archive_member_name};
 
 const DEFAULT_BUF_SIZE: usize = 128 * 1024;
 
@@ -39,21 +38,6 @@ struct ComputedEntry {
     compressed_size: u64,
 }
 
-/// Returns `true` if ZIP validation is disabled.
-fn insecure_no_validate() -> bool {
-    // TODO(charlie) Parse this in `EnvironmentOptions`.
-    let Some(value) = std::env::var_os(EnvVars::UV_INSECURE_NO_ZIP_VALIDATION) else {
-        return false;
-    };
-    let Some(value) = value.to_str() else {
-        return false;
-    };
-    matches!(
-        value.to_lowercase().as_str(),
-        "y" | "yes" | "t" | "true" | "on" | "1"
-    )
-}
-
 /// Unpack a `.zip` archive into the target directory, without requiring `Seek`.
 ///
 /// This is useful for unzipping files as they're being downloaded. If the archive
@@ -102,6 +86,13 @@ pub async fn unzip<R: tokio::io::AsyncRe
             Err(err) => return Err(err.into()),
         };
 
+        // Apply sanity checks to the file names in local headers.
+        if let Err(e) = validate_archive_member_name(path) {
+            if !skip_validation {
+                return Err(e);
+            }
+        }
+
         // Sanitize the file name to prevent directory traversal attacks.
         let Some(relpath) = enclosed_name(path) else {
             warn!("Skipping unsafe file name: {path}");
@@ -362,6 +353,13 @@ pub async fn unzip<R: tokio::io::AsyncRe
                     Err(err) => return Err(err.into()),
                 };
 
+                // Apply sanity checks to the file names in CD headers.
+                if let Err(e) = validate_archive_member_name(path) {
+                    if !skip_validation {
+                        return Err(e);
+                    }
+                }
+
                 // Sanitize the file name to prevent directory traversal attacks.
                 let Some(relpath) = enclosed_name(path) else {
                     continue;
Index: uv-0.7.18/crates/uv-extract/src/sync.rs
===================================================================
--- uv-0.7.18.orig/crates/uv-extract/src/sync.rs
+++ uv-0.7.18/crates/uv-extract/src/sync.rs
@@ -1,8 +1,8 @@
 use std::path::{Path, PathBuf};
 use std::sync::{LazyLock, Mutex};
 
-use crate::Error;
 use crate::vendor::{CloneableSeekableReader, HasLength};
+use crate::{Error, insecure_no_validate, validate_archive_member_name};
 use rayon::prelude::*;
 use rustc_hash::FxHashSet;
 use tracing::warn;
@@ -18,6 +18,7 @@ pub fn unzip<R: Send + std::io::Read + s
     let reader = std::io::BufReader::new(reader);
     let archive = ZipArchive::new(CloneableSeekableReader::new(reader))?;
     let directories = Mutex::new(FxHashSet::default());
+    let skip_validation = insecure_no_validate();
     // Initialize the threadpool with the user settings.
     LazyLock::force(&RAYON_INITIALIZE);
     (0..archive.len())
@@ -26,6 +27,12 @@ pub fn unzip<R: Send + std::io::Read + s
             let mut archive = archive.clone();
             let mut file = archive.by_index(file_number)?;
 
+            if let Err(e) = validate_archive_member_name(file.name()) {
+                if !skip_validation {
+                    return Err(e);
+                }
+            }
+
             // Determine the path of the file within the wheel.
             let Some(enclosed_name) = file.enclosed_name() else {
                 warn!("Skipping unsafe file name: {}", file.name());
Index: uv-0.7.18/crates/uv/tests/it/pip_install.rs
===================================================================
--- uv-0.7.18.orig/crates/uv/tests/it/pip_install.rs
+++ uv-0.7.18/crates/uv/tests/it/pip_install.rs
@@ -11510,6 +11510,25 @@ fn conflicting_flags_clap_bug() {
 }
 
 #[test]
+fn reject_invalid_archive_member_names() {
+    let context = TestContext::new("3.12").with_exclude_newer("2025-10-07T00:00:00Z");
+    // The snapshot pins the failure: extraction must reject the wheel's `RECORD` member name.
+    uv_snapshot!(context.filters(), context.pip_install()
+        .arg("cbwheeldiff2==0.0.1"), @r"
+    success: false
+    exit_code: 1
+    ----- stdout -----
+
+    ----- stderr -----
+    Resolved 1 package in [TIME]
+      × Failed to download `cbwheeldiff2==0.0.1`
+      ├─▶ Failed to extract archive: cbwheeldiff2-0.0.1-py2.py3-none-any.whl
+      ╰─▶ Archive contains unacceptable filename: cbwheeldiff2-0.0.1.dist-info/RECORD�
+    "
+    );
+}
+
+#[test]
 fn reject_invalid_streaming_zip() {
     let context = TestContext::new("3.12").with_exclude_newer("2025-07-10T00:00:00Z");
 
