From c94eede797b56108216859f7e0a698b16c0ab7db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Laurenz=20Altenm=C3=BCller?= Date: Wed, 14 Jan 2026 21:14:30 +0100 Subject: [PATCH 1/2] Quote all files if original RECORD had all files quoted --- python/private/pypi/repack_whl.py | 8 +++-- tools/wheelmaker.py | 58 ++++++++++++++++--------------- 2 files changed, 36 insertions(+), 30 deletions(-) diff --git a/python/private/pypi/repack_whl.py b/python/private/pypi/repack_whl.py index 519631f272..59b5a2b8fa 100644 --- a/python/private/pypi/repack_whl.py +++ b/python/private/pypi/repack_whl.py @@ -151,17 +151,21 @@ def main(sys_argv): logging.debug(f"Found dist-info dir: {distinfo_dir}") record_path = distinfo_dir / "RECORD" record_contents = record_path.read_text() if record_path.exists() else "" + quote_files = all(line.startswith('"') for line in record_contents.splitlines()) distribution_prefix = distinfo_dir.with_suffix("").name with _WhlFile( - args.output, mode="w", distribution_prefix=distribution_prefix + args.output, + mode="w", + distribution_prefix=distribution_prefix, + quote_all_filenames=quote_files, ) as out: for p in _files_to_pack(patched_wheel_dir, record_contents): rel_path = p.relative_to(patched_wheel_dir) out.add_file(str(rel_path), p) logging.debug(f"Writing RECORD file") - got_record = out.add_recordfile().decode("utf-8", "surrogateescape") + got_record = out.add_recordfile() if got_record == record_contents: logging.info(f"Created a whl file: {args.output}") diff --git a/tools/wheelmaker.py b/tools/wheelmaker.py index de6b8f48af..546c9893b6 100644 --- a/tools/wheelmaker.py +++ b/tools/wheelmaker.py @@ -132,13 +132,17 @@ def __init__( distribution_prefix: str, strip_path_prefixes=None, compression=zipfile.ZIP_DEFLATED, + quote_all_filenames: bool = False, **kwargs, ): self._distribution_prefix = distribution_prefix self._strip_path_prefixes = strip_path_prefixes or [] - # Entries for the RECORD file as (filename, hash, size) tuples. - self._record = [] + # Entries for the RECORD file as (filename, digest, size) tuples. + self._record: list[tuple[str, str, str]] = [] + # Whether to quote filenames in the RECORD file (for compatibility with + # some wheels like torch that have quoted filenames in their RECORD). + self.quote_all_filenames = quote_all_filenames super().__init__(filename, mode=mode, compression=compression, **kwargs) @@ -192,16 +196,15 @@ def add_string(self, filename, contents): hash.update(contents) self._add_to_record(filename, self._serialize_digest(hash), len(contents)) - def _serialize_digest(self, hash): + def _serialize_digest(self, hash) -> str: # https://www.python.org/dev/peps/pep-0376/#record # "base64.urlsafe_b64encode(digest) with trailing = removed" digest = base64.urlsafe_b64encode(hash.digest()) digest = b"sha256=" + digest.rstrip(b"=") - return digest + return digest.decode("utf-8", "surrogateescape") - def _add_to_record(self, filename, hash, size): - size = str(size).encode("ascii") - self._record.append((filename, hash, size)) + def _add_to_record(self, filename: str, hash: str, size: int) -> None: + self._record.append((filename, hash, str(size))) def _zipinfo(self, filename): """Construct deterministic ZipInfo entry for a file named filename""" @@ -223,29 +226,28 @@ def _zipinfo(self, filename): zinfo.compress_type = self.compression return zinfo - def add_recordfile(self): + def _quote_filename(self, filename: str) -> str: + """Return a possibly quoted filename for RECORD file.""" + # Use csv writer to auto-quote the filename (may contain ",") + with io.StringIO() as buf: + csv.writer(buf).writerow([filename.lstrip("/")]) + filename = buf.getvalue().strip() + # Some RECORDs like torch have *all* filenames quoted and we must minimize diff + if self.quote_all_filenames and not filename.startswith('"'): + filename = f'"{filename}"' + return filename + + def add_recordfile(self) -> str: """Write RECORD file to the distribution.""" record_path = self.distinfo_path("RECORD") - entries = self._record + [(record_path, b"", b"")] - with io.StringIO() as contents_io: - writer = csv.writer(contents_io, lineterminator="\n") - for filename, digest, size in entries: - if isinstance(filename, str): - filename = filename.lstrip("/") - writer.writerow( - ( - ( - c - if isinstance(c, str) - else c.decode("utf-8", "surrogateescape") - ) - for c in (filename, digest, size) - ) - ) - - contents = contents_io.getvalue() - self.add_string(record_path, contents) - return contents.encode("utf-8", "surrogateescape") + entries = self._record + [(record_path, "", "")] + entries = [ + (self._quote_filename(fname), digest, size) + for fname, digest, size in entries + ] + contents = "\n".join(",".join(entry) for entry in entries) + "\n" + self.add_string(record_path, contents) + return contents class WheelMaker(object): From de1987b5b22ff764f11f1c5690b8f79af076c302 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Wed, 14 Jan 2026 21:31:41 +0100 Subject: [PATCH 2/2] Update tools/wheelmaker.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- tools/wheelmaker.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tools/wheelmaker.py b/tools/wheelmaker.py index 546c9893b6..4390df3445 100644 --- a/tools/wheelmaker.py +++ b/tools/wheelmaker.py @@ -228,14 +228,13 @@ def _zipinfo(self, filename): def _quote_filename(self, filename: str) -> str: """Return a possibly quoted filename for RECORD file.""" - # Use csv writer to auto-quote the filename (may contain ",") + filename = filename.lstrip("/") + # Some RECORDs like torch have *all* filenames quoted and we must minimize diff. + # Otherwise, we quote only when necessary (e.g. for filenames with commas). + quoting = csv.QUOTE_ALL if self.quote_all_filenames else csv.QUOTE_MINIMAL with io.StringIO() as buf: - csv.writer(buf).writerow([filename.lstrip("/")]) - filename = buf.getvalue().strip() - # Some RECORDs like torch have *all* filenames quoted and we must minimize diff - if self.quote_all_filenames and not filename.startswith('"'): - filename = f'"{filename}"' - return filename + csv.writer(buf, quoting=quoting).writerow([filename]) + return buf.getvalue().strip() def add_recordfile(self) -> str: """Write RECORD file to the distribution."""