Skip to content

Commit 1ed0bac

Browse files
authored
fix: remove ultranormalization of distribution filenames (#124)
1 parent a5e7f03 commit 1ed0bac

File tree

3 files changed

+34
-51
lines changed

3 files changed

+34
-51
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
### Fixed
11+
12+
- This library no longer enforces distribution name "ultranormalization,"
13+
which went beyond the requirements specified in PEP 740
14+
([#124](https://github.com/trailofbits/pypi-attestations/pull/124))
15+
1016
## [0.0.25]
1117

1218
### Fixed

src/pypi_attestations/_impl.py

Lines changed: 19 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,8 @@ class Distribution(BaseModel):
9191
@field_validator("name")
9292
@classmethod
9393
def _validate_name(cls, v: str) -> str:
94-
return _ultranormalize_dist_filename(v)
94+
_check_dist_filename(v)
95+
return v
9596

9697
@classmethod
9798
def from_file(cls, dist: Path) -> Distribution:
@@ -283,8 +284,15 @@ def verify(
283284
raise VerificationError("invalid subject: missing name")
284285

285286
try:
286-
# We always ultranormalize when signing, but other signers may not.
287-
subject_name = _ultranormalize_dist_filename(subject.name)
287+
# We don't allow signing of malformed distribution names.
288+
# Previous versions of this package went further than this
289+
# and "ultranormalized" the name, but this was superfluous
290+
# and caused confusion for users who expected the subject to
291+
# be an exact match for their distribution filename.
292+
# See: https://github.com/pypi/warehouse/issues/18128
293+
# See: https://github.com/trailofbits/pypi-attestations/issues/123
294+
_check_dist_filename(subject.name)
295+
subject_name = subject.name
288296
except ValueError as e:
289297
raise VerificationError(f"invalid subject: {str(e)}")
290298

@@ -384,57 +392,24 @@ def _der_decode_utf8string(der: bytes) -> str:
384392
return der_decode(der, UTF8String)[0].decode() # type: ignore[no-any-return]
385393

386394

387-
def _ultranormalize_dist_filename(dist: str) -> str:
388-
"""Return an "ultranormalized" form of the given distribution filename.
395+
def _check_dist_filename(dist: str) -> None:
396+
"""Validate a distribution filename for well-formedness.
389397
390-
This form is equivalent to the normalized form for sdist and wheel
391-
filenames, with the additional stipulation that compressed tag sets,
392-
if present, are also sorted alphanumerically.
398+
This does **not** fully normalize the filename. For example,
399+
a user can include a non-normalized version string or package name
400+
(or compressed tag set in the case of wheels), and this function
401+
will **not** reject it so long as it parses correctly.
393402
394403
Raises `ValueError` on any invalid distribution filename.
395404
"""
396405
# NOTE: .whl and .tar.gz are assumed lowercase, since `packaging`
397406
# already rejects non-lowercase variants.
398407
if dist.endswith(".whl"):
399408
# `parse_wheel_filename` raises a supertype of ValueError on failure.
400-
name, ver, build, tags = parse_wheel_filename(dist)
401-
402-
# The name has been normalized to replace runs of `[.-_]+` with `-`,
403-
# which then needs to be replaced with `_` for the wheel.
404-
name = name.replace("-", "_")
405-
406-
# `parse_wheel_filename` normalizes the name and version for us,
407-
# so all we need to do is re-compress the tag set in a canonical
408-
# order.
409-
# NOTE(ww): This is written in a not very efficient manner, since
410-
# I wasn't feeling smart.
411-
impls, abis, platforms = set(), set(), set()
412-
for tag in tags:
413-
impls.add(tag.interpreter)
414-
abis.add(tag.abi)
415-
platforms.add(tag.platform)
416-
417-
impl_tag = ".".join(sorted(impls))
418-
abi_tag = ".".join(sorted(abis))
419-
platform_tag = ".".join(sorted(platforms))
420-
421-
if build:
422-
parts = "-".join(
423-
[name, str(ver), f"{build[0]}{build[1]}", impl_tag, abi_tag, platform_tag]
424-
)
425-
else:
426-
parts = "-".join([name, str(ver), impl_tag, abi_tag, platform_tag])
427-
428-
return f"{parts}.whl"
409+
parse_wheel_filename(dist)
429410
elif dist.endswith((".tar.gz", ".zip")):
430411
# `parse_sdist_filename` raises a supertype of ValueError on failure.
431-
name, ver = parse_sdist_filename(dist)
432-
name = name.replace("-", "_")
433-
434-
if dist.endswith(".tar.gz"):
435-
return f"{name}-{ver}.tar.gz"
436-
else:
437-
return f"{name}-{ver}.zip"
412+
parse_sdist_filename(dist)
438413
else:
439414
raise ValueError(f"unknown distribution format: {dist}")
440415

test/test_impl.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -555,12 +555,14 @@ def test_exception_types(self) -> None:
555555
("foo-01.0beta1.zip", "foo-1.0b1.zip"),
556556
],
557557
)
558-
def test_ultranormalize_dist_filename(input: str, normalized: str) -> None:
559-
# normalization works as expected
560-
assert impl._ultranormalize_dist_filename(input) == normalized
558+
def test_check_dist_filename(input: str, normalized: str) -> None:
559+
# TODO: assert normalization if/when we re-add it.
561560

562-
# normalization is a fixpoint, and normalized names are valid dist names
563-
assert impl._ultranormalize_dist_filename(normalized) == normalized
561+
# each input is a well-formed dist name
562+
impl._check_dist_filename(input)
563+
564+
# normalized forms are also well-formed
565+
impl._check_dist_filename(normalized)
564566

565567

566568
@pytest.mark.parametrize(
@@ -590,9 +592,9 @@ def test_ultranormalize_dist_filename(input: str, normalized: str) -> None:
590592
"foo-1.2.3.tar.gz.zip",
591593
],
592594
)
593-
def test_ultranormalize_dist_filename_invalid(input: str) -> None:
595+
def test_check_dist_filename_invalid(input: str) -> None:
594596
with pytest.raises(ValueError):
595-
impl._ultranormalize_dist_filename(input)
597+
impl._check_dist_filename(input)
596598

597599

598600
class TestPublisher:

0 commit comments

Comments
 (0)