Skip to content

Commit f555c7f

Browse files
committed
fix: work around PyPI's zipbomb mitigation measures
1 parent a75d04f commit f555c7f

3 files changed

Lines changed: 198 additions & 2 deletions

File tree

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,3 +154,6 @@ src/jntajis/_version.py
154154

155155
# automatically generated C source
156156
src/jntajis/_jntajis.c
157+
158+
# Temporary directory used by the coding agent
159+
.agents/tmp

pyproject.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,11 +104,12 @@ build = ["cp310-*", "cp311-*", "cp312-*", "cp313-*"]
104104
# skip = "pp*"
105105

106106
[tool.cibuildwheel.linux]
107-
before-build = "find /project -iname '*.so' | xargs rm -f"
107+
before-build = "find {project} -iname '*.so' | xargs rm -f"
108108

109109
[tool.cibuildwheel.windows]
110110
before-build = "DEL /S /Q *.pyd"
111+
repair-wheel-command = "python {project}/tools/repack_wheel.py {wheel} {dest_dir}"
111112

112113
[tool.cibuildwheel.macos]
113114
archs = ["universal2"]
114-
before-build = "find . -iname '*.so' | xargs rm -f "
115+
before-build = "find {project} -iname '*.so' | xargs rm -f "

tools/repack_wheel.py

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
"""Repack wheels to limit per-entry inflation below a threshold.
2+
3+
PyPI rejects wheels where any zip entry inflates over 50x (zip bomb detection).
4+
This script re-compresses entries that exceed the limit using deflate with
5+
frequent Z_FULL_FLUSH to reduce compression efficiency while keeping the
6+
data genuinely compressed.
7+
8+
Usage (cibuildwheel repair-wheel-command):
9+
# Single wheel -> dest_dir (Windows, no prior repair step)
10+
python tools/repack_wheel.py WHEEL DEST_DIR
11+
12+
# All wheels in a directory, in-place (after auditwheel/delocate)
13+
python tools/repack_wheel.py DEST_DIR
14+
"""
15+
16+
import os
17+
import struct
18+
import sys
19+
import time
20+
import zlib
21+
import zipfile
22+
23+
# Chunk size (in bytes) handed to compress_chunked() by repack_wheel(); the
# compressor is flushed after every chunk of this size.
CHUNK_SIZE = 4096
24+
# Uncompressed/compressed ratio at which repack_wheel() re-compresses an
# entry (the module docstring cites PyPI's 50x per-entry limit).
MAX_INFLATION = 50
25+
26+
27+
def compress_chunked(data, chunk_size):
    """Deflate ``data`` in slices of ``chunk_size`` bytes, flushing after each.

    A raw deflate stream (no zlib header, level 1) is produced.  After every
    slice the compressor is flushed with Z_FULL_FLUSH, so the output is larger
    than a single-pass compression of the same data while remaining a valid
    deflate stream.

    Returns the compressed stream as ``bytes``.
    """
    deflater = zlib.compressobj(1, zlib.DEFLATED, -15)
    stream = bytearray()
    for start in range(0, len(data), chunk_size):
        stream += deflater.compress(data[start:start + chunk_size])
        # Full flush after each slice; this is what costs the ratio.
        stream += deflater.flush(zlib.Z_FULL_FLUSH)
    stream += deflater.flush(zlib.Z_FINISH)
    return bytes(stream)
37+
38+
39+
def make_dos_datetime(dt):
    """Pack a (year, month, day, hour, minute, second) sequence into DOS form.

    Returns ``(mod_time, mod_date)``: the time word holds hour, minute and
    second // 2; the date word holds the year offset from 1980, month and day.
    """
    year, month, day = dt[0], dt[1], dt[2]
    hour, minute, second = dt[3], dt[4], dt[5]

    dos_time = (hour << 11) | (minute << 5) | (second // 2)
    dos_date = ((year - 1980) << 9) | (month << 5) | day
    return dos_time, dos_date
43+
44+
45+
def repack_wheel(src_path, dst_path):
    """Repack a wheel, limiting per-entry inflation below MAX_INFLATION.

    Every entry of ``src_path`` is read into memory and deflated afresh.  An
    entry triggers a repack when its inflation ratio reaches MAX_INFLATION,
    either as recorded in the source archive or after the fresh compression.
    Entries over the limit after fresh compression are re-compressed with
    compress_chunked(), halving the chunk size until the ratio drops below
    the limit and storing the entry uncompressed as a last resort.

    A new archive is written (to ``dst_path + '.tmp'``, then moved into
    place) only when at least one entry triggered a repack.  Otherwise the
    source is copied to ``dst_path`` unchanged, or left alone when both paths
    are equal.  Extra fields and comments of the source are not carried over.

    src_path and dst_path may be the same file (in-place repack).
    """
    # Local import: shutil is not among the module-level imports.
    import shutil

    with zipfile.ZipFile(src_path, 'r') as zin:
        items_data = [(item, zin.read(item.filename)) for item in zin.infolist()]

    needs_repack = False
    entries = []
    for item, data in items_data:
        uncompressed_size = len(data)

        # The source may have been compressed harder than level 6, so also
        # honour the ratio actually recorded in the archive.
        if (item.compress_size > 0
                and item.file_size / item.compress_size >= MAX_INFLATION):
            needs_repack = True

        # Compress normally first
        c = zlib.compressobj(6, zlib.DEFLATED, -15)
        compressed = c.compress(data) + c.flush()
        compress_type = zipfile.ZIP_DEFLATED

        # Check if inflation exceeds limit
        if len(compressed) > 0 and uncompressed_size / len(compressed) >= MAX_INFLATION:
            needs_repack = True
            t0 = time.perf_counter()
            compress_type, compressed = _compress_within_limit(data)
            elapsed = time.perf_counter() - t0
            inflation = uncompressed_size / len(compressed)
            print(f' {item.filename}: {uncompressed_size} -> {len(compressed)} '
                  f'({len(compressed) / uncompressed_size * 100:.1f}%, {inflation:.1f}x) [{elapsed:.1f}s]')

        # Non-ASCII names are stored as UTF-8 and must be flagged as such
        # (general purpose bit 11), otherwise readers decode them as cp437.
        try:
            fname = item.filename.encode('ascii')
            flags = 0
        except UnicodeEncodeError:
            fname = item.filename.encode('utf-8')
            flags = 0x800

        mod_time, mod_date = make_dos_datetime(item.date_time)
        # Keep the originating host system so external_attr (e.g. Unix
        # permission bits) is still interpreted correctly by extractors.
        made_by = ((item.create_system & 0xFF) << 8) | 20

        entries.append((
            fname, flags, made_by, compress_type, mod_time, mod_date,
            zlib.crc32(data) & 0xFFFFFFFF, compressed, uncompressed_size,
            item.external_attr,
        ))

    if not needs_repack:
        if src_path != dst_path:
            shutil.copy2(src_path, dst_path)
        return

    tmp_path = dst_path + '.tmp'
    replaced = False
    try:
        with open(tmp_path, 'wb') as f:
            _write_zip(f, entries)
        os.replace(tmp_path, dst_path)
        replaced = True
    finally:
        # Do not leave a partial archive behind when writing failed.
        if not replaced and os.path.exists(tmp_path):
            os.unlink(tmp_path)


def _compress_within_limit(data, min_chunk_size=64):
    """Return (compress_type, payload) with inflation below MAX_INFLATION.

    Starts at CHUNK_SIZE and halves the chunk size while the chunked deflate
    output is still at or over the limit; falls back to ZIP_STORED when even
    ``min_chunk_size`` chunks are not enough.
    """
    chunk_size = CHUNK_SIZE
    while True:
        payload = compress_chunked(data, chunk_size)
        if len(data) / len(payload) < MAX_INFLATION:
            return zipfile.ZIP_DEFLATED, payload
        if chunk_size <= min_chunk_size:
            return zipfile.ZIP_STORED, data
        chunk_size //= 2


def _write_zip(f, entries):
    """Write local headers, payloads, central directory and end record to f."""
    central_dir = []
    for (fname, flags, made_by, compress_type, mod_time, mod_date,
         crc, payload, uncompressed_size, ext_attr) in entries:
        local_header_offset = f.tell()

        # Local file header
        f.write(struct.pack(
            '<4sHHHHHIIIHH',
            b'PK\x03\x04',
            20,  # version needed
            flags,
            compress_type,
            mod_time,
            mod_date,
            crc,
            len(payload),
            uncompressed_size,
            len(fname),
            0,  # extra length
        ))
        f.write(fname)
        f.write(payload)

        # Matching central directory record, emitted after all payloads.
        central_dir.append(struct.pack(
            '<4sHHHHHHIIIHHHHHII',
            b'PK\x01\x02',
            made_by,  # version made by (host system in the high byte)
            20,  # version needed
            flags,
            compress_type, mod_time, mod_date,
            crc, len(payload), uncompressed_size,
            len(fname),
            0,  # extra length
            0,  # comment length
            0,  # disk number start
            0,  # internal attributes
            ext_attr,
            local_header_offset,
        ) + fname)

    # Central directory
    cd_offset = f.tell()
    for record in central_dir:
        f.write(record)
    cd_size = f.tell() - cd_offset

    # End of central directory
    f.write(struct.pack(
        '<4sHHHHIIH',
        b'PK\x05\x06',
        0, 0,
        len(central_dir),
        len(central_dir),
        cd_size,
        cd_offset,
        0,
    ))
149+
150+
151+
def verify_wheel(whl_path):
    """Check a wheel's integrity and report its size and worst inflation.

    Returns ``(fsize, max_inflation)``: the archive size in bytes and the
    largest ``file_size / compress_size`` over all entries whose compressed
    size is non-zero (0 when there are none).

    Raises zipfile.BadZipFile when ``ZipFile.testzip()`` reports a corrupt
    entry, so a broken archive cannot pass verification silently.
    """
    with zipfile.ZipFile(whl_path, 'r') as zf:
        max_inflation = 0
        for info in zf.infolist():
            if info.compress_size > 0:
                inf = info.file_size / info.compress_size
                max_inflation = max(max_inflation, inf)
        # testzip() returns the name of the first bad entry, or None.
        bad_entry = zf.testzip()
    if bad_entry is not None:
        raise zipfile.BadZipFile(f'{whl_path}: corrupt entry {bad_entry!r}')
    fsize = os.path.getsize(whl_path)
    return fsize, max_inflation
161+
162+
163+
def main():
    """Command-line entry point.

    With ``WHEEL DEST_DIR`` (first argument not a directory) the wheel is
    repacked into DEST_DIR under its own base name.  With a single directory
    argument every ``*.whl`` in it is repacked in place, in sorted order.
    Any other argument shape prints the usage to stderr and exits with 1.
    """

    def process(label, source, target):
        # Repack one wheel, verify the result and print a one-line summary.
        print(f'Repacking {label}...')
        repack_wheel(source, target)
        fsize, max_inf = verify_wheel(target)
        print(f' -> {fsize:,} bytes, max entry inflation={max_inf:.1f}x')

    argc = len(sys.argv)

    # Mode: repack_wheel.py WHEEL DEST_DIR
    if argc == 3 and not os.path.isdir(sys.argv[1]):
        wheel, out_dir = sys.argv[1], sys.argv[2]
        wheel_name = os.path.basename(wheel)
        process(wheel_name, wheel, os.path.join(out_dir, wheel_name))
        return

    # Mode: repack_wheel.py DEST_DIR (in-place)
    if argc == 2 and os.path.isdir(sys.argv[1]):
        folder = sys.argv[1]
        for entry in sorted(os.listdir(folder)):
            if entry.endswith('.whl'):
                wheel = os.path.join(folder, entry)
                process(entry, wheel, wheel)
        return

    print(f'Usage: {sys.argv[0]} WHEEL DEST_DIR', file=sys.stderr)
    print(f' {sys.argv[0]} DEST_DIR', file=sys.stderr)
    sys.exit(1)
189+
190+
191+
if __name__ == '__main__':
192+
main()

0 commit comments

Comments
 (0)