Skip to content

gh-76909: Add preset parameters to the zipfile library to control the LZMA compression filter preset #92854

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions Doc/library/zipfile.rst
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ ZipFile Objects


.. class:: ZipFile(file, mode='r', compression=ZIP_STORED, allowZip64=True, \
compresslevel=None, *, strict_timestamps=True, \
compresslevel=None, preset=None, *, strict_timestamps=True, \
metadata_encoding=None)

Open a ZIP file, where *file* can be a path to a file (a string), a
Expand Down Expand Up @@ -178,6 +178,12 @@ ZipFile Objects
When using :const:`ZIP_BZIP2` integers ``1`` through ``9`` are accepted
(see :class:`bz2 <bz2.BZ2File>` for more information).

The *preset* parameter controls the LZMA preset to use when
writing files to the archive.
When using :const:`ZIP_LZMA` integers ``0`` through ``9`` are accepted
(see :class:`lzma <lzma.LZMAFile>` for more information).
When using :const:`ZIP_STORED`, :const:`ZIP_DEFLATED` or :const:`ZIP_BZIP2` it has no effect.

The *strict_timestamps* argument, when set to ``False``, allows to
zip files older than 1980-01-01 at the cost of setting the
timestamp to 1980-01-01.
Expand Down Expand Up @@ -242,6 +248,9 @@ ZipFile Objects
Added support for specifying member name encoding for reading
metadata in the zipfile's directory and file headers.

.. versionchanged:: 3.12
Added the *preset* parameter.


.. method:: ZipFile.close()

Expand Down Expand Up @@ -407,13 +416,13 @@ ZipFile Objects


.. method:: ZipFile.write(filename, arcname=None, compress_type=None, \
compresslevel=None)
compresslevel=None, preset=None)

Write the file named *filename* to the archive, giving it the archive name
*arcname* (by default, this will be the same as *filename*, but without a drive
letter and with leading path separators removed). If given, *compress_type*
overrides the value given for the *compression* parameter to the constructor for
the new entry. Similarly, *compresslevel* will override the constructor if
the new entry. Similarly, *compresslevel* and *preset* will override the constructor if
given.
The archive must be open with mode ``'w'``, ``'x'`` or ``'a'``.

Expand Down Expand Up @@ -448,7 +457,7 @@ ZipFile Objects


.. method:: ZipFile.writestr(zinfo_or_arcname, data, compress_type=None, \
compresslevel=None)
compresslevel=None, preset=None)

Write a file into the archive. The contents is *data*, which may be either
a :class:`str` or a :class:`bytes` instance; if it is a :class:`str`,
Expand All @@ -460,8 +469,8 @@ ZipFile Objects

If given, *compress_type* overrides the value given for the *compression*
parameter to the constructor for the new entry, or in the *zinfo_or_arcname*
(if that is a :class:`ZipInfo` instance). Similarly, *compresslevel* will
override the constructor if given.
(if that is a :class:`ZipInfo` instance). Similarly, *compresslevel* and
*preset* will override the constructor if given.

.. note::

Expand Down
42 changes: 38 additions & 4 deletions Lib/test/test_zipfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ def setUp(self):
with open(TESTFN, "wb") as fp:
fp.write(self.data)

def make_test_archive(self, f, compression, compresslevel=None):
kwargs = {'compression': compression, 'compresslevel': compresslevel}
def make_test_archive(self, f, compression, compresslevel=None, preset=None):
kwargs = {'compression': compression, 'compresslevel': compresslevel, 'preset': preset}
# Create the ZIP archive
with zipfile.ZipFile(f, "w", **kwargs) as zipfp:
zipfp.write(TESTFN, "another.name")
Expand All @@ -73,8 +73,8 @@ def make_test_archive(self, f, compression, compresslevel=None):
for line in self.line_gen:
f.write(line)

def zip_test(self, f, compression, compresslevel=None):
self.make_test_archive(f, compression, compresslevel)
def zip_test(self, f, compression, compresslevel=None, preset=None):
self.make_test_archive(f, compression, compresslevel, preset=None)

# Read the ZIP archive
with zipfile.ZipFile(f, "r", compression) as zipfp:
Expand Down Expand Up @@ -323,6 +323,23 @@ def test_writestr_compresslevel(self):
self.assertEqual(b_info.compress_type, self.compression)
self.assertEqual(b_info._compresslevel, 2)

def test_writestr_preset(self):
    """Check that writestr() honours the constructor preset and overrides.

    ``a.txt`` is written without a per-call preset and must record the
    value given to the ZipFile constructor; ``b.txt`` passes ``preset=2``
    and must record that value instead.
    """
    # Use a context manager so the archive is finalised and the file
    # handle released even when an assertion below fails.
    with zipfile.ZipFile(TESTFN2, "w", preset=1) as zipfp:
        zipfp.writestr("a.txt", "hello world",
                       compress_type=self.compression)
        zipfp.writestr("b.txt", "hello world",
                       compress_type=self.compression, preset=2)

        # Preset follows the constructor.
        a_info = zipfp.getinfo('a.txt')
        self.assertEqual(a_info.compress_type, self.compression)
        self.assertEqual(a_info._preset, 1)

        # Preset is overridden.
        b_info = zipfp.getinfo('b.txt')
        self.assertEqual(b_info.compress_type, self.compression)
        self.assertEqual(b_info._preset, 2)


def test_read_return_size(self):
# Issue #9837: ZipExtFile.read() shouldn't return more bytes
# than requested.
Expand Down Expand Up @@ -400,6 +417,11 @@ def test_compresslevel_basic(self):
for f in get_files(self):
self.zip_test(f, self.compression, compresslevel=9)

def test_preset_basic(self):
    """Run zip_test against every file target with preset 9."""
    targets = get_files(self)
    for target in targets:
        self.zip_test(target, self.compression, preset=9)


def test_per_file_compresslevel(self):
"""Check that files within a Zip archive can have different
compression levels."""
Expand All @@ -411,6 +433,18 @@ def test_per_file_compresslevel(self):
self.assertEqual(one_info._compresslevel, 1)
self.assertEqual(nine_info._compresslevel, 9)

def test_per_file_preset(self):
    """Check that files within a Zip archive can have different
    presets."""
    with zipfile.ZipFile(TESTFN2, "w", preset=1) as archive:
        # First member inherits the archive-wide preset.
        archive.write(TESTFN, 'compress_1')
        # Second member supplies its own preset.
        archive.write(TESTFN, 'compress_9', preset=9)
        inherited = archive.getinfo('compress_1')
        overridden = archive.getinfo('compress_9')
        self.assertEqual(inherited._preset, 1)
        self.assertEqual(overridden._preset, 9)


def test_writing_errors(self):
class BrokenFile(io.BytesIO):
def write(self, data):
Expand Down
49 changes: 38 additions & 11 deletions Lib/zipfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,7 @@ class ZipInfo (object):
'date_time',
'compress_type',
'_compresslevel',
'_preset',
'comment',
'extra',
'create_system',
Expand Down Expand Up @@ -390,6 +391,7 @@ def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
# Standard values:
self.compress_type = ZIP_STORED # Type of compression for the file
self._compresslevel = None # Level for the compressor
self._preset = None # Preset for LZMA compression
self.comment = b"" # Comment for each file
self.extra = b"" # ZIP extra data
if sys.platform == 'win32':
Expand Down Expand Up @@ -617,14 +619,22 @@ def decrypter(data):

class LZMACompressor:

def __init__(self):
def __init__(self, preset=None):
self._comp = None
self.preset = preset

def _init(self):
    """Create the raw LZMA1 compressor and return the stream header.

    Builds an LZMA1 filter specification, adding ``self.preset`` when one
    was supplied, encodes it to its properties bytes, and stores a
    ``FORMAT_RAW`` compressor configured from those bytes in
    ``self._comp``.

    Returns the header bytes: the two bytes 9 and 4, the little-endian
    16-bit length of the encoded properties, then the properties.
    """
    filter_spec = {'id': lzma.FILTER_LZMA1}
    # Compare against None rather than truthiness: 0 is an accepted
    # preset and must not be silently replaced by the library default.
    if self.preset is not None:
        filter_spec["preset"] = self.preset
    props = lzma._encode_filter_properties(filter_spec)

    # Decode the properties back into a filter dict so the compressor uses
    # exactly the settings that are written to the header.
    self._comp = lzma.LZMACompressor(
        lzma.FORMAT_RAW,
        filters=[lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)],
    )
    return struct.pack('<BBH', 9, 4, len(props)) + props

def compress(self, data):
Expand Down Expand Up @@ -705,7 +715,7 @@ def _check_compression(compression):
raise NotImplementedError("That compression method is not supported")


def _get_compressor(compress_type, compresslevel=None):
def _get_compressor(compress_type, compresslevel=None, preset=None):
if compress_type == ZIP_DEFLATED:
if compresslevel is not None:
return zlib.compressobj(compresslevel, zlib.DEFLATED, -15)
Expand All @@ -716,6 +726,8 @@ def _get_compressor(compress_type, compresslevel=None):
return bz2.BZ2Compressor()
# compresslevel is ignored for ZIP_LZMA
elif compress_type == ZIP_LZMA:
if preset is not None:
return LZMACompressor(preset=preset)
return LZMACompressor()
else:
return None
Expand Down Expand Up @@ -1134,7 +1146,7 @@ def __init__(self, zf, zinfo, zip64):
self._zip64 = zip64
self._zipfile = zf
self._compressor = _get_compressor(zinfo.compress_type,
zinfo._compresslevel)
zinfo._compresslevel, zinfo._preset)
self._file_size = 0
self._compress_size = 0
self._crc = 0
Expand Down Expand Up @@ -1217,7 +1229,7 @@ class ZipFile:
""" Class with methods to open, read, write, close, list zip files.

z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
compresslevel=None)
compresslevel=None, preset=None)

file: Either the path to the file, or a file-like object.
If it is a path, the file will be opened and closed by ZipFile.
Expand All @@ -1233,14 +1245,19 @@ class ZipFile:
When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
When using ZIP_DEFLATED integers 0 through 9 are accepted.
When using ZIP_BZIP2 integers 1 through 9 are accepted.
preset: None (use the LZMA default preset) or an integer
specifying the LZMA preset to pass to the compressor.
When using ZIP_LZMA integers 0 through 9 are accepted.
When using ZIP_STORED, ZIP_DEFLATED, or ZIP_BZIP2 this keyword has no effect.


"""

fp = None # Set here since __del__ checks it
_windows_illegal_name_trans_table = None

def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
compresslevel=None, *, strict_timestamps=True, metadata_encoding=None):
compresslevel=None, preset=None, *, strict_timestamps=True, metadata_encoding=None):
"""Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
or append 'a'."""
if mode not in ('r', 'w', 'x', 'a'):
Expand All @@ -1255,6 +1272,7 @@ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
self.filelist = [] # List of ZipInfo instances for archive
self.compression = compression # Method of compression
self.compresslevel = compresslevel
self.preset = preset
self.mode = mode
self.pwd = None
self._comment = b''
Expand Down Expand Up @@ -1537,6 +1555,7 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False):
zinfo = ZipInfo(name)
zinfo.compress_type = self.compression
zinfo._compresslevel = self.compresslevel
zinfo._preset = self.preset
else:
# Get info object for name
zinfo = self.getinfo(name)
Expand Down Expand Up @@ -1755,7 +1774,7 @@ def _writecheck(self, zinfo):
" would require ZIP64 extensions")

def write(self, filename, arcname=None,
compress_type=None, compresslevel=None):
compress_type=None, compresslevel=None, preset=None):
"""Put the bytes from filename into the archive under the name
arcname."""
if not self.fp:
Expand Down Expand Up @@ -1784,11 +1803,16 @@ def write(self, filename, arcname=None,
else:
zinfo._compresslevel = self.compresslevel

if preset is not None:
zinfo._preset = preset
else:
zinfo._preset = self.preset

with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
shutil.copyfileobj(src, dest, 1024*8)

def writestr(self, zinfo_or_arcname, data,
compress_type=None, compresslevel=None):
compress_type=None, compresslevel=None, preset=None):
"""Write a file into the archive. The contents is 'data', which
may be either a 'str' or a 'bytes' instance; if it is a 'str',
it is encoded as UTF-8 first.
Expand All @@ -1801,6 +1825,7 @@ def writestr(self, zinfo_or_arcname, data,
date_time=time.localtime(time.time())[:6])
zinfo.compress_type = self.compression
zinfo._compresslevel = self.compresslevel
zinfo._preset = self.preset
if zinfo.filename[-1] == '/':
zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x
zinfo.external_attr |= 0x10 # MS-DOS directory flag
Expand All @@ -1822,6 +1847,8 @@ def writestr(self, zinfo_or_arcname, data,

if compresslevel is not None:
zinfo._compresslevel = compresslevel
if preset is not None:
zinfo._preset = preset

zinfo.file_size = len(data) # Uncompressed size
with self._lock:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
:meth:`ZipFile.write`, :meth:`ZipFile.writestr` and the
constructor for :class:`zipfile.ZipFile` now accept a ``preset`` parameter
to set the LZMA filter preset.