cpython/Lib/gzip.py at main · python/cpython

Actually, the previous batch's comment should have been different;

Feb 4, 2000

1

"""Functions that read and write gzipped files.

2

More trivial comment -> docstring transformations by Ka-Ping Yee,

Feb 4, 2000

3

The user of the file doesn't have to worry about the compression,

4

but random access is not allowed."""

5

6

# based on Andrew Kuchling's minigzip.py distributed with the zlib module

7

Merged revisions 75935 via svnmerge from

Oct 29, 2009

8

import struct, sys, time, os

Adding Jeremy Hylton's gzip module.

Apr 30, 1997

9

import zlib

#1535 : rename __builtin__ module to builtins.

Dec 2, 2007

10

import builtins

Merged revisions 77288 via svnmerge from

Jan 3, 2010

11

import io

Issue #23529 : Limit the size of decompressed data when reading from

Apr 10, 2015

12

import _compression

Adding Jeremy Hylton's gzip module.

Apr 30, 1997

13

bpo-6584 : Add a BadGzipFile exception to the gzip module. (GH-13022 )

May 13, 2019

14

__all__ = ["BadGzipFile", "GzipFile", "open", "compress", "decompress"]

added a few more __all__ lists

Jan 23, 2001

15

Adding Jeremy Hylton's gzip module.

Apr 30, 1997

16

# Bit masks tested against the flag byte of a gzip member header.
FTEXT = 1
FHCRC = 2
FEXTRA = 4
FNAME = 8
FCOMMENT = 16

# Values stored in GzipFile.mode to record the direction of the stream.
READ = 1
WRITE = 2

# Named compression levels; _COMPRESS_LEVEL_BEST is the default used by
# open() and GzipFile.
_COMPRESS_LEVEL_FAST = 1
_COMPRESS_LEVEL_TRADEOFF = 6
_COMPRESS_LEVEL_BEST = 9

# Number of compressed bytes requested from the underlying file per read.
READ_BUFFER_SIZE = 128 * 1024

25

bpo-34969 : Add --fast, --best on the gzip CLI (GH-9833 )

Nov 3, 2018

26

27

def open(filename, mode="rb", compresslevel=_COMPRESS_LEVEL_BEST,
         encoding=None, errors=None, newline=None):
    """Open a gzip-compressed file in binary or text mode.

    *filename* is either a path (str, bytes or os.PathLike) or an already
    open file object that offers read() or write().

    *mode* is one of "r", "rb", "w", "wb", "x", "xb", "a" or "ab" for
    binary access, or "rt", "wt", "xt" or "at" for text access.  It
    defaults to "rb"; *compresslevel* defaults to 9.

    In binary mode the call is equivalent to
    GzipFile(filename, mode, compresslevel) and *encoding*, *errors* and
    *newline* must be left as None.

    In text mode the GzipFile is wrapped in an io.TextIOWrapper built with
    the given encoding, error handling and line-ending settings.

    Raises ValueError for a mode containing both "t" and "b", or for a
    text-only argument given in binary mode, and TypeError when *filename*
    is neither a path nor a file-like object.
    """
    text_mode = "t" in mode
    if text_mode:
        if "b" in mode:
            raise ValueError("Invalid mode: %r" % (mode,))
    else:
        # These arguments only make sense for the text wrapper.
        binary_only = (
            (encoding, "Argument 'encoding' not supported in binary mode"),
            (errors, "Argument 'errors' not supported in binary mode"),
            (newline, "Argument 'newline' not supported in binary mode"),
        )
        for value, message in binary_only:
            if value is not None:
                raise ValueError(message)

    # GzipFile itself only understands binary modes.
    gz_mode = mode.replace("t", "")
    if isinstance(filename, (str, bytes, os.PathLike)):
        binary_file = GzipFile(filename, gz_mode, compresslevel)
    elif not hasattr(filename, "read") and not hasattr(filename, "write"):
        raise TypeError("filename must be a str or bytes object, or a file")
    else:
        binary_file = GzipFile(None, gz_mode, compresslevel, filename)

    if not text_mode:
        return binary_file
    encoding = io.text_encoding(encoding)
    return io.TextIOWrapper(binary_file, encoding, errors, newline)

71

Two different changes.

Apr 12, 1999

72

def write32u(output, value):
    """Write *value* to *output* as a 4-byte little-endian unsigned integer."""
    # The L format writes the bit pattern correctly whether signed
    # or unsigned.
    packed = struct.pack("<L", value)
    output.write(packed)

76

Issue #1675951: Allow GzipFile to work with unseekable file objects.

Sep 23, 2010

77

class _PaddedFile:
    """Bare-bones read-only file wrapper that serves a prepended bytes
    string before the contents of the wrapped file.

    Intended for use inside gzip.py only; it deliberately implements just
    the handful of methods needed there."""

    def __init__(self, f, prepend=b''):
        self._buffer = prepend
        self._length = len(prepend)
        self.file = f
        # Offset of the next unread byte in _buffer, or None once the
        # buffer has been used up and reads go straight to the file.
        self._read = 0

    def read(self, size):
        """Return *size* bytes, taken from the prepended data first."""
        start = self._read
        if start is None:
            return self.file.read(size)
        stop = start + size
        if stop <= self._length:
            # The request is satisfied entirely from the buffer.
            self._read = stop
            return self._buffer[start:stop]
        # Drain what is left of the buffer and top up from the file.
        self._read = None
        shortfall = size - self._length + start
        return self._buffer[start:] + self.file.read(shortfall)

    def prepend(self, prepend=b''):
        """Push *prepend* back so that it is returned by the next read()."""
        if self._read is not None:
            # Assume data was read since the last prepend() call
            self._read -= len(prepend)
            return
        self._buffer = prepend
        self._length = len(prepend)
        self._read = 0

    def seek(self, off):
        """Drop any prepended data and seek the wrapped file to *off*."""
        self._buffer = None
        self._read = None
        return self.file.seek(off)

    def seekable(self):
        """Always report True."""
        return True  # Allows fast-forwarding even in unseekable streams

Issue #1675951: Allow GzipFile to work with unseekable file objects.

Sep 23, 2010

117

bpo-6584 : Add a BadGzipFile exception to the gzip module. (GH-13022 )

May 13, 2019

118

119

class BadGzipFile(OSError):
    """Raised in some cases when data is not a valid gzip file.

    Subclasses OSError, so handlers written for OSError also catch it.
    """

121

122

Issue #23529 : Limit the size of decompressed data when reading from

Apr 10, 2015

123

class GzipFile(_compression.BaseStream):
    """The GzipFile class simulates most of the methods of a file object with
    the exception of the truncate() method.

    This class only supports opening files in binary mode. If you need to open a
    compressed file in text mode, use the gzip.open() function.

    """

    # Overridden with internal file object to be closed, if only a filename
    # is passed in
    myfileobj = None

    def __init__(self, filename=None, mode=None,
                 compresslevel=_COMPRESS_LEVEL_BEST, fileobj=None, mtime=None):
        """Constructor for the GzipFile class.

        At least one of fileobj and filename must be given a
        non-trivial value.

        The new class instance is based on fileobj, which can be a regular
        file, an io.BytesIO object, or any other object which simulates a file.
        It defaults to None, in which case filename is opened to provide
        a file object.

        When fileobj is not None, the filename argument is only used to be
        included in the gzip file header, which may include the original
        filename of the uncompressed file.  It defaults to the filename of
        fileobj, if discernible; otherwise, it defaults to the empty string,
        and in this case the original filename is not included in the header.

        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', 'wb', 'x', or
        'xb' depending on whether the file will be read or written.  The default
        is the mode of fileobj if discernible; otherwise, the default is 'rb'.
        A mode of 'r' is equivalent to one of 'rb', and similarly for 'w' and
        'wb', 'a' and 'ab', and 'x' and 'xb'.

        The compresslevel argument is an integer from 0 to 9 controlling the
        level of compression; 1 is fastest and produces the least compression,
        and 9 is slowest and produces the most compression. 0 is no compression
        at all. The default is 9.

        The mtime argument is an optional numeric timestamp to be written
        to the last modification time field in the stream when compressing.
        If omitted or None, the current time is used.

        Raises ValueError for a text ('t'), universal-newline ('U') or
        otherwise unrecognised mode.  If the constructor opened *filename*
        itself and a later initialisation step fails, that file is closed
        before the exception propagates.

        """

        if mode and ('t' in mode or 'U' in mode):
            raise ValueError("Invalid mode: {!r}".format(mode))
        if mode and 'b' not in mode:
            mode += 'b'
        if fileobj is None:
            fileobj = self.myfileobj = builtins.open(filename, mode or 'rb')
        try:
            if filename is None:
                filename = getattr(fileobj, 'name', '')
                if not isinstance(filename, (str, bytes)):
                    # e.g. an integer descriptor from os.fdopen()
                    filename = ''
            else:
                filename = os.fspath(filename)
            origmode = mode
            if mode is None:
                mode = getattr(fileobj, 'mode', 'rb')

            if mode.startswith('r'):
                self.mode = READ
                raw = _GzipReader(fileobj)
                self._buffer = io.BufferedReader(raw)
                self.name = filename

            elif mode.startswith(('w', 'a', 'x')):
                if origmode is None:
                    # The write mode was only inferred from fileobj.mode.
                    import warnings
                    warnings.warn(
                        "GzipFile was opened for writing, but this will "
                        "change in future Python releases. "
                        "Specify the mode argument for opening it for writing.",
                        FutureWarning, 2)
                self.mode = WRITE
                self._init_write(filename)
                # Negative wbits: raw deflate stream, because the gzip
                # header and trailer are written by this class itself.
                self.compress = zlib.compressobj(compresslevel,
                                                 zlib.DEFLATED,
                                                 -zlib.MAX_WBITS,
                                                 zlib.DEF_MEM_LEVEL,
                                                 0)
                self._write_mtime = mtime
            else:
                raise ValueError("Invalid mode: {!r}".format(mode))

            self.fileobj = fileobj

            if self.mode == WRITE:
                self._write_gzip_header(compresslevel)
        except BaseException:
            # Do not leak a file that this constructor opened itself; a
            # caller-supplied fileobj remains the caller's responsibility.
            owned = self.myfileobj
            if owned is not None:
                self.myfileobj = None
                owned.close()
            raise

    @property
    def mtime(self):
        """Last modification time read from stream, or None"""
        return self._buffer.raw._last_mtime

    def __repr__(self):
        """Return '<gzip ...>' built from the repr of the underlying file."""
        s = repr(self.fileobj)
        # Strip the outer delimiters of the inner repr before wrapping it.
        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'

    def _init_write(self, filename):
        """Reset the per-stream bookkeeping used while compressing."""
        self.name = filename
        self.crc = zlib.crc32(b"")
        self.size = 0
        self.writebuf = []
        self.bufsize = 0
        self.offset = 0  # Current file offset for seek(), tell(), etc

    def _write_gzip_header(self, compresslevel):
        """Write the gzip member header to self.fileobj.

        The stored file name is the basename of self.name without a
        trailing '.gz'; it is omitted when empty or not Latin-1 encodable.
        """
        self.fileobj.write(b'\037\213')             # magic header
        self.fileobj.write(b'\010')                 # compression method
        try:
            # RFC 1952 requires the FNAME field to be Latin-1. Do not
            # include filenames that cannot be represented that way.
            fname = os.path.basename(self.name)
            if not isinstance(fname, bytes):
                fname = fname.encode('latin-1')
            if fname.endswith(b'.gz'):
                fname = fname[:-3]
        except UnicodeEncodeError:
            fname = b''
        flags = 0
        if fname:
            flags = FNAME
        self.fileobj.write(bytes([flags]))
        mtime = self._write_mtime
        if mtime is None:
            mtime = time.time()
        write32u(self.fileobj, int(mtime))
        # Extra-flags byte: 2 for the best level, 4 for the fastest level,
        # 0 for everything else.
        if compresslevel == _COMPRESS_LEVEL_BEST:
            xfl = b'\002'
        elif compresslevel == _COMPRESS_LEVEL_FAST:
            xfl = b'\004'
        else:
            xfl = b'\000'
        self.fileobj.write(xfl)
        self.fileobj.write(b'\377')     # OS byte (255, "unknown" in RFC 1952)
        if fname:
            self.fileobj.write(fname + b'\000')

    def write(self, data):
        """Compress *data* and write it out; return the uncompressed length.

        *data* may be bytes, bytearray or any object supporting the buffer
        protocol.  Raises OSError(EBADF) if the file was opened for reading.
        """
        self._check_not_closed()
        if self.mode != WRITE:
            import errno
            raise OSError(errno.EBADF, "write() on read-only GzipFile object")

        # NOTE(review): presumably already covered by _check_not_closed()
        # above; kept as a safeguard.
        if self.fileobj is None:
            raise ValueError("write() on closed GzipFile object")

        if isinstance(data, (bytes, bytearray)):
            length = len(data)
        else:
            # accept any data that supports the buffer protocol
            data = memoryview(data)
            length = data.nbytes

        if length > 0:
            self.fileobj.write(self.compress.compress(data))
            self.size += length
            self.crc = zlib.crc32(data, self.crc)
            self.offset += length

        return length

    def read(self, size=-1):
        """Read and return up to *size* uncompressed bytes via the buffer.

        Raises OSError(EBADF) if the file was opened for writing.
        """
        self._check_not_closed()
        if self.mode != READ:
            import errno
            raise OSError(errno.EBADF, "read() on write-only GzipFile object")
        return self._buffer.read(size)

    def read1(self, size=-1):
        """Implements BufferedIOBase.read1()

        Reads up to a buffer's worth of data if size is negative."""
        self._check_not_closed()
        if self.mode != READ:
            import errno
            raise OSError(errno.EBADF, "read1() on write-only GzipFile object")

        if size < 0:
            size = io.DEFAULT_BUFFER_SIZE
        return self._buffer.read1(size)

    def peek(self, n):
        """Return buffered uncompressed data without consuming it.

        Raises OSError(EBADF) if the file was opened for writing.
        """
        self._check_not_closed()
        if self.mode != READ:
            import errno
            raise OSError(errno.EBADF, "peek() on write-only GzipFile object")
        return self._buffer.peek(n)

    @property
    def closed(self):
        """True once close() has dropped the underlying file object."""
        return self.fileobj is None

    def close(self):
        """Finish the stream and release the underlying file.

        In write mode the remaining compressed data and the CRC/size
        trailer are written first.  A file opened by the constructor is
        closed; a caller-supplied fileobj is left open.  Calling close()
        again is a no-op.
        """
        fileobj = self.fileobj
        if fileobj is None:
            return
        self.fileobj = None
        try:
            if self.mode == WRITE:
                fileobj.write(self.compress.flush())
                write32u(fileobj, self.crc)
                # self.size may exceed 2 GiB, or even 4 GiB
                write32u(fileobj, self.size & 0xffffffff)
            elif self.mode == READ:
                self._buffer.close()
        finally:
            myfileobj = self.myfileobj
            if myfileobj:
                self.myfileobj = None
                myfileobj.close()

    def flush(self, zlib_mode=zlib.Z_SYNC_FLUSH):
        """In write mode, flush the compressor and the underlying file.

        Does nothing beyond the closed check in read mode.
        """
        self._check_not_closed()
        if self.mode == WRITE:
            # Ensure the compressor's buffer is flushed
            self.fileobj.write(self.compress.flush(zlib_mode))
            self.fileobj.flush()

    def fileno(self):
        """Invoke the underlying file object's fileno() method.

        This will raise AttributeError if the underlying file object
        doesn't support fileno().
        """
        return self.fileobj.fileno()

    def rewind(self):
        '''Return the uncompressed stream file position indicator to the
        beginning of the file'''
        if self.mode != READ:
            raise OSError("Can't rewind in write mode")
        self._buffer.seek(0)

    def readable(self):
        """Return True if the file was opened for reading."""
        return self.mode == READ

    def writable(self):
        """Return True if the file was opened for writing."""
        return self.mode == WRITE

    def seekable(self):
        """Always report True."""
        return True

    def seek(self, offset, whence=io.SEEK_SET):
        """Move the uncompressed-stream position; return the new position.

        In write mode only forward seeks are possible, and the gap is
        filled by writing zero bytes.  Seeking from the end raises
        ValueError and seeking backwards raises OSError in that mode.  In
        read mode the request is delegated to the read buffer.
        """
        if self.mode == WRITE:
            if whence != io.SEEK_SET:
                if whence == io.SEEK_CUR:
                    offset = self.offset + offset
                else:
                    raise ValueError('Seek from end not supported')
            if offset < self.offset:
                raise OSError('Negative seek in write mode')
            count = offset - self.offset
            chunk = b'\0' * 1024
            for i in range(count // 1024):
                self.write(chunk)
            self.write(b'\0' * (count % 1024))
        elif self.mode == READ:
            self._check_not_closed()
            return self._buffer.seek(offset, whence)

        return self.offset

    def readline(self, size=-1):
        """Read and return one line, at most *size* bytes if size >= 0.

        Raises OSError(EBADF) if the file was opened for writing, matching
        read(), read1() and peek().
        """
        self._check_not_closed()
        if self.mode != READ:
            import errno
            raise OSError(errno.EBADF,
                          "readline() on write-only GzipFile object")
        return self._buffer.readline(size)

394

395

bpo-43613 : Faster implementation of gzip.compress and gzip.decompress (…

Sep 2, 2021

396

def _read_exact(fp, n):
    '''Return exactly *n* bytes read from `fp`.

    A single read() is not enough because fp may be unbuffered and
    return fewer bytes than requested.  Raises EOFError if fp runs dry
    before *n* bytes have been collected.
    '''
    data = fp.read(n)
    missing = n - len(data)
    while missing > 0:
        chunk = fp.read(missing)
        if not chunk:
            raise EOFError("Compressed file ended before the "
                           "end-of-stream marker was reached")
        data += chunk
        missing = n - len(data)
    return data

410

411

412

def _read_gzip_header(fp):
    '''Consume one gzip member header from `fp`, leaving it positioned
    just past the header.

    Returns the mtime stored in the header, or None when `fp` is already
    at end of file.  Raises BadGzipFile for a wrong magic number or an
    unknown compression method.
    '''
    magic = fp.read(2)
    if magic == b'':
        return None
    if magic != b'\037\213':
        raise BadGzipFile('Not a gzipped file (%r)' % magic)

    fixed_part = _read_exact(fp, 8)
    method, flag, last_mtime = struct.unpack("<BBIxx", fixed_part)
    if method != 8:
        raise BadGzipFile('Unknown compression method')

    def skip_zero_terminated():
        # Discard bytes up to and including the next NUL (or until EOF).
        while True:
            byte = fp.read(1)
            if not byte or byte == b'\000':
                return

    if flag & FEXTRA:
        # Read & discard the extra field, if present
        (extra_len,) = struct.unpack("<H", _read_exact(fp, 2))
        _read_exact(fp, extra_len)
    if flag & FNAME:
        # Null-terminated original file name
        skip_zero_terminated()
    if flag & FCOMMENT:
        # Null-terminated comment
        skip_zero_terminated()
    if flag & FHCRC:
        _read_exact(fp, 2)     # Read & discard the 16-bit header CRC
    return last_mtime

447

448

Issue #23529 : Limit the size of decompressed data when reading from

Apr 10, 2015

449

class _GzipReader(_compression.DecompressReader):

450

def __init__(self, fp):
    """Set up a decompressing reader over *fp*.

    *fp* is wrapped in a _PaddedFile and handed to the base class along
    with the zlib decompressor factory and a negative wbits value.
    """
    padded = _PaddedFile(fp)
    super().__init__(padded, zlib._ZlibDecompressor,
                     wbits=-zlib.MAX_WBITS)
    self._last_mtime = None
    # Set flag indicating start of a new member
    self._new_member = True

456

457

def _init_read(self):
    """Reset the running CRC and size counters for a new gzip member."""
    self._stream_size = 0  # Decompressed size of unconcatenated stream
    self._crc = zlib.crc32(b"")

460

461

def _read_gzip_header(self):
    """Read the next member header from self._fp.

    Returns False when the module-level _read_gzip_header() yields None,
    otherwise records the header mtime in self._last_mtime and returns
    True.
    """
    header_mtime = _read_gzip_header(self._fp)
    found = header_mtime is not None
    if found:
        self._last_mtime = header_mtime
    return found

467

468

def read(self, size=-1):

Much-needed merge (using svnmerge.py this time) of trunk changes into…

May 27, 2006

469

if size < 0:

Issue #23529 : Limit the size of decompressed data when reading from

Apr 10, 2015

470

return self.readall()

471

# size=0 is special because decompress(max_length=0) is not supported

472

if not size:

473

return b""

474

475

# For certain input data, a single

476

# call to decompress() may not return

477

# any data. In this case, retry until we get some data or reach EOF.

478

while True:

479

if self._decompressor.eof:

480

# Ending case: we've come to the end of a member in the file,

481

# so finish up this member, and read a new gzip header.

482

# Check the CRC and file size, and set the flag so we read

483

# a new member

484

self._read_eof()

485

self._new_member = True

486

self._decompressor = self._decomp_factory(

487

**self._decomp_args)

488

489

if self._new_member:

490

# If the _new_member flag is set, we have to

491

# jump to the next member, if there is one.

492

self._init_read()

493

if not self._read_gzip_header():

494

self._size = self._pos

495

return b""

496

self._new_member = False

497

498

# Read a chunk of data from the file

gh-95534 : Improve gzip reading speed by 10% (#97664 )

Oct 17, 2022

499

if self._decompressor.needs_input:

500

buf = self._fp.read(READ_BUFFER_SIZE)

501

uncompress = self._decompressor.decompress(buf, size)

502

else:

503

uncompress = self._decompressor.decompress(b"", size)

Issue #23529 : Limit the size of decompressed data when reading from

Apr 10, 2015

504

gh-95534 : Improve gzip reading speed by 10% (#97664 )

Oct 17, 2022

505

if self._decompressor.unused_data != b"":

Issue #23529 : Limit the size of decompressed data when reading from

Apr 10, 2015

506

# Prepend the already read bytes to the fileobj so they can

507

# be seen by _read_eof() and _read_gzip_header()

508

self._fp.prepend(self._decompressor.unused_data)

509

510

if uncompress != b"":

Much-needed merge (using svnmerge.py this time) of trunk changes into…

May 27, 2006

511

break

Issue #23529 : Limit the size of decompressed data when reading from

Apr 10, 2015

512

if buf == b"":

513

raise EOFError("Compressed file ended before the "

514

"end-of-stream marker was reached")

SF patch #100740: Add optional size arguments to .readline() and

Jul 29, 2000

515

gh-95534 : Improve gzip reading speed by 10% (#97664 )

Oct 17, 2022

516

self._crc = zlib.crc32(uncompress, self._crc)

517

self._stream_size += len(uncompress)

Issue #23529 : Limit the size of decompressed data when reading from

Apr 10, 2015

518

self._pos += len(uncompress)

519

return uncompress

Whitespace normalization.

Jan 14, 2001

520

Issue #23529 : Limit the size of decompressed data when reading from

Apr 10, 2015

521

def _read_eof(self):

522

# We've read to the end of the file

Fix typo in comment (GH-26162 )

May 16, 2021

523

# We check that the computed CRC and size of the

Issue #23529 : Limit the size of decompressed data when reading from

Apr 10, 2015

524

# uncompressed data matches the stored values. Note that the size

525

# stored is the true file size mod 2**32.

bpo-43613 : Faster implementation of gzip.compress and gzip.decompress (…

Sep 2, 2021

526

crc32, isize = struct.unpack("<II", _read_exact(self._fp, 8))

Issue #23529 : Limit the size of decompressed data when reading from

Apr 10, 2015

527

if crc32 != self._crc:

bpo-6584 : Add a BadGzipFile exception to the gzip module. (GH-13022 )

May 13, 2019

528

raise BadGzipFile("CRC check failed %s != %s" % (hex(crc32),

529

hex(self._crc)))

Issue #23529 : Limit the size of decompressed data when reading from

Apr 10, 2015

530

elif isize != (self._stream_size & 0xffffffff):

bpo-6584 : Add a BadGzipFile exception to the gzip module. (GH-13022 )

May 13, 2019

531

raise BadGzipFile("Incorrect length of data produced")

Issue #23529 : Limit the size of decompressed data when reading from

Apr 10, 2015

532

533

# Gzip files can be padded with zeroes and still have archives.

534

# Consume all zero bytes and set the file position to the first

535

# non-zero byte. See http://www.gzip.org/#faq8

536

c = b"\x00"

537

while c == b"\x00":

538

c = self._fp.read(1)

539

if c:

540

self._fp.prepend(c)

541

542

def _rewind(self):

543

super()._rewind()

544

self._new_member = True

When there's no filename, don't make one up.

Dec 30, 1997

545

bpo-43613 : Faster implementation of gzip.compress and gzip.decompress (…

Sep 2, 2021

546

547

def _create_simple_gzip_header(compresslevel: int,
                               mtime = None) -> bytes:
    """
    Build the fixed 10-byte gzip member header (no optional fields).

    :param compresslevel: Compression level; only used to pick the XFL byte.
    :param mtime: Modification time (must be convertible with int() to a
                  32-bit value).  None means "use the current time".
    :return: The packed header as a bytes object.
    """
    timestamp = time.time() if mtime is None else mtime

    # XFL advertises which end of the speed/size trade-off was requested.
    extra_flags = (2 if compresslevel == _COMPRESS_LEVEL_BEST
                   else 4 if compresslevel == _COMPRESS_LEVEL_FAST
                   else 0)

    # Layout: ID1, ID2 (magic bytes), CM (8 = deflate), FLG (0 = no optional
    # fields), MTIME (little-endian 32-bit), XFL, OS (255 = unknown).
    return struct.pack("<BBBBLBB",
                       0x1f, 0x8b, 8, 0, int(timestamp), extra_flags, 255)

566

567

bpo-34898 : Add mtime parameter to gzip.compress(). (GH-9704 )

Nov 7, 2018

568

def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
    """Compress data in one shot and return the compressed string.

    compresslevel sets the compression level in range of 0-9.
    mtime can be used to set the modification time. The modification time is
    set to the current time by default.

    The header is always written by this module, so the OS byte is 255
    ("unknown") for every mtime value, including 0.
    """
    # Do not delegate header creation to zlib (wbits=31) when mtime == 0:
    # zlib fills in the OS byte according to how it was built, which made
    # the mtime=0 output differ from the mtime!=0 output and vary between
    # platforms.  Building the header here keeps the result reproducible.
    header = _create_simple_gzip_header(compresslevel, mtime)
    # Trailer: CRC-32 of the uncompressed data, then its length mod 2**32.
    trailer = struct.pack("<LL", zlib.crc32(data), (len(data) & 0xffffffff))
    # Wbits=-15 creates a raw deflate block.
    return (header + zlib.compress(data, level=compresslevel, wbits=-15) +
            trailer)

bpo-43613 : Faster implementation of gzip.compress and gzip.decompress (…

Sep 2, 2021

584

Issue #3488 : Provide convenient shorthand functions gzip.compress

Aug 17, 2010

585

586

def decompress(data):
    """Decompress a gzip compressed string in one shot.

    Every concatenated member in *data* is decompressed and checked against
    its CRC-32/length trailer; the joined payloads are returned.  Raises
    EOFError for a truncated member and BadGzipFile for a failed check.
    """
    members = []
    remaining = data
    while True:
        stream = io.BytesIO(remaining)
        if _read_gzip_header(stream) is None:
            # No further member header: hand back what was collected.
            return b"".join(members)

        # Raw deflate (negative wbits); the header was consumed above.
        inflater = zlib.decompressobj(wbits=-zlib.MAX_WBITS)
        payload = inflater.decompress(remaining[stream.tell():])

        # Whatever follows the deflate stream must start with the 8-byte
        # trailer.
        tail = inflater.unused_data
        if not (inflater.eof and len(tail) >= 8):
            raise EOFError("Compressed file ended before the end-of-stream "
                           "marker was reached")

        stored_crc, stored_size = struct.unpack("<II", tail[:8])
        if stored_crc != zlib.crc32(payload):
            raise BadGzipFile("CRC check failed")
        if stored_size != (len(payload) & 0xffffffff):
            raise BadGzipFile("Incorrect length of data produced")

        members.append(payload)
        # Skip zero padding between members before looking for the next one.
        remaining = tail[8:].lstrip(b"\x00")

Issue #3488 : Provide convenient shorthand functions gzip.compress

Aug 17, 2010

609

610

bpo-23596 : Use argparse for the command line of gzip (GH-9781 )

Oct 9, 2018

611

def main():
    """Command-line interface: gzip (or, with -d, gunzip) each argument.

    "-" (the default when no file is given) means stdin to stdout.  For a
    named file the output goes next to it, with ".gz" appended when
    compressing or stripped when decompressing; the input file is kept.
    """
    from argparse import ArgumentParser
    from contextlib import ExitStack
    parser = ArgumentParser(description=
        "A simple command line interface for the gzip module: act like gzip, "
        "but do not delete the input file.")
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--fast', action='store_true', help='compress faster')
    group.add_argument('--best', action='store_true', help='compress better')
    group.add_argument("-d", "--decompress", action="store_true",
                       help="act like gunzip instead of gzip")

    parser.add_argument("args", nargs="*", default=["-"], metavar='file')
    args = parser.parse_args()

    compresslevel = _COMPRESS_LEVEL_TRADEOFF
    if args.fast:
        compresslevel = _COMPRESS_LEVEL_FAST
    elif args.best:
        compresslevel = _COMPRESS_LEVEL_BEST

    for arg in args.args:
        # Every file object opened here is registered on the stack, so it is
        # closed even if opening the second file or the copy loop fails.
        # The stack unwinds in reverse order (output first, then input).
        # sys.stdin.buffer / sys.stdout.buffer are never registered and so
        # are never closed.
        with ExitStack() as stack:
            if args.decompress:
                if arg == "-":
                    f = stack.enter_context(
                        GzipFile(filename="", mode="rb",
                                 fileobj=sys.stdin.buffer))
                    g = sys.stdout.buffer
                else:
                    if not arg.endswith(".gz"):
                        sys.exit(f"filename doesn't end in .gz: {arg!r}")
                    f = stack.enter_context(open(arg, "rb"))
                    g = stack.enter_context(builtins.open(arg[:-3], "wb"))
            else:
                if arg == "-":
                    f = sys.stdin.buffer
                    g = stack.enter_context(
                        GzipFile(filename="", mode="wb",
                                 fileobj=sys.stdout.buffer,
                                 compresslevel=compresslevel))
                else:
                    f = stack.enter_context(builtins.open(arg, "rb"))
                    g = stack.enter_context(open(arg + ".gz", "wb"))

            # Stream in fixed-size chunks to keep memory use bounded.
            while True:
                chunk = f.read(READ_BUFFER_SIZE)
                if not chunk:
                    break
                g.write(chunk)

When there's no filename, don't make one up.

Dec 30, 1997

658

659

# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()