"""Read and write ZIP files.
XXX references to utf-8 need further investigation."""
importioimportosimportreimportimportlib.utilimportsysimporttimeimportstatimportshutilimportstructimportbinasciitry:importthreadingexceptImportError:importdummy_threading as threadingtry:import zlib #We may need its compression method
crc32 =zlib.crc32exceptImportError:
zlib=None
crc32=binascii.crc32try:import bz2 #We may need its compression method
exceptImportError:
bz2=Nonetry:import lzma #We may need its compression method
exceptImportError:
lzma=None__all__ = ["BadZipFile", "BadZipfile", "error","ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA","is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]classBadZipFile(Exception):pass
classLargeZipFile(Exception):"""Raised when writing a zipfile, the zipfile requires ZIP64 extensions
and those extensions are disabled."""error= BadZipfile = BadZipFile #Pre-3.2 compatibility names
ZIP64_LIMIT= (1 << 31) - 1ZIP_FILECOUNT_LIMIT= (1 << 16) - 1ZIP_MAX_COMMENT= (1 << 16) - 1
#constants for Zip file compression methods
ZIP_STORED =0
ZIP_DEFLATED= 8ZIP_BZIP2= 12ZIP_LZMA= 14
#Other ZIP compression methods not supported
DEFAULT_VERSION= 20ZIP64_VERSION= 45BZIP2_VERSION= 46LZMA_VERSION= 63
#we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
#Below are some formats and associated data for reading/writing headers using#the struct module. The names and structures of headers/records are those used#in the PKWARE description of the ZIP file format:#http://www.pkware.com/documents/casestudies/APPNOTE.TXT#(URL valid as of January 2008)
#The "end of central directory" structure, magic number, size, and indices#(section V.I in the format document)
structEndArchive = b"<4s4H2LH"stringEndArchive= b"PK\005\006"sizeEndCentDir=struct.calcsize(structEndArchive)
_ECD_SIGNATURE=0
_ECD_DISK_NUMBER= 1_ECD_DISK_START= 2_ECD_ENTRIES_THIS_DISK= 3_ECD_ENTRIES_TOTAL= 4_ECD_SIZE= 5_ECD_OFFSET= 6_ECD_COMMENT_SIZE= 7
#These last two indices are not part of the structure as defined in the#spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8_ECD_LOCATION= 9
#The "central directory" structure, magic number, size, and indices#of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"stringCentralDir= b"PK\001\002"sizeCentralDir=struct.calcsize(structCentralDir)#indexes of entries in the central directory structure
_CD_SIGNATURE =0
_CD_CREATE_VERSION= 1_CD_CREATE_SYSTEM= 2_CD_EXTRACT_VERSION= 3_CD_EXTRACT_SYSTEM= 4_CD_FLAG_BITS= 5_CD_COMPRESS_TYPE= 6_CD_TIME= 7_CD_DATE= 8_CD_CRC= 9_CD_COMPRESSED_SIZE= 10_CD_UNCOMPRESSED_SIZE= 11_CD_FILENAME_LENGTH= 12_CD_EXTRA_FIELD_LENGTH= 13_CD_COMMENT_LENGTH= 14_CD_DISK_NUMBER_START= 15_CD_INTERNAL_FILE_ATTRIBUTES= 16_CD_EXTERNAL_FILE_ATTRIBUTES= 17_CD_LOCAL_HEADER_OFFSET= 18
#The "local file header" structure, magic number, size, and indices#(section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"stringFileHeader= b"PK\003\004"sizeFileHeader=struct.calcsize(structFileHeader)
_FH_SIGNATURE=0
_FH_EXTRACT_VERSION= 1_FH_EXTRACT_SYSTEM= 2_FH_GENERAL_PURPOSE_FLAG_BITS= 3_FH_COMPRESSION_METHOD= 4_FH_LAST_MOD_TIME= 5_FH_LAST_MOD_DATE= 6_FH_CRC= 7_FH_COMPRESSED_SIZE= 8_FH_UNCOMPRESSED_SIZE= 9_FH_FILENAME_LENGTH= 10_FH_EXTRA_FIELD_LENGTH= 11
#The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"stringEndArchive64Locator= b"PK\x06\x07"sizeEndCentDir64Locator=struct.calcsize(structEndArchive64Locator)#The "Zip64 end of central directory" record, magic number, size, and indices#(section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"stringEndArchive64= b"PK\x06\x06"sizeEndCentDir64=struct.calcsize(structEndArchive64)
_CD64_SIGNATURE=0
_CD64_DIRECTORY_RECSIZE= 1_CD64_CREATE_VERSION= 2_CD64_EXTRACT_VERSION= 3_CD64_DISK_NUMBER= 4_CD64_DISK_NUMBER_START= 5_CD64_NUMBER_ENTRIES_THIS_DISK= 6_CD64_NUMBER_ENTRIES_TOTAL= 7_CD64_DIRECTORY_SIZE= 8_CD64_OFFSET_START_CENTDIR= 9
def_check_zipfile(fp):try:if_EndRecData(fp):return True #file has correct magic number
exceptOSError:pass
returnFalsedefis_zipfile(filename):"""Quickly see if a file is a ZIP file by checking the magic number.
The filename argument may be a file or file-like object too."""result=Falsetry:if hasattr(filename, "read"):
result= _check_zipfile(fp=filename)else:
with open(filename,"rb") as fp:
result=_check_zipfile(fp)exceptOSError:pass
returnresultdef_EndRecData64(fpin, offset, endrec):"""Read the ZIP64 end-of-archive records and use that to update endrec"""
try:
fpin.seek(offset- sizeEndCentDir64Locator, 2)exceptOSError:#If the seek fails, the file is not large enough to contain a ZIP64
#end-of-archive record, so just return the end record we were given.
returnendrec
data=fpin.read(sizeEndCentDir64Locator)if len(data) !=sizeEndCentDir64Locator:returnendrec
sig, diskno, reloff, disks=struct.unpack(structEndArchive64Locator, data)if sig !=stringEndArchive64Locator:returnendrecif diskno != 0 or disks != 1:raise BadZipFile("zipfiles that span multiple disks are not supported")#Assume no 'zip64 extensible data'
fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
data=fpin.read(sizeEndCentDir64)if len(data) !=sizeEndCentDir64:returnendrec
sig, sz, create_version, read_version, disk_num, disk_dir, \
dircount, dircount2, dirsize, diroffset=\
struct.unpack(structEndArchive64, data)if sig !=stringEndArchive64:returnendrec#Update the original endrec using data from the ZIP64 record
endrec[_ECD_SIGNATURE] =sig
endrec[_ECD_DISK_NUMBER]=disk_num
endrec[_ECD_DISK_START]=disk_dir
endrec[_ECD_ENTRIES_THIS_DISK]=dircount
endrec[_ECD_ENTRIES_TOTAL]=dircount2
endrec[_ECD_SIZE]=dirsize
endrec[_ECD_OFFSET]=diroffsetreturnendrecdef_EndRecData(fpin):"""Return data from the "End of Central Directory" record, or None.
The data is a list of the nine items in the ZIP "End of central dir"
record followed by a tenth item, the file seek offset of this record."""
#Determine file size
fpin.seek(0, 2)
filesize=fpin.tell()#Check to see if this is ZIP file with no archive comment (the
#"end of central directory" structure should be the last item in the
#file if this is the case).
try:
fpin.seek(-sizeEndCentDir, 2)exceptOSError:returnNone
data=fpin.read()if (len(data) == sizeEndCentDir anddata[0:4] == stringEndArchive anddata[-2:] == b"\000\000"):#the signature is correct and there's no comment, unpack structure
endrec =struct.unpack(structEndArchive, data)
endrec=list(endrec)#Append a blank comment and record start offset
endrec.append(b"")
endrec.append(filesize-sizeEndCentDir)#Try to read the "Zip64 end of central directory" structure
return _EndRecData64(fpin, -sizeEndCentDir, endrec)#Either this is not a ZIP file, or it is a ZIP file with an archive
#comment. Search the end of the file for the "end of central directory"
#record signature. The comment is the last item in the ZIP file and may be
#up to 64K long. It is assumed that the "end of central directory" magic
#number does not appear in the comment.
maxCommentStart = max(filesize - (1 << 16) -sizeEndCentDir, 0)
fpin.seek(maxCommentStart, 0)
data=fpin.read()
start=data.rfind(stringEndArchive)if start >=0:#found the magic number; attempt to unpack and interpret
recData = data[start:start+sizeEndCentDir]if len(recData) !=sizeEndCentDir:#Zip file is corrupted.
returnNone
endrec=list(struct.unpack(structEndArchive, recData))
commentSize= endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
endrec.append(comment)
endrec.append(maxCommentStart+start)#Try to read the "Zip64 end of central directory" structure
return _EndRecData64(fpin, maxCommentStart + start -filesize,
endrec)#Unable to find a valid end of central directory structure
returnNoneclassZipInfo (object):"""Class with attributes describing each file in the ZIP archive."""
__slots__ =('orig_filename','filename','date_time','compress_type','comment','extra','create_system','create_version','extract_version','reserved','flag_bits','volume','internal_attr','external_attr','header_offset','CRC','compress_size','file_size','_raw_time',
)def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
self.orig_filename= filename #Original file name in archive
#Terminate the file name at the first null byte. Null bytes in file
#names are used as tricks by viruses in archives.
null_byte =filename.find(chr(0))if null_byte >=0:
filename=filename[0:null_byte]#This is used to ensure paths in generated ZIP files always use
#forward slashes as the directory separator, as required by the
#ZIP format specification.
if os.sep != "/" and os.sep infilename:
filename= filename.replace(os.sep, "/")
self.filename= filename #Normalized file name
self.date_time = date_time #year, month, day, hour, min, sec
if date_time[0] < 1980:raise ValueError('ZIP does not support timestamps before 1980')#Standard values:
self.compress_type = ZIP_STORED #Type of compression for the file
self.comment = b"" #Comment for each file
self.extra = b"" #ZIP extra data
if sys.platform == 'win32':
self.create_system= 0 #System which created ZIP archive
else:#Assume everything else is unix-y
self.create_system = 3 #System which created ZIP archive
self.create_version = DEFAULT_VERSION #Version which created ZIP archive
self.extract_version = DEFAULT_VERSION #Version needed to extract archive
self.reserved = 0 #Must be zero
self.flag_bits = 0 #ZIP flag bits
self.volume = 0 #Volume number of file header
self.internal_attr = 0 #Internal attributes
self.external_attr = 0 #External file attributes
#Other attributes are set by class ZipFile:
#header_offset Byte offset to the file header
#CRC CRC-32 of the uncompressed file
#compress_size Size of the compressed file
#file_size Size of the uncompressed file
def __repr__(self):
result= ['
result.append('compress_type=%s' %compressor_names.get(self.compress_type,
self.compress_type))
hi= self.external_attr >> 16lo= self.external_attr & 0xFFFF
ifhi:
result.append('filemode=%r' %stat.filemode(hi))iflo:
result.append('external_attr=%#x' %lo)
isdir= self.filename[-1:] == '/'
if not isdir orself.file_size:
result.append('file_size=%r' %self.file_size)if ((not isdir or self.compress_size) and(self.compress_type!= ZIP_STORED orself.file_size!=self.compress_size)):
result.append('compress_size=%r' %self.compress_size)
result.append('>')return ''.join(result)def FileHeader(self, zip64=None):"""Return the per-file header as a string."""dt=self.date_time
dosdate= (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
dostime= dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)if self.flag_bits & 0x08:#Set these to zero because we write them after the file data
CRC = compress_size = file_size =0else:
CRC=self.CRC
compress_size=self.compress_size
file_size=self.file_size
extra=self.extra
min_version=0if zip64 isNone:
zip64= file_size > ZIP64_LIMIT or compress_size >ZIP64_LIMITifzip64:
fmt= ' ZIP64_LIMIT or compress_size >ZIP64_LIMIT:if notzip64:raise LargeZipFile("Filesize would require ZIP64 extensions")#File is larger than what fits into a 4 byte integer,
#fall back to the ZIP64 extension
file_size = 0xffffffffcompress_size= 0xffffffffmin_version=ZIP64_VERSIONif self.compress_type ==ZIP_BZIP2:
min_version=max(BZIP2_VERSION, min_version)elif self.compress_type ==ZIP_LZMA:
min_version=max(LZMA_VERSION, min_version)
self.extract_version=max(min_version, self.extract_version)
self.create_version=max(min_version, self.create_version)
filename, flag_bits=self._encodeFilenameFlags()
header=struct.pack(structFileHeader, stringFileHeader,
self.extract_version, self.reserved, flag_bits,
self.compress_type, dostime, dosdate, CRC,
compress_size, file_size,
len(filename), len(extra))return header + filename +extradef_encodeFilenameFlags(self):try:return self.filename.encode('ascii'), self.flag_bitsexceptUnicodeEncodeError:return self.filename.encode('utf-8'), self.flag_bits | 0x800
def_decodeExtra(self):#Try to decode the extra field.
extra =self.extra
unpack=struct.unpackwhile len(extra) >= 4:
tp, ln= unpack('= 24:
counts= unpack('
counts= unpack('
counts= unpack('
counts=()else:raise RuntimeError("Corrupt extra field %s"%(ln,))
idx=0#ZIP64 extension (large files and/or large archives)
if self.file_size in (0xffffffffffffffff, 0xffffffff):
self.file_size=counts[idx]
idx+= 1
if self.compress_size == 0xFFFFFFFF:
self.compress_size=counts[idx]
idx+= 1
if self.header_offset == 0xffffffff:
old=self.header_offset
self.header_offset=counts[idx]
idx+=1extra= extra[ln+4:]class_ZipDecrypter:"""Class to handle decryption of files stored within a ZIP archive.
ZIP supports a password-based form of encryption. Even though known
plaintext attacks have been found against it, it is still useful
to be able to get data out of such a file.
Usage:
zd = _ZipDecrypter(mypwd)
plain_char = zd(cypher_char)
plain_text = map(zd, cypher_text)"""
def_GenerateCRCTable():"""Generate a CRC-32 table.
ZIP encryption uses the CRC32 one-byte primitive for scrambling some
internal keys. We noticed that a direct implementation is faster than
relying on binascii.crc32()."""poly= 0xedb88320table= [0] * 256
for i in range(256):
crc=ifor j in range(8):if crc & 1:
crc= ((crc >> 1) & 0x7FFFFFFF) ^polyelse:
crc= ((crc >> 1) & 0x7FFFFFFF)
table[i]=crcreturntable
crctable=Nonedef_crc32(self, ch, crc):"""Compute the CRC32 primitive on one byte."""
return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]def __init__(self, pwd):if _ZipDecrypter.crctable isNone:
_ZipDecrypter.crctable=_ZipDecrypter._GenerateCRCTable()
self.key0= 305419896self.key1= 591751049self.key2= 878082192
for p inpwd:
self._UpdateKeys(p)def_UpdateKeys(self, c):
self.key0=self._crc32(c, self.key0)
self.key1= (self.key1 + (self.key0 & 255)) & 4294967295self.key1= (self.key1 * 134775813 + 1) & 4294967295self.key2= self._crc32((self.key1 >> 24) & 255, self.key2)def __call__(self, c):"""Decrypt a single character."""
assertisinstance(c, int)
k= self.key2 | 2c= c ^ (((k * (k^1)) >> 8) & 255)
self._UpdateKeys(c)returncclassLZMACompressor:def __init__(self):
self._comp=Nonedef_init(self):
props= lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
self._comp= lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
])return struct.pack('
self._decomp=None
self._unconsumed= b''self.eof=Falsedefdecompress(self, data):if self._decomp isNone:
self._unconsumed+=dataif len(self._unconsumed) <= 4:return b''psize,= struct.unpack('
lzma._decode_filter_properties(lzma.FILTER_LZMA1,
self._unconsumed[4:4 +psize])
])
data= self._unconsumed[4 +psize:]delself._unconsumed
result=self._decomp.decompress(data)
self.eof=self._decomp.eofreturnresult
compressor_names={
0:'store',1: 'shrink',2: 'reduce',3: 'reduce',4: 'reduce',5: 'reduce',6: 'implode',7: 'tokenize',8: 'deflate',9: 'deflate64',10: 'implode',12: 'bzip2',14: 'lzma',18: 'terse',19: 'lz77',97: 'wavpack',98: 'ppmd',
}def_check_compression(compression):if compression ==ZIP_STORED:pass
elif compression ==ZIP_DEFLATED:if notzlib:raiseRuntimeError("Compression requires the (missing) zlib module")elif compression ==ZIP_BZIP2:if notbz2:raiseRuntimeError("Compression requires the (missing) bz2 module")elif compression ==ZIP_LZMA:if notlzma:raiseRuntimeError("Compression requires the (missing) lzma module")else:raise RuntimeError("That compression method is not supported")def_get_compressor(compress_type):if compress_type ==ZIP_DEFLATED:returnzlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
zlib.DEFLATED,-15)elif compress_type ==ZIP_BZIP2:returnbz2.BZ2Compressor()elif compress_type ==ZIP_LZMA:returnLZMACompressor()else:returnNonedef_get_decompressor(compress_type):if compress_type ==ZIP_STORED:returnNoneelif compress_type ==ZIP_DEFLATED:return zlib.decompressobj(-15)elif compress_type ==ZIP_BZIP2:returnbz2.BZ2Decompressor()elif compress_type ==ZIP_LZMA:returnLZMADecompressor()else:
descr=compressor_names.get(compress_type)ifdescr:raise NotImplementedError("compression type %d (%s)" %(compress_type, descr))else:raise NotImplementedError("compression type %d" %(compress_type,))class_SharedFile:def __init__(self, file, pos, close, lock):
self._file=file
self._pos=pos
self._close=close
self._lock=lockdef read(self, n=-1):
with self._lock:
self._file.seek(self._pos)
data=self._file.read(n)
self._pos=self._file.tell()returndatadefclose(self):if self._file is notNone:
fileobj=self._file
self._file=None
self._close(fileobj)#Provide the tell method for unseekable stream
class_Tellable:def __init__(self, fp):
self.fp=fp
self.offset=0defwrite(self, data):
n=self.fp.write(data)
self.offset+=nreturnndeftell(self):returnself.offsetdefflush(self):
self.fp.flush()defclose(self):
self.fp.close()classZipExtFile(io.BufferedIOBase):"""File-like object for reading an archive member.
Is returned by ZipFile.open()."""
#Max size supported by decompressor.
MAX_N = 1 << 31 - 1
#Read from compressed files in 4k blocks.
MIN_READ_SIZE = 4096
#Search for universal newlines or line chunks.
PATTERN = re.compile(br'^(?P[^\r\n]+)|(?P\n|\r\n?)')def __init__(self, fileobj, mode, zipinfo, decrypter=None,
close_fileobj=False):
self._fileobj=fileobj
self._decrypter=decrypter
self._close_fileobj=close_fileobj
self._compress_type=zipinfo.compress_type
self._compress_left=zipinfo.compress_size
self._left=zipinfo.file_size
self._decompressor=_get_decompressor(self._compress_type)
self._eof=False
self._readbuffer= b''self._offset=0
self._universal= 'U' inmode
self.newlines=None#Adjust read size for encrypted files since the first 12 bytes
#are for the encryption/password information.
if self._decrypter is notNone:
self._compress_left-= 12self.mode=mode
self.name=zipinfo.filenameif hasattr(zipinfo, 'CRC'):
self._expected_crc=zipinfo.CRC
self._running_crc= crc32(b'')else:
self._expected_crc=Nonedef __repr__(self):
result= ['
self.__class__.__qualname__)]if notself.closed:
result.append('name=%r mode=%r' %(self.name, self.mode))if self._compress_type !=ZIP_STORED:
result.append('compress_type=%s' %compressor_names.get(self._compress_type,
self._compress_type))else:
result.append('[closed]')
result.append('>')return ''.join(result)def readline(self, limit=-1):"""Read and return a line from the stream.
If limit is specified, at most limit bytes will be read."""
if not self._universal and limit <0:#Shortcut common case - newline found in buffer.
i = self._readbuffer.find(b'\n', self._offset) + 1
if i >0:
line=self._readbuffer[self._offset: i]
self._offset=ireturnlineif notself._universal:returnio.BufferedIOBase.readline(self, limit)
line= b''
while limit < 0 or len(line)
readahead= self.peek(2)if readahead == b'':returnline# #Search for universal newlines or line chunks.
# #The pattern returns either a line chunk or a newline, but not
#both. Combined with peek(2), we are assured that the sequence
#'\r\n' is always retrieved completely and never split into
#separate newlines - '\r', '\n' due to coincidental readaheads.
# match =self.PATTERN.search(readahead)
newline= match.group('newline')if newline is notNone:if self.newlines isNone:
self.newlines=[]if newline not inself.newlines:
self.newlines.append(newline)
self._offset+=len(newline)return line + b'\n'chunk= match.group('chunk')if limit >=0:
chunk= chunk[: limit -len(line)]
self._offset+=len(chunk)
line+=chunkreturnlinedef peek(self, n=1):"""Returns buffered bytes without advancing the position."""
if n > len(self._readbuffer) -self._offset:
chunk=self.read(n)if len(chunk) >self._offset:
self._readbuffer= chunk +self._readbuffer[self._offset:]
self._offset=0else:
self._offset-=len(chunk)#Return up to 512 bytes to reduce allocation overhead for tight loops.
return self._readbuffer[self._offset: self._offset + 512]defreadable(self):returnTruedef read(self, n=-1):"""Read and return up to n bytes.
If the argument is omitted, None, or negative, data is read and returned until EOF is reached.."""
if n is None or n <0:
buf=self._readbuffer[self._offset:]
self._readbuffer= b''self._offset=0while notself._eof:
buf+=self._read1(self.MAX_N)returnbuf
end= n +self._offsetif end
buf=self._readbuffer[self._offset:end]
self._offset=endreturnbuf
n= end -len(self._readbuffer)
buf=self._readbuffer[self._offset:]
self._readbuffer= b''self._offset=0while n > 0 and notself._eof:
data=self._read1(n)if n
self._readbuffer=data
self._offset=n
buf+=data[:n]breakbuf+=data
n-=len(data)returnbufdef_update_crc(self, newdata):#Update the CRC using the given data.
if self._expected_crc isNone:#No need to compute the CRC if we don't have a reference value
returnself._running_crc=crc32(newdata, self._running_crc)#Check the CRC if we're at the end of the file
if self._eof and self._running_crc !=self._expected_crc:raise BadZipFile("Bad CRC-32 for file %r" %self.name)defread1(self, n):"""Read up to n bytes with at most one read() system call."""
if n is None or n <0:
buf=self._readbuffer[self._offset:]
self._readbuffer= b''self._offset=0while notself._eof:
data=self._read1(self.MAX_N)ifdata:
buf+=databreak
returnbuf
end= n +self._offsetif end
buf=self._readbuffer[self._offset:end]
self._offset=endreturnbuf
n= end -len(self._readbuffer)
buf=self._readbuffer[self._offset:]
self._readbuffer= b''self._offset=0if n >0:while notself._eof:
data=self._read1(n)if n
self._readbuffer=data
self._offset=n
buf+=data[:n]break
ifdata:
buf+=databreak
returnbufdef_read1(self, n):#Read up to n compressed bytes with at most one read() system call,
#decrypt and decompress them.
if self._eof or n <=0:return b''
#Read from file.
if self._compress_type ==ZIP_DEFLATED:## Handle unconsumed data.
data =self._decompressor.unconsumed_tailif n >len(data):
data+= self._read2(n -len(data))else:
data=self._read2(n)if self._compress_type ==ZIP_STORED:
self._eof= self._compress_left <=0elif self._compress_type ==ZIP_DEFLATED:
n=max(n, self.MIN_READ_SIZE)
data=self._decompressor.decompress(data, n)
self._eof= (self._decompressor.eof orself._compress_left<= 0 and
notself._decompressor.unconsumed_tail)ifself._eof:
data+=self._decompressor.flush()else:
data=self._decompressor.decompress(data)
self._eof= self._decompressor.eof or self._compress_left <=0
data=data[:self._left]
self._left-=len(data)if self._left <=0:
self._eof=True
self._update_crc(data)returndatadef_read2(self, n):if self._compress_left <=0:return b''n=max(n, self.MIN_READ_SIZE)
n=min(n, self._compress_left)
data=self._fileobj.read(n)
self._compress_left-=len(data)if notdata:raiseEOFErrorif self._decrypter is notNone:
data=bytes(map(self._decrypter, data))returndatadefclose(self):try:ifself._close_fileobj:
self._fileobj.close()finally:
super().close()classZipFile:"""Class with methods to open, read, write, close, list zip files.
z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
file: Either the path to the file, or a file-like object.
If it is a path, the file will be opened and closed by ZipFile.
mode: The mode can be either read 'r', write 'w', exclusive create 'x',
or append 'a'.
compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
allowZip64: if True ZipFile will create files with ZIP64 extensions when
needed, otherwise it will raise an exception when this would
be necessary."""fp= None #Set here since __del__ checks it
_windows_illegal_name_trans_table =Nonedef __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True):"""Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
or append 'a'."""
if mode not in ('r', 'w', 'x', 'a'):raise RuntimeError("ZipFile requires mode 'r', 'w', 'x', or 'a'")
_check_compression(compression)
self._allowZip64=allowZip64
self._didModify=False
self.debug= 0 #Level of printing: 0 through 3
self.NameToInfo = {} #Find file info given name
self.filelist = [] #List of ZipInfo instances for archive
self.compression = compression #Method of compression
self.mode =mode
self.pwd=None
self._comment= b''
#Check if we were passed a file-like object
ifisinstance(file, str):#No, it's a filename
self._filePassed =0
self.filename=file
modeDict= {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b','r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
filemode=modeDict[mode]whileTrue:try:
self.fp=io.open(file, filemode)exceptOSError:if filemode inmodeDict:
filemode=modeDict[filemode]continue
raise
break
else:
self._filePassed= 1self.fp=file
self.filename= getattr(file, 'name', None)
self._fileRefCnt= 1self._lock=threading.RLock()
self._seekable=Truetry:if mode == 'r':
self._RealGetContents()elif mode in ('w', 'x'):#set the modified flag so central directory gets written
#even if no files are added to the archive
self._didModify =Truetry:
self.start_dir=self.fp.tell()except(AttributeError, OSError):
self.fp=_Tellable(self.fp)
self.start_dir=0
self._seekable=Falseelse:#Some file-like objects can provide tell() but not seek()
try:
self.fp.seek(self.start_dir)except(AttributeError, OSError):
self._seekable=Falseelif mode == 'a':try:#See if file is a zip file
self._RealGetContents()#seek to start of directory and overwrite
self.fp.seek(self.start_dir)exceptBadZipFile:#file is not a zip file, just append
self.fp.seek(0, 2)#set the modified flag so central directory gets written
#even if no files are added to the archive
self._didModify =True
self.start_dir=self.fp.tell()else:raise RuntimeError("Mode must be 'r', 'w', 'x', or 'a'")except:
fp=self.fp
self.fp=None
self._fpclose(fp)raise
def __enter__(self):returnselfdef __exit__(self, type, value, traceback):
self.close()def __repr__(self):
result= ['
self.__class__.__qualname__)]if self.fp is notNone:ifself._filePassed:
result.append('file=%r' %self.fp)elif self.filename is notNone:
result.append('filename=%r' %self.filename)
result.append('mode=%r' %self.mode)else:
result.append('[closed]')
result.append('>')return ''.join(result)def_RealGetContents(self):"""Read in the table of contents for the ZIP file."""fp=self.fptry:
endrec=_EndRecData(fp)exceptOSError:raise BadZipFile("File is not a zip file")if notendrec:raise BadZipFile("File is not a zip file")if self.debug > 1:print(endrec)
size_cd= endrec[_ECD_SIZE] #bytes in central directory
offset_cd = endrec[_ECD_OFFSET] #offset of central directory
self._comment = endrec[_ECD_COMMENT] #archive comment
#"concat" is zero, unless zip was concatenated to another file
concat = endrec[_ECD_LOCATION] - size_cd -offset_cdif endrec[_ECD_SIGNATURE] ==stringEndArchive64:#If Zip64 extension structures are present, account for them
concat -= (sizeEndCentDir64 +sizeEndCentDir64Locator)if self.debug > 2:
inferred= concat +offset_cdprint("given, inferred, offset", offset_cd, inferred, concat)#self.start_dir: Position of start of central directory
self.start_dir = offset_cd +concat
fp.seek(self.start_dir, 0)
data=fp.read(size_cd)
fp=io.BytesIO(data)
total=0while total
centdir=fp.read(sizeCentralDir)if len(centdir) !=sizeCentralDir:raise BadZipFile("Truncated central directory")
centdir=struct.unpack(structCentralDir, centdir)if centdir[_CD_SIGNATURE] !=stringCentralDir:raise BadZipFile("Bad magic number for central directory")if self.debug > 2:print(centdir)
filename=fp.read(centdir[_CD_FILENAME_LENGTH])
flags= centdir[5]if flags & 0x800:#UTF-8 file names extension
filename = filename.decode('utf-8')else:#Historical ZIP filename encoding
filename = filename.decode('gbk')#filename = filename.encode("cp437").decode('gbk')
#Create ZipInfo instance to store file information
x =ZipInfo(filename)
x.extra=fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
x.comment=fp.read(centdir[_CD_COMMENT_LENGTH])
x.header_offset=centdir[_CD_LOCAL_HEADER_OFFSET]
(x.create_version, x.create_system, x.extract_version, x.reserved,
x.flag_bits, x.compress_type, t, d,
x.CRC, x.compress_size, x.file_size)= centdir[1:12]if x.extract_version >MAX_EXTRACT_VERSION:raise NotImplementedError("zip file version %.1f" %(x.extract_version/ 10))
x.volume, x.internal_attr, x.external_attr= centdir[15:18]#Convert date/time code to (year, month, day, hour, min, sec)
x._raw_time =t
x.date_time= ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
t>>11, (t>>5)&0x3F, (t&0x1F) * 2)
x._decodeExtra()
x.header_offset= x.header_offset +concat
self.filelist.append(x)
self.NameToInfo[x.filename]=x#update total bytes read from central directory
total = (total + sizeCentralDir +centdir[_CD_FILENAME_LENGTH]+centdir[_CD_EXTRA_FIELD_LENGTH]+centdir[_CD_COMMENT_LENGTH])if self.debug > 2:print("total", total)defnamelist(self):"""Return a list of file names in the archive."""
return [data.filename for data inself.filelist]definfolist(self):"""Return a list of class ZipInfo instances for files in the
archive."""
returnself.filelistdef printdir(self, file=None):"""Print a table of contents for the zip file."""
print("%-46s %19s %12s" % ("File Name", "Modified", "Size"),
file=file)for zinfo inself.filelist:
date= "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]print("%-46s %s %12d" %(zinfo.filename, date, zinfo.file_size),
file=file)deftestzip(self):"""Read all the files and check the CRC."""chunk_size= 2 ** 20
for zinfo inself.filelist:try:#Read by chunks, to avoid an OverflowError or a
#MemoryError with very large embedded files.
with self.open(zinfo.filename, "r") as f:while f.read(chunk_size): #Check CRC-32
pass
exceptBadZipFile:returnzinfo.filenamedefgetinfo(self, name):"""Return the instance of ZipInfo given 'name'."""info=self.NameToInfo.get(name)if info isNone:raiseKeyError('There is no item named %r in the archive' %name)returninfodefsetpassword(self, pwd):"""Set default password for encrypted files."""
if pwd and notisinstance(pwd, bytes):raise TypeError("pwd: expected bytes, got %s" %type(pwd))ifpwd:
self.pwd=pwdelse:
self.pwd=None
@propertydefcomment(self):"""The comment text associated with the ZIP file."""
returnself._comment
@comment.setterdefcomment(self, comment):if notisinstance(comment, bytes):raise TypeError("comment: expected bytes, got %s" %type(comment))#check for valid comment length
if len(comment) >ZIP_MAX_COMMENT:importwarnings
warnings.warn('Archive comment is too long; truncating to %d bytes'
% ZIP_MAX_COMMENT, stacklevel=2)
comment=comment[:ZIP_MAX_COMMENT]
self._comment=comment
self._didModify=Truedef read(self, name, pwd=None):"""Return file bytes (as a string) for name."""with self.open(name,"r", pwd) as fp:returnfp.read()def open(self, name, mode="r", pwd=None):"""Return file-like object for 'name'."""
if mode not in ("r", "U", "rU"):raise RuntimeError('open() requires mode "r", "U", or "rU"')if 'U' inmode:importwarnings
warnings.warn("'U' mode is deprecated",
DeprecationWarning,2)if pwd and notisinstance(pwd, bytes):raise TypeError("pwd: expected bytes, got %s" %type(pwd))if notself.fp:raiseRuntimeError("Attempt to read ZIP archive that was already closed")#Make sure we have an info object
ifisinstance(name, ZipInfo):#'name' is already an info object
zinfo =nameelse:#Get info object for name
zinfo =self.getinfo(name)
self._fileRefCnt+= 1zef_file=_SharedFile(self.fp, zinfo.header_offset, self._fpclose, self._lock)try:#Skip the file header:
fheader =zef_file.read(sizeFileHeader)if len(fheader) !=sizeFileHeader:raise BadZipFile("Truncated file header")
fheader=struct.unpack(structFileHeader, fheader)if fheader[_FH_SIGNATURE] !=stringFileHeader:raise BadZipFile("Bad magic number for file header")
fname=zef_file.read(fheader[_FH_FILENAME_LENGTH])iffheader[_FH_EXTRA_FIELD_LENGTH]:
zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])if zinfo.flag_bits & 0x20:#Zip 2.7: compressed patched data
raise NotImplementedError("compressed patched data (flag bit 5)")if zinfo.flag_bits & 0x40:#strong encryption
raise NotImplementedError("strong encryption (flag bit 6)")if zinfo.flag_bits & 0x800:#UTF-8 filename
fname_str = fname.decode("utf-8")else:
fname_str= fname.decode("cp437")
fname_str= fname_str.encode("cp437").decode('gbk')if fname_str !=zinfo.orig_filename:raiseBadZipFile('File name in directory %r and header %r differ.'
%(zinfo.orig_filename, fname))#check for encrypted flag & handle password
is_encrypted = zinfo.flag_bits & 0x1zd=Noneifis_encrypted:if notpwd:
pwd=self.pwdif notpwd:raise RuntimeError("File %s is encrypted, password"
"required for extraction" %name)
zd=_ZipDecrypter(pwd)#The first 12 bytes in the cypher stream is an encryption header
#used to strengthen the algorithm. The first 11 bytes are
#completely random, while the 12th contains the MSB of the CRC,
#or the MSB of the file time depending on the header type
#and is used to check the correctness of the password.
header = zef_file.read(12)
h= list(map(zd, header[0:12]))if zinfo.flag_bits & 0x8:#compare against the file type from extended local headers
check_byte = (zinfo._raw_time >> 8) & 0xff
else:#compare against the CRC otherwise
check_byte = (zinfo.CRC >> 24) & 0xff
if h[11] !=check_byte:raise RuntimeError("Bad password for file", name)returnZipExtFile(zef_file, mode, zinfo, zd, True)except:
zef_file.close()raise
def extract(self, member, path=None, pwd=None):"""Extract a member from the archive to the current working directory,
using its full name. Its file information is extracted as accurately
as possible. `member' may be a filename or a ZipInfo object. You can
specify a different directory using `path'."""
if notisinstance(member, ZipInfo):
member=self.getinfo(member)if path isNone:
path=os.getcwd()returnself._extract_member(member, path, pwd)def extractall(self, path=None, members=None, pwd=None):"""Extract all members from the archive to the current working
directory. `path' specifies a different directory to extract to.
`members' is optional and must be a subset of the list returned
by namelist()."""
if members isNone:
members=self.namelist()for zipinfo inmembers:
self.extract(zipinfo, path, pwd)
@classmethoddef_sanitize_windows_name(cls, arcname, pathsep):"""Replace bad characters and remove trailing dots from parts."""table=cls._windows_illegal_name_trans_tableif nottable:
illegal= ':<>|"?*'table= str.maketrans(illegal, '_' *len(illegal))
cls._windows_illegal_name_trans_table=table
arcname=arcname.translate(table)#remove trailing dots
arcname = (x.rstrip('.') for x inarcname.split(pathsep))#rejoin, removing empty parts.
arcname = pathsep.join(x for x in arcname ifx)returnarcnamedef_extract_member(self, member, targetpath, pwd):"""Extract the ZipInfo object 'member' to a physical
file on the path targetpath."""
#build the destination pathname, replacing
#forward slashes to platform specific separators.
arcname = member.filename.replace('/', os.path.sep)ifos.path.altsep:
arcname=arcname.replace(os.path.altsep, os.path.sep)#interpret absolute pathname as relative, remove drive letter or
#UNC path, redundant separators, "." and ".." components.
arcname = os.path.splitdrive(arcname)[1]
invalid_path_parts= ('', os.path.curdir, os.path.pardir)
arcname= os.path.sep.join(x for x inarcname.split(os.path.sep)if x not ininvalid_path_parts)if os.path.sep == '\\':#filter illegal characters on Windows
arcname =self._sanitize_windows_name(arcname, os.path.sep)
targetpath=os.path.join(targetpath, arcname)
targetpath=os.path.normpath(targetpath)#Create all upper directories if necessary.
upperdirs =os.path.dirname(targetpath)if upperdirs and notos.path.exists(upperdirs):
os.makedirs(upperdirs)if member.filename[-1] == '/':if notos.path.isdir(targetpath):
os.mkdir(targetpath)returntargetpath
with self.open(member, pwd=pwd) as source, \
open(targetpath,"wb") as target:
shutil.copyfileobj(source, target)returntargetpathdef_writecheck(self, zinfo):"""Check for errors before writing a file to the archive."""
if zinfo.filename inself.NameToInfo:importwarnings
warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)if self.mode not in ('w', 'x', 'a'):raise RuntimeError("write() requires mode 'w', 'x', or 'a'")if notself.fp:raiseRuntimeError("Attempt to write ZIP archive that was already closed")
_check_compression(zinfo.compress_type)if notself._allowZip64:
requires_zip64=Noneif len(self.filelist) >=ZIP_FILECOUNT_LIMIT:
requires_zip64= "Files count"
elif zinfo.file_size >ZIP64_LIMIT:
requires_zip64= "Filesize"
elif zinfo.header_offset >ZIP64_LIMIT:
requires_zip64= "Zipfile size"
ifrequires_zip64:raise LargeZipFile(requires_zip64 +
"would require ZIP64 extensions")def write(self, filename, arcname=None, compress_type=None):"""Put the bytes from filename into the archive under the name
arcname."""
if notself.fp:raiseRuntimeError("Attempt to write to ZIP archive that was already closed")
st=os.stat(filename)
isdir=stat.S_ISDIR(st.st_mode)
mtime=time.localtime(st.st_mtime)
date_time= mtime[0:6]#Create ZipInfo instance to store file information
if arcname isNone:
arcname=filename
arcname= os.path.normpath(os.path.splitdrive(arcname)[1])while arcname[0] in(os.sep, os.altsep):
arcname= arcname[1:]ifisdir:
arcname+= '/'zinfo=ZipInfo(arcname, date_time)
zinfo.external_attr= (st[0] & 0xFFFF) << 16 #Unix attributes
ifisdir:
zinfo.compress_type=ZIP_STOREDelif compress_type isNone:
zinfo.compress_type=self.compressionelse:
zinfo.compress_type=compress_type
zinfo.file_size=st.st_size
zinfo.flag_bits= 0x00with self._lock:ifself._seekable:
self.fp.seek(self.start_dir)
zinfo.header_offset= self.fp.tell() #Start of header bytes
if zinfo.compress_type ==ZIP_LZMA:#Compressed data includes an end-of-stream (EOS) marker
zinfo.flag_bits |= 0x02self._writecheck(zinfo)
self._didModify=Trueifisdir:
zinfo.file_size=0
zinfo.compress_size=0
zinfo.CRC=0
zinfo.external_attr|= 0x10 #MS-DOS directory flag
self.filelist.append(zinfo)
self.NameToInfo[zinfo.filename]=zinfo
self.fp.write(zinfo.FileHeader(False))
self.start_dir=self.fp.tell()returncmpr=_get_compressor(zinfo.compress_type)if notself._seekable:
zinfo.flag_bits|= 0x08with open(filename,"rb") as fp:#Must overwrite CRC and sizes with correct data later
zinfo.CRC = CRC =0
zinfo.compress_size= compress_size =0#Compressed size can be larger than uncompressed size
zip64 = self._allowZip64 and\
zinfo.file_size* 1.05 >ZIP64_LIMIT
self.fp.write(zinfo.FileHeader(zip64))
file_size=0while 1:
buf= fp.read(1024 * 8)if notbuf:breakfile_size= file_size +len(buf)
CRC=crc32(buf, CRC)ifcmpr:
buf=cmpr.compress(buf)
compress_size= compress_size +len(buf)
self.fp.write(buf)ifcmpr:
buf=cmpr.flush()
compress_size= compress_size +len(buf)
self.fp.write(buf)
zinfo.compress_size=compress_sizeelse:
zinfo.compress_size=file_size
zinfo.CRC=CRC
zinfo.file_size=file_sizeif zinfo.flag_bits & 0x08:#Write CRC and file sizes after the file data
fmt = '
zinfo.file_size))
self.start_dir=self.fp.tell()else:if not zip64 andself._allowZip64:if file_size >ZIP64_LIMIT:raise RuntimeError('File size has increased during compressing')if compress_size >ZIP64_LIMIT:raise RuntimeError('Compressed size larger than uncompressed size')#Seek backwards and write file header (which will now include
#correct CRC and file sizes)
self.start_dir = self.fp.tell() #Preserve current position in file
self.fp.seek(zinfo.header_offset)
self.fp.write(zinfo.FileHeader(zip64))
self.fp.seek(self.start_dir)
self.filelist.append(zinfo)
self.NameToInfo[zinfo.filename]=zinfodef writestr(self, zinfo_or_arcname, data, compress_type=None):"""Write a file into the archive. The contents is 'data', which
may be either a 'str' or a 'bytes' instance; if it is a 'str',
it is encoded as UTF-8 first.
'zinfo_or_arcname' is either a ZipInfo instance or
the name of the file in the archive."""
ifisinstance(data, str):
data= data.encode("utf-8")if notisinstance(zinfo_or_arcname, ZipInfo):
zinfo= ZipInfo(filename=zinfo_or_arcname,
date_time=time.localtime(time.time())[:6])
zinfo.compress_type=self.compressionif zinfo.filename[-1] == '/':
zinfo.external_attr= 0o40775 << 16 #drwxrwxr-x
zinfo.external_attr |= 0x10 #MS-DOS directory flag
else:
zinfo.external_attr= 0o600 << 16 #?rw-------
else:
zinfo=zinfo_or_arcnameif notself.fp:raiseRuntimeError("Attempt to write to ZIP archive that was already closed")
zinfo.file_size= len(data) #Uncompressed size
with self._lock:ifself._seekable:
self.fp.seek(self.start_dir)
zinfo.header_offset= self.fp.tell() #Start of header data
if compress_type is notNone:
zinfo.compress_type=compress_type
zinfo.header_offset= self.fp.tell() #Start of header data
if compress_type is notNone:
zinfo.compress_type=compress_typeif zinfo.compress_type ==ZIP_LZMA:#Compressed data includes an end-of-stream (EOS) marker
zinfo.flag_bits |= 0x02self._writecheck(zinfo)
self._didModify=True
zinfo.CRC= crc32(data) #CRC-32 checksum
co =_get_compressor(zinfo.compress_type)ifco:
data= co.compress(data) +co.flush()
zinfo.compress_size= len(data) #Compressed size
else:
zinfo.compress_size=zinfo.file_size
zip64= zinfo.file_size > ZIP64_LIMIT or\
zinfo.compress_size>ZIP64_LIMITif zip64 and notself._allowZip64:raise LargeZipFile("Filesize would require ZIP64 extensions")
self.fp.write(zinfo.FileHeader(zip64))
self.fp.write(data)if zinfo.flag_bits & 0x08:#Write CRC and file sizes after the file data
fmt = '
zinfo.file_size))
self.fp.flush()
self.start_dir=self.fp.tell()
self.filelist.append(zinfo)
self.NameToInfo[zinfo.filename]=zinfodef __del__(self):"""Call the "close()" method in case the user forgot."""self.close()defclose(self):"""Close the file, and for mode 'w', 'x' and 'a' write the ending
records."""
if self.fp isNone:return
try:if self.mode in ('w', 'x', 'a') and self._didModify: #write ending records
with self._lock:ifself._seekable:
self.fp.seek(self.start_dir)
self._write_end_record()finally:
fp=self.fp
self.fp=None
self._fpclose(fp)def_write_end_record(self):for zinfo in self.filelist: #write central directory
dt =zinfo.date_time
dosdate= (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
dostime= dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
extra=[]if zinfo.file_size >ZIP64_LIMIT \or zinfo.compress_size >ZIP64_LIMIT:
extra.append(zinfo.file_size)
extra.append(zinfo.compress_size)
file_size= 0xffffffffcompress_size= 0xffffffff
else:
file_size=zinfo.file_size
compress_size=zinfo.compress_sizeif zinfo.header_offset >ZIP64_LIMIT:
extra.append(zinfo.header_offset)
header_offset= 0xffffffff
else:
header_offset=zinfo.header_offset
extra_data=zinfo.extra
min_version=0ifextra:#Append a ZIP64 field to the extra's
extra_data =struct.pack('
min_version=ZIP64_VERSIONif zinfo.compress_type ==ZIP_BZIP2:
min_version=max(BZIP2_VERSION, min_version)elif zinfo.compress_type ==ZIP_LZMA:
min_version=max(LZMA_VERSION, min_version)
extract_version=max(min_version, zinfo.extract_version)
create_version=max(min_version, zinfo.create_version)try:
filename, flag_bits=zinfo._encodeFilenameFlags()
centdir=struct.pack(structCentralDir,
stringCentralDir, create_version,
zinfo.create_system, extract_version, zinfo.reserved,
flag_bits, zinfo.compress_type, dostime, dosdate,
zinfo.CRC, compress_size, file_size,
len(filename), len(extra_data), len(zinfo.comment),
0, zinfo.internal_attr, zinfo.external_attr,
header_offset)exceptDeprecationWarning:print((structCentralDir, stringCentralDir, create_version,
zinfo.create_system, extract_version, zinfo.reserved,
zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
zinfo.CRC, compress_size, file_size,
len(zinfo.filename), len(extra_data), len(zinfo.comment),
0, zinfo.internal_attr, zinfo.external_attr,
header_offset), file=sys.stderr)raiseself.fp.write(centdir)
self.fp.write(filename)
self.fp.write(extra_data)
self.fp.write(zinfo.comment)
pos2=self.fp.tell()#Write end-of-zip-archive record
centDirCount =len(self.filelist)
centDirSize= pos2 -self.start_dir
centDirOffset=self.start_dir
requires_zip64=Noneif centDirCount >ZIP_FILECOUNT_LIMIT:
requires_zip64= "Files count"
elif centDirOffset >ZIP64_LIMIT:
requires_zip64= "Central directory offset"
elif centDirSize >ZIP64_LIMIT:
requires_zip64= "Central directory size"
ifrequires_zip64:#Need to write the ZIP64 end-of-archive records
if notself._allowZip64:raise LargeZipFile(requires_zip64 +
"would require ZIP64 extensions")
zip64endrec=struct.pack(
structEndArchive64, stringEndArchive64,44, 45, 45, 0, 0, centDirCount, centDirCount,
centDirSize, centDirOffset)
self.fp.write(zip64endrec)
zip64locrec=struct.pack(
structEndArchive64Locator,
stringEndArchive64Locator, 0, pos2,1)
self.fp.write(zip64locrec)
centDirCount= min(centDirCount, 0xFFFF)
centDirSize= min(centDirSize, 0xFFFFFFFF)
centDirOffset= min(centDirOffset, 0xFFFFFFFF)
endrec=struct.pack(structEndArchive, stringEndArchive,
0, 0, centDirCount, centDirCount,
centDirSize, centDirOffset, len(self._comment))
self.fp.write(endrec)
self.fp.write(self._comment)
self.fp.flush()def_fpclose(self, fp):assert self._fileRefCnt >0
self._fileRefCnt-= 1
if not self._fileRefCnt and notself._filePassed:
fp.close()classPyZipFile(ZipFile):"""Class to create ZIP archives with Python library files and packages."""
def __init__(self, file, mode="r", compression=ZIP_STORED,
allowZip64=True, optimize=-1):
ZipFile.__init__(self, file, mode=mode, compression=compression,
allowZip64=allowZip64)
self._optimize=optimizedef writepy(self, pathname, basename="", filterfunc=None):"""Add all files from "pathname" to the ZIP archive.
If pathname is a package directory, search the directory and
all package subdirectories recursively for all *.py and enter
the modules into the archive. If pathname is a plain
directory, listdir *.py and enter all modules. Else, pathname
must be a Python *.py file and the module will be put into the
archive. Added modules are always module.pyc.
This method will compile the module.py into module.pyc if
necessary.
If filterfunc(pathname) is given, it is called with every argument.
When it is False, the file or directory is skipped."""
if filterfunc and notfilterfunc(pathname):ifself.debug:
label= 'path' if os.path.isdir(pathname) else 'file'
print('%s "%s" skipped by filterfunc' %(label, pathname))returndir, name=os.path.split(pathname)ifos.path.isdir(pathname):
initname= os.path.join(pathname, "__init__.py")ifos.path.isfile(initname):#This is a package directory, add it
ifbasename:
basename= "%s/%s" %(basename, name)else:
basename=nameifself.debug:print("Adding package in", pathname, "as", basename)
fname, arcname= self._get_codename(initname[0:-3], basename)ifself.debug:print("Adding", arcname)
self.write(fname, arcname)
dirlist=os.listdir(pathname)
dirlist.remove("__init__.py")#Add all *.py files and package subdirectories
for filename indirlist:
path=os.path.join(pathname, filename)
root, ext=os.path.splitext(filename)ifos.path.isdir(path):if os.path.isfile(os.path.join(path, "__init__.py")):#This is a package directory, add it
self.writepy(path, basename,
filterfunc=filterfunc) #Recursive call
elif ext == ".py":if filterfunc and notfilterfunc(path):ifself.debug:print('file "%s" skipped by filterfunc' %path)continuefname, arcname= self._get_codename(path[0:-3],
basename)ifself.debug:print("Adding", arcname)
self.write(fname, arcname)else:#This is NOT a package directory, add its files at top level
ifself.debug:print("Adding files from directory", pathname)for filename inos.listdir(pathname):
path=os.path.join(pathname, filename)
root, ext=os.path.splitext(filename)if ext == ".py":if filterfunc and notfilterfunc(path):ifself.debug:print('file "%s" skipped by filterfunc' %path)continuefname, arcname= self._get_codename(path[0:-3],
basename)ifself.debug:print("Adding", arcname)
self.write(fname, arcname)else:if pathname[-3:] != ".py":raiseRuntimeError('Files added with writepy() must end with ".py"')
fname, arcname= self._get_codename(pathname[0:-3], basename)ifself.debug:print("Adding file", arcname)
self.write(fname, arcname)def_get_codename(self, pathname, basename):"""Return (filename, archivename) for the path.
Given a module name path, return the correct file path and
archive name, compiling if necessary. For example, given
/python/lib/string, return (/python/lib/string.pyc, string)."""
def _compile(file, optimize=-1):importpy_compileifself.debug:print("Compiling", file)try:
py_compile.compile(file, doraise=True, optimize=optimize)exceptpy_compile.PyCompileError as err:print(err.msg)returnFalsereturnTrue
file_py= pathname + ".py"file_pyc= pathname + ".pyc"pycache_opt0= importlib.util.cache_from_source(file_py, optimization='')
pycache_opt1= importlib.util.cache_from_source(file_py, optimization=1)
pycache_opt2= importlib.util.cache_from_source(file_py, optimization=2)if self._optimize == -1:#legacy mode: use whatever file is present
if (os.path.isfile(file_pyc) andos.stat(file_pyc).st_mtime>=os.stat(file_py).st_mtime):#Use .pyc file.
arcname = fname =file_pycelif (os.path.isfile(pycache_opt0) andos.stat(pycache_opt0).st_mtime>=os.stat(file_py).st_mtime):#Use the __pycache__/*.pyc file, but write it to the legacy pyc
#file name in the archive.
fname =pycache_opt0
arcname=file_pycelif (os.path.isfile(pycache_opt1) andos.stat(pycache_opt1).st_mtime>=os.stat(file_py).st_mtime):#Use the __pycache__/*.pyc file, but write it to the legacy pyc
#file name in the archive.
fname =pycache_opt1
arcname=file_pycelif (os.path.isfile(pycache_opt2) andos.stat(pycache_opt2).st_mtime>=os.stat(file_py).st_mtime):#Use the __pycache__/*.pyc file, but write it to the legacy pyc
#file name in the archive.
fname =pycache_opt2
arcname=file_pycelse:#Compile py into PEP 3147 pyc file.
if_compile(file_py):if sys.flags.optimize ==0:
fname=pycache_opt0elif sys.flags.optimize == 1:
fname=pycache_opt1else:
fname=pycache_opt2
arcname=file_pycelse:
fname= arcname =file_pyelse:#new mode: use given optimization level
if self._optimize ==0:
fname=pycache_opt0
arcname=file_pycelse:
arcname=file_pycif self._optimize == 1:
fname=pycache_opt1elif self._optimize == 2:
fname=pycache_opt2else:
msg= "invalid value for 'optimize': {!r}".format(self._optimize)raiseValueError(msg)if not (os.path.isfile(fname) andos.stat(fname).st_mtime>=os.stat(file_py).st_mtime):if not _compile(file_py, optimize=self._optimize):
fname= arcname =file_py
archivename= os.path.split(arcname)[1]ifbasename:
archivename= "%s/%s" %(basename, archivename)return(fname, archivename)def main(args =None):importtextwrap
USAGE=textwrap.dedent("""\
Usage:
zipfile.py -l zipfile.zip # Show listing of a zipfile
zipfile.py -t zipfile.zip # Test if a zipfile is valid
zipfile.py -e zipfile.zip target # Extract zipfile into target dir
zipfile.py -c zipfile.zip src ... # Create zipfile from sources""")if args isNone:
args= sys.argv[1:]if not args or args[0] not in ('-l', '-c', '-e', '-t'):print(USAGE)
sys.exit(1)if args[0] == '-l':if len(args) != 2:print(USAGE)
sys.exit(1)
with ZipFile(args[1], 'r') as zf:
zf.printdir()elif args[0] == '-t':if len(args) != 2:print(USAGE)
sys.exit(1)
with ZipFile(args[1], 'r') as zf:
badfile=zf.testzip()ifbadfile:print("The following enclosed file is corrupted: {!r}".format(badfile))print("Done testing")elif args[0] == '-e':if len(args) != 3:print(USAGE)
sys.exit(1)
with ZipFile(args[1], 'r') as zf:
zf.extractall(args[2])elif args[0] == '-c':if len(args) < 3:print(USAGE)
sys.exit(1)defaddToZip(zf, path, zippath):ifos.path.isfile(path):
zf.write(path, zippath, ZIP_DEFLATED)elifos.path.isdir(path):ifzippath:
zf.write(path, zippath)for nm inos.listdir(path):
addToZip(zf,
os.path.join(path, nm), os.path.join(zippath, nm))#else: ignore
with ZipFile(args[1], 'w') as zf:for path in args[2:]:
zippath=os.path.basename(path)if notzippath:
zippath=os.path.basename(os.path.dirname(path))if zippath in ('', os.curdir, os.pardir):
zippath= ''addToZip(zf, path, zippath)if __name__ == "__main__":
main()