"""Read and write ZIP files.
XXX references to utf-8 need further investigation."""
import io
import os
import re
import importlib.util
import sys
import time
import stat
import shutil
import struct
import binascii

try:
    import threading
except ImportError:
    import dummy_threading as threading

# Optional compression back ends: each name is bound to None when the
# corresponding module cannot be imported.
try:
    import zlib  # We may need its compression method
    crc32 = zlib.crc32
except ImportError:
    zlib = None
    # Fall back to binascii's CRC-32 when zlib is unavailable.
    crc32 = binascii.crc32

try:
    import bz2  # We may need its compression method
except ImportError:
    bz2 = None

try:
    import lzma  # We may need its compression method
except ImportError:
    lzma = None

__all__ = ["BadZipFile", "BadZipfile", "error",
           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]


class BadZipFile(Exception):
    """Raised for ZIP files this module cannot process.

    For example, archives that span multiple disks.
    """


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """


error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names
# Numeric limits: largest signed 32-bit value and largest unsigned 16-bit
# values.  NOTE(review): presumably the thresholds for file sizes/offsets,
# entry count and comment length in the non-ZIP64 format -- confirm at the
# places these names are used.
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# Format version numbers associated with each feature.
# NOTE(review): presumably "version needed to extract" values (major * 10 +
# minor) -- confirm against the format document.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
# Below are some formats and associated data for reading/writing headers using
# the struct module. The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)
# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
# The signature must be exactly four bytes so it can match the leading
# "4s" field of the structure above.
stringEndArchive = b"PK\x05\x06"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields in the unpacked record.
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
# The signature must be exactly four bytes so it can match the leading
# "4s" field of the structure above.
stringCentralDir = b"PK\x01\x02"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
# The signature must be exactly four bytes so it can match the leading
# "4s" field of the structure above.
stringFileHeader = b"PK\x03\x04"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the fields in the unpacked record.
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
# Four-byte signature matching the leading "4s" field of the locator.
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
# Four-byte signature matching the leading "4s" field of the record.
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the fields in the unpacked ZIP64 record.
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
def _check_zipfile(fp):
    """Return True if _EndRecData() finds an end-of-archive record in *fp*.

    An OSError raised while probing *fp* is swallowed and reported as False.
    """
    try:
        if _EndRecData(fp):
            return True         # file has correct magic number
    except OSError:
        pass
    return False


def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    Returns False (rather than raising) when the file cannot be opened or
    read, since OSError is caught here.
    """
    result = False
    try:
        if hasattr(filename, "read"):
            # Anything with a read() method is treated as an open file.
            result = _check_zipfile(fp=filename)
        else:
            with open(filename, "rb") as fp:
                result = _check_zipfile(fp)
    except OSError:
        pass
    return result


def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    *offset* is passed to seek() relative to the end of the file (whence=2).
    *endrec* is modified in place and also returned; it is returned
    unchanged when no ZIP64 locator or record is found.

    Raises BadZipFile if the locator describes a multi-disk archive.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    # Parenthesised so the ten-name target list can span two lines.
    (sig, sz, create_version, read_version, disk_num, disk_dir,
     dircount, dircount2, dirsize, diroffset) = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
endrec[_ECD_OFFSET]=diroffsetreturnendrecdef_EndRecData(fpin):"""Return data from the "End of Central Directory" record, or None.
The data is a list of the nine items in the ZIP "End of central dir"
record followed by a tenth item, the file seek offset of this record."""
#Determine file size
fpin.seek(0, 2)
filesize=fpin.tell()#Check to see if this is ZIP file with no archive comment (the
#"end of central directory" structure should be the last item in the
#file if this is the case).
try:
fpin.seek(-sizeEndCentDir, 2)exceptOSError:returnNone
data=fpin.read()if (len(data) == sizeEndCentDir anddata[0:4] == stringEndArchive anddata[-2:] == b"