How to zip very large files in Python (python 压缩超大文件)

I would like to zip a couple of files that may amount to about 99 GB using Python. What is the most efficient way to do this using the zipfile library? This is a sample of the code I have:

# Stream a set of GCS-hosted files into a single zip archive on GCS.
# NOTE(review): z.writestr(...) with gcs_reader.read() pulls each source
# object fully into memory — for ~99 GB of input this is the bottleneck
# the question is about; a chunked write method is needed (see answer below).
with gcs.open(zip_file_name, 'w', content_type=b'application/zip') as f:
    with zipfile.ZipFile(f, 'w') as z:
        for file in files:
            # Visible if the visitor may see the page or owns the file.
            is_owner = (is_page_allowed_to_visitor(page, visitor)
                        or (file.owner_id == visitor.id))
            if is_owner:
                file.show = True
            elif file.available_from:
                # Not yet published.
                if file.available_from > datetime.now():
                    file.show = False
            elif file.available_to:
                # Expired.
                if file.available_to < datetime.now():
                    file.show = False
            else:
                # No availability window configured: visible to everyone.
                file.show = True
            if file.show:
                file_name = "/%s/%s" % (gcs_store.get_bucket_name(), file.gcs_name)
                gcs_reader = gcs.open(file_name, 'r')
                z.writestr('%s-%s' % (file.created_on, file.name), gcs_reader.read())
                gcs_reader.close()
# No explicit f.close() needed: the `with` statements close both the
# ZipFile and the GCS stream (the original snippet's f.close() was redundant).

Some points to note:

1) I am using Google App Engine to host the files, so I cannot use the zipfile.write() method (it expects a filesystem path). I can only get the file contents as bytes.

Thanks in advance

解决方案

I have added a new method to the zipfile library. This enhanced zipfile library is open source and can be found on GitHub (EnhancedZipFile). I added a new method inspired by both the zipfile.write() method and the zipfile.writestr() method.

def writebuffered(self, zinfo_or_arcname, file_pointer, file_size, compress_type=None):
    """Write the contents of an open file object into the archive in chunks.

    Streams ``file_pointer`` into the zip 8 KiB at a time so the whole
    payload never has to fit in memory (unlike ``writestr``).  Modelled on
    ``ZipFile.write``/``ZipFile.writestr``.

    Args:
        zinfo_or_arcname: archive name (str) or a pre-built ``ZipInfo``.
        file_pointer: readable binary file-like object positioned at the data.
        file_size: uncompressed size in bytes (used for the ZIP64 decision;
            the true size is recomputed while streaming).
        compress_type: optional per-file compression override
            (bug fix: this parameter was previously accepted but ignored).

    Raises:
        RuntimeError: if the streamed data crosses the ZIP64 limit after the
            header was written without ZIP64 extensions.
    """
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        if zinfo.filename[-1] == '/':
            zinfo.external_attr = 0o40775 << 16  # drwxrwxr-x
            zinfo.external_attr |= 0x10          # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16    # ?rw-------
    else:
        zinfo = zinfo_or_arcname
    if compress_type is not None:
        zinfo.compress_type = compress_type

    zinfo.file_size = file_size              # uncompressed size
    zinfo.header_offset = self.fp.tell()     # start of header bytes
    self._writecheck(zinfo)
    self._didModify = True

    fp = file_pointer
    # CRC and sizes are unknown until the data has been streamed; write
    # zeros now and patch the local header afterwards when the underlying
    # stream supports seeking (GCS streams do not).
    zinfo.CRC = CRC = 0
    zinfo.compress_size = compress_size = 0
    # Compressed size can be larger than uncompressed size.
    zip64 = self._allowZip64 and \
        zinfo.file_size * 1.05 > ZIP64_LIMIT
    self.fp.write(zinfo.FileHeader(zip64))
    if zinfo.compress_type == ZIP_DEFLATED:
        cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                zlib.DEFLATED, -15)
    else:
        cmpr = None
    file_size = 0
    while True:
        buf = fp.read(1024 * 8)
        if not buf:
            break
        file_size += len(buf)
        CRC = crc32(buf, CRC) & 0xffffffff
        if cmpr:
            buf = cmpr.compress(buf)
            compress_size += len(buf)
        self.fp.write(buf)
    if cmpr:
        buf = cmpr.flush()
        compress_size += len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    if not zip64 and self._allowZip64:
        if file_size > ZIP64_LIMIT:
            raise RuntimeError('File size has increased during compressing')
        if compress_size > ZIP64_LIMIT:
            raise RuntimeError('Compressed size larger than uncompressed size')

    position = self.fp.tell()  # preserve current position in file
    # Bug fix: the original never rewrote the local header, leaving the
    # zeroed CRC/size placeholders in place.  When the stream is seekable,
    # seek back and rewrite the header with the real values (mirrors
    # ZipFile.write); non-seekable streams keep the old best-effort behavior.
    seekable = getattr(self.fp, 'seekable', None)
    if seekable is not None and seekable():
        self.fp.seek(zinfo.header_offset, 0)
        self.fp.write(zinfo.FileHeader(zip64))
        self.fp.seek(position, 0)
    self.fp.flush()
    # Bug fix for modern zipfile (3.6+): ZipFile.close() seeks back to
    # self.start_dir before writing the central directory; without this the
    # central directory would overwrite the data we just streamed.
    if hasattr(self, 'start_dir'):
        self.start_dir = position
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo

Points to note

I am a newbie in Python, so the code I wrote above may not be very optimized.

Please contribute to the project on github here https://github.com/najela/EnhancedZipFile

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值