python解压zip/rar文件并解决zip解压乱码问题
环境:python3.5、windows7
依赖包:zipfile、rarfile、windows下unrar.exe放到C:/Windows下或者linux下安装unrar
功能:自动解压zip、rar文件
编写过程中遇到的问题:
1、zip解压缩乱码问题,修改源码zipfile.py文件:
zipfile.py 中有两处需要修改:对于未设置 UTF-8 标志的条目,zipfile 会把文件名按 cp437 解码;将其重新编码为 cp437 后再按 gbk 解码,即可解决乱码问题;
if zinfo.flag_bits & 0x800:
    # UTF-8 filename
    fname_str = fname.decode("utf-8")
else:
    fname_str = fname.decode("cp437")
    # change by xiao, 20190715
    fname_str = fname_str.encode('cp437').decode('gbk')
2、windows下rar解压依赖unrar.exe,需要把rar安装的unrar.exe拷贝到C:/Windows下,linux下可通过pip进行安装;
代码如下:
#-*- coding: utf-8 -*-
import os, zipfile, sys
import rarfile
import re
def _repair_member_name(info):
    """Return the member name of *info* with legacy GBK names repaired.

    zipfile decodes a member name as cp437 unless the entry carries the
    UTF-8 flag (bit 0x800).  Archives created on Chinese Windows usually
    store GBK bytes without that flag, so the decoded name is mojibake.
    Re-encoding as cp437 restores the raw bytes, which are then decoded
    as GBK.  If either step fails the name is returned unchanged.
    """
    if info.flag_bits & 0x800:
        # The name was stored as UTF-8 and is already decoded correctly.
        return info.filename
    try:
        return info.filename.encode('cp437').decode('gbk')
    except (UnicodeEncodeError, UnicodeDecodeError):
        # Not cp437-representable (e.g. already a proper Unicode name) or
        # not valid GBK: keep whatever zipfile produced.
        return info.filename


def un_zip(filename):
    """Extract the zip archive *filename* into ``filename + "_process"``.

    The output directory is created next to the archive if it does not
    exist yet.  Member names stored as GBK without the UTF-8 flag are
    repaired before extraction, so no patch to the standard library's
    zipfile.py is needed.

    Args:
        filename: Path of the zip archive on disk.  It is used as-is; a
            filesystem path must not be re-encoded, otherwise the archive
            can no longer be found.

    Raises:
        zipfile.BadZipFile: If *filename* is not a valid zip archive.
        OSError: If the archive cannot be read or the output cannot be
            written.
    """
    target_dir = filename + "_process"
    # The context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(filename, 'r') as zip_file:
        # Created only after the archive opened successfully, so invalid
        # archives leave no empty output directory behind.
        os.makedirs(target_dir, exist_ok=True)
        for info in zip_file.infolist():
            # Only the output name is changed; extract() still locates
            # the member data through the ZipInfo object itself.
            info.filename = _repair_member_name(info)
            zip_file.extract(info, path=target_dir)
def un_rar(filename, outdir):
    """Extract every member of the RAR archive *filename* into *outdir*.

    Args:
        filename: Path of the RAR archive.
        outdir: Directory that receives the extracted files.

    Note:
        Per the setup notes accompanying this script, rarfile relies on an
        external ``unrar`` executable being installed.
    """
    # The context manager closes the archive even if extraction fails.
    with rarfile.RarFile(filename) as rar_file:
        rar_file.extractall(outdir)
def scan_dir_files(file_path):
    """Return the paths of all files found under *file_path*, recursively.

    Args:
        file_path: Root directory to scan.

    Returns:
        A list of paths, each built by joining the containing directory
        (as reported by ``os.walk``) with the file name.  The list is
        empty when the root holds no files or does not exist.
    """
    file_list = []
    # os.walk already descends into every sub-directory, so no explicit
    # recursion is needed.
    for dir_path, _sub_dirs, filenames in os.walk(file_path):
        file_list.extend(os.path.join(dir_path, name) for name in filenames)
    return file_list
if __name__ == '__main__':
    # Root directory to process, relative to the current working directory.
    root_dir = "比赛"

    # Pass 1: unpack every zip archive found under the root.  Files are
    # identified by content so that non-zip files are skipped instead of
    # aborting the whole run with BadZipFile.
    for path in scan_dir_files(root_dir):
        if zipfile.is_zipfile(path):
            un_zip(path)

    # Pass 2: rescan so that RAR archives unpacked from the zips in pass 1
    # are picked up as well, and extract each one next to itself.  The
    # content check avoids matching unrelated files that merely live under
    # a directory whose name contains ".rar".
    for path in scan_dir_files(root_dir):
        if rarfile.is_rarfile(path):
            un_rar(path, os.path.dirname(path))