不多说，直接上代码：将 mht 文件中的图片保存到当前目录下与文件同名的文件夹中，并自动压缩为 zip。
import base64
import os
import zipfile
# 1: reuse an already existing output folder without asking;
# 0: ask for confirmation on stdin first (see extractFile).
FILE_EXIST_AUTO_REWRITE = 1
# 1: name saved images sequentially (0001.<type>, 0002.<type>, ...);
# 0: keep the name taken from the part's Content-Location header (see saveImg).
IMG_NAME_REWRITE = 1
# Images whose base64 body has this many lines or fewer are treated as too
# small and are not written to disk (see saveImg).
BASE64_LINE_THRESHOLD = 500
def saveImg(file_path: str, folder_path: str) -> None:
    """Extract the base64-encoded images of an MHT file into *folder_path*.

    The file is scanned line by line. A part starts at a line that begins
    with '-' and contains the text 'Boundary'. The part's headers are looked
    up by name, so extra headers or a different header order are tolerated.
    Only parts whose Content-Type is image/* and whose
    Content-Transfer-Encoding is base64 are considered; of those, images with
    BASE64_LINE_THRESHOLD base64 lines or fewer are skipped as too small.

    Args:
        file_path: Path of the .mht file to read.
        folder_path: Existing directory that receives the decoded images.

    Prints the number of saved images when done. Returns nothing.
    """
    img_dic = {}  # key: saved image name, value: number of base64 lines
    with open(file_path, 'rb') as file:
        line = _read_text_line(file)
        while line:  # '' means end of file
            if not _is_part_boundary(line):
                line = _read_text_line(file)
                continue

            headers = _read_part_headers(file)
            # Drop parameters such as '; charset=...' before splitting the type.
            media_type = headers.get('content-type', '').split(';')[0].strip().lower()
            main_type, _, file_type = media_type.partition('/')
            encoding = headers.get('content-transfer-encoding', '').lower()
            if main_type != 'image' or encoding != 'base64' or not file_type:
                # Not an image part: keep scanning for the next boundary.
                line = _read_text_line(file)
                continue

            # 'line' becomes the line that ended the body, so a boundary that
            # directly follows the payload is still handled by the next pass.
            chunks, line = _read_base64_body(file)
            if len(chunks) <= BASE64_LINE_THRESHOLD:
                continue  # image is too small

            img_name = _pick_image_name(
                headers.get('content-location', ''), file_type, len(img_dic) + 1)
            try:
                data = base64.b64decode(''.join(chunks))
            except ValueError:
                # binascii.Error (bad padding) is a ValueError; one damaged
                # image should not abort the extraction of the others.
                print('[Skip Img] %s: invalid base64 data' % img_name)
                continue
            with open(os.path.join(folder_path, img_name), 'wb') as img:
                img.write(data)
            img_dic[img_name] = len(chunks)
    print('[Saved Img] %d'%(len(img_dic)))


def _read_text_line(file):
    """Read one line from the binary handle as text; '' signals end of file."""
    # Bodies of other parts may hold bytes that are not valid UTF-8. They are
    # replaced rather than raising, since such lines are only scanned for
    # boundaries and never decoded as image data.
    return file.readline().decode('utf-8', errors='replace')


def _is_part_boundary(line):
    """Return True if *line* starts with '-' and contains the text 'Boundary'."""
    return line.startswith('-') and 'Boundary' in line


def _read_part_headers(file):
    """Read headers up to the blank line (or EOF) into a dict keyed by lower-case name."""
    headers = {}
    while True:
        line = _read_text_line(file)
        if not line.strip():  # blank separator line or end of file
            return headers
        if line[0] in ' \t':  # folded continuation of the previous header
            continue
        name, sep, value = line.partition(':')
        if sep:
            headers[name.strip().lower()] = value.strip()


def _read_base64_body(file):
    """Collect base64 lines until a blank line, a boundary or EOF.

    Returns a (chunks, terminator) tuple; terminator is the line that ended
    the body ('' at end of file).
    """
    chunks = []
    while True:
        line = _read_text_line(file)
        stripped = line.strip()
        if not stripped or _is_part_boundary(line):
            return chunks, line
        chunks.append(stripped)


def _pick_image_name(location, file_type, sequence_no):
    """Build the output file name according to IMG_NAME_REWRITE."""
    numbered = str(sequence_no).zfill(4) + '.' + file_type  # fill leading zeros
    if IMG_NAME_REWRITE == 0:
        # Last path component of the location, without query string or extension.
        tokens = location.split('?')[0].replace(':', ' ').replace('/', ' ').split()
        stem = tokens[-1].split('.')[0] if tokens else ''
        if stem:
            return stem + '.' + file_type
    # Sequential names are also the fallback when no usable name is available.
    return numbered
def getDirSize(dir: str) -> int:
    """Return the total size in bytes of all files below *dir*, recursively.

    Args:
        dir: Directory to measure. The name shadows the builtin but is kept
            because it is part of the function's signature.

    Returns:
        The sum of os.path.getsize() over every file found by os.walk();
        0 when the directory contains no files.
    """
    # One generator over the whole walk instead of a temporary list per directory.
    return sum(
        os.path.getsize(os.path.join(root, name))
        for root, _dirs, files in os.walk(dir)
        for name in files
    )
def zipImg(folder_path: str, folder_name: str) -> None:
    """Pack the entries of *folder_path* into '<folder_name>.zip' and report sizes.

    The archive is created relative to the current working directory and
    uses DEFLATE compression. Each directory entry is stored under its bare
    name (no leading folder). Afterwards the raw folder size and the archive
    size are printed in MB.

    Args:
        folder_path: Directory whose entries are archived.
        folder_name: Base name of the archive; '.zip' is appended.
    """
    zip_name = folder_name + '.zip'
    # The context manager closes the archive even if adding an entry fails;
    # 'archive' avoids shadowing the builtin zip().
    with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as archive:
        # Sorted so the archive layout does not depend on os.listdir() order.
        for entry in sorted(os.listdir(folder_path)):
            archive.write(os.path.join(folder_path, entry), entry)
    dir_size = getDirSize(folder_path)
    zip_size = os.path.getsize(zip_name)
    print(r'[Raw/Compressed] %.2f / %.2f MB'%(dir_size/1024/1024, zip_size/1024/1024))
def extractFile(FILE_NAME: str) -> None:
    """Extract and zip the images of one MHT file in the current directory.

    The images go into a folder named after the file without its last
    extension; the folder is created when missing. If it already exists and
    FILE_EXIST_AUTO_REWRITE is 0, the user is asked for confirmation and
    anything but 'y'/'yes' aborts. Finally saveImg() and zipImg() are run.

    Args:
        FILE_NAME: Name of the file, relative to the current working directory.
    """
    cur_path = os.getcwd()
    cur_file = os.path.join(cur_path, FILE_NAME)
    # file does not exist, exit
    if not os.path.isfile(cur_file):
        print("No This File!")
        return

    print("[File Name]", FILE_NAME)
    print("[File Path]", cur_path)
    # Everything before the last dot; empty for names such as 'mht' or '.mht'.
    folder_name = FILE_NAME.rpartition('.')[0]
    if not folder_name:
        # An empty name would make the output folder the working directory
        # itself, so images and the zip would be mixed with unrelated files.
        print("[Skip] Cannot derive a folder name from:", FILE_NAME)
        return
    folder_path = os.path.join(cur_path, folder_name)

    if os.path.exists(folder_path):
        if FILE_EXIST_AUTO_REWRITE == 0:
            print("Folder Exist:", folder_name, "Rewrite It? [y/n]", end=" ")
            confirm = input().strip().lower()
            # Only an explicit yes continues; an unrecognised answer must not
            # silently overwrite existing output.
            if confirm not in ('y', 'yes'):
                return
    else:
        # folder does not exist, create it
        os.makedirs(folder_path)
        print("[New Folder]", folder_name)

    saveImg(cur_file, folder_path)
    zipImg(folder_path, folder_name)
def getTargetFile() -> list:
    """List the .mht files in the current working directory.

    A name qualifies when it is a regular file whose extension is '.mht',
    compared case-insensitively. Names without a real extension (for
    example a file called 'mht' or '.mht') and directories are ignored.
    Every match and the total count are printed.

    Returns:
        The matching file names, in the order os.listdir() produced them.
    """
    cur_path = os.getcwd()
    target_file_list = [
        name for name in os.listdir(cur_path)
        if os.path.splitext(name)[1].lower() == '.mht'
        and os.path.isfile(os.path.join(cur_path, name))
    ]
    for name in target_file_list:
        print('[Find File]', name)
    print('[File Nums]', len(target_file_list))
    return target_file_list
if __name__ == '__main__':
    # Script entry point: handle every .mht file found in the working
    # directory, printing a progress banner before each one.
    target_file_list = getTargetFile()
    total = len(target_file_list)
    for position, file in enumerate(target_file_list, start=1):
        banner = "\n------------------------------------------PROCESS [{}/{}]-----------------------------------"
        print(banner.format(position, total))
        extractFile(file)