批量解压zip

FOAF-lambda

已于 2023-10-09 17:19:16 修改

阅读量142

点赞数

文章标签： python zipfile

于 2023-06-28 15:51:59 首次发布

本文链接：https://blog.csdn.net/lwdfzr/article/details/131438304

版权

def file_encoding(filepath):
    """Detect the text encoding of a file.

    The whole file is read as bytes and handed to ``chardet.detect``.

    :param filepath: path of the file to inspect
    :return: the detected encoding name; ``'UTF-8-SIG'`` when nothing was
        detected or when the detected name starts with ``utf-``
    """
    fallback = 'UTF-8-SIG'
    with open(filepath, 'rb') as handle:
        detection = chardet.detect(handle.read())
    detected = detection['encoding']
    if not detected:
        # Detection gave no usable answer.
        return fallback
    if detected.lower().startswith('utf-'):
        # Every UTF-* result is normalised to the BOM-tolerant codec name.
        return fallback
    return detected

def zip_file(dir_path, save_path):
    """Compress a file, or a folder with everything inside it, into a zip.

    Entry names are derived from the given path exactly as
    ``ZipFile.write`` does by default (no explicit ``arcname``).

    :param dir_path: folder or file path to compress; nothing is written
        when it does not exist
    :param save_path: path of the zip archive to create
    :return: None
    """
    import os
    import zipfile
    if not os.path.exists(dir_path):
        return
    archive_abs = os.path.abspath(save_path)
    # The context manager closes the archive even if a write fails.
    with zipfile.ZipFile(save_path, 'w', compression=zipfile.ZIP_DEFLATED) as archive:
        archive.write(dir_path)
        if not os.path.isdir(dir_path):
            return
        # ZipFile.write() on a folder stores only the directory entry itself,
        # so the contents have to be added explicitly.
        for root, dirs, files in os.walk(dir_path):
            for name in dirs + files:
                member = os.path.join(root, name)
                if os.path.abspath(member) == archive_abs:
                    # Never add the archive that is being written to itself.
                    continue
                archive.write(member)

def upzip_file(zip_path=None, unzip_path=None):
    """Extract every member of a zip archive into a folder.

    :param zip_path: path of the zip archive
    :param unzip_path: destination folder; created (including missing
        parent folders) when it does not exist
    :return: list with one path per archive member, built by joining
        ``unzip_path`` with the member name and using ``/`` as separator
    """
    # The context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(zip_path) as archive:
        # makedirs also copes with nested destinations, unlike os.mkdir.
        os.makedirs(unzip_path, exist_ok=True)
        member_names = archive.namelist()
        archive.extractall(unzip_path)
    return [os.path.join(unzip_path, name).replace('\\', '/') for name in member_names]

def get_files(path, file_list=None):
    """Recursively collect the paths of all files below a folder.

    :param path: folder to scan
    :param file_list: optional list to append to; it is extended in place
        and returned. A new list is created when omitted.
    :return: list of file paths found under ``path``
    """
    import os
    collected = [] if file_list is None else file_list
    for entry in os.listdir(path):
        candidate = os.path.join(path, entry)
        if os.path.isfile(candidate):
            collected.append(candidate)
        elif os.path.isdir(candidate):
            # Descend into sub-folders, accumulating into the same list.
            get_files(candidate, collected)
    return collected

def str_to_num(strings):
    """Extract the first number found in a value, dropping thousands commas.

    :param strings: any value; it is converted with ``str()`` before matching
    :return: the first numeric match as a string with commas removed;
        the integer ``0`` when nothing matches and the input is falsy;
        otherwise ``str(strings)`` unchanged
    """
    import re
    # Raw string: the pattern is unchanged, but '\d', '\,' and '\.' are no
    # longer invalid escape sequences in a normal string literal.
    number_pattern = re.compile(r'(\d*-?\,?\d*\,?\d*\,?\d*\,?\d+\.?\d*)')
    matches = number_pattern.findall(str(strings))
    if matches:
        return matches[0].replace(',', '')
    if not strings:
        return 0
    return str(strings)

def read_csv(self, filepath=None, index_keys=None):
    """Read a CSV file and return the rows that follow its title row.

    A row counts as the title row when any of ``index_keys`` equals one of
    its cleaned cells. Each later row that contains a key replaces the title
    and is itself not returned. Rows before the first title row are dropped.
    To get dicts instead of lists, zip the returned title with each row.

    :param filepath: path of the CSV file
    :param index_keys: keywords used to recognise the title row; with no
        keywords no title is found and no rows are returned
    :return: tuple ``(rows, title)`` where ``rows`` is a list of cleaned
        rows (lists) and ``title`` is the cleaned title row (empty list when
        none was found)
    """
    index_keys = [] if index_keys is None else index_keys
    # NOTE(review): fileTools is defined outside this block; presumably it is
    # the helper that exposes file_encoding() -- confirm.
    encodings = fileTools.file_encoding(filepath)
    if encodings == 'GB2312':
        encodings = 'gbk'

    # Trial pass: read the whole file once so that a wrong encoding guess
    # surfaces here and can be replaced by the fallback below.
    try:
        with codecs.open(filepath, encoding=encodings) as probe:
            for _ in csv.reader(probe, skipinitialspace=True):
                pass
    except (UnicodeError, LookupError, csv.Error) as e:
        # Only encoding-dependent failures are worth retrying; anything else
        # (e.g. a missing file) would fail again on the real pass anyway.
        print(e)
        encodings = 'UTF-8-SIG'

    title_list = []
    file_list = []
    title_index = 0
    title_found = False
    with codecs.open(filepath, encoding=encodings) as f:
        for index, row in enumerate(csv.reader(f, skipinitialspace=True)):
            temp = []
            for cell in row:
                if isinstance(cell, str):
                    # The trailing strip() already removes tabs, so no
                    # separate tab handling is needed afterwards.
                    cell = cell.strip().replace('\n', '').replace('\xa0', ' ').replace('"', '').strip()
                    if cell == '':
                        cell = None
                temp.append(cell)
            if any(key in temp for key in index_keys):
                title_list = temp
                title_index = index + 1
                title_found = True
            # Data rows are the ones strictly after the latest title row.
            if title_found and index >= title_index:
                file_list.append(temp)
    return file_list, title_list

def read_excel(self, filepath=None, sheet_name=None, index_keys=None):
    """Read one sheet of an Excel workbook and return rows after its title.

    A row counts as the title row when any of ``index_keys`` equals one of
    its raw (uncleaned) cell values. Each later row that contains a key
    replaces the title and is itself not returned. Rows before the first
    title row are dropped. To get dicts instead of lists, zip the returned
    title with each row.

    :param filepath: path of the workbook file
    :param sheet_name: name of the sheet; the first sheet is used when empty
    :param index_keys: keywords used to recognise the title row; with no
        keywords no title is found and no rows are returned
    :return: tuple ``(rows, title)`` where ``rows`` is a list of cleaned
        rows (lists) and ``title`` is the cleaned title row (empty list when
        none was found)
    """
    index_keys = [] if index_keys is None else index_keys
    wb = xlrd.open_workbook(filepath)
    if not sheet_name:
        sheet_name = wb.sheet_names()[0]
    sh = wb.sheet_by_name(sheet_name)
    file_list = []
    title = []
    title_index = 0
    title_found = False
    for index in range(sh.nrows):
        row = sh.row_values(index)
        temp = []
        for cell in row:
            if isinstance(cell, str):
                # The trailing strip() already removes tabs, so no separate
                # tab handling is needed afterwards.
                cell = cell.strip().replace('\n', '').replace('\xa0', ' ').strip()
                if cell == '':
                    cell = None
            temp.append(cell)
        # NOTE(review): keys are compared with the raw row here, whereas
        # read_csv compares with the cleaned row -- confirm which is intended.
        if any(key in row for key in index_keys):
            title = temp
            title_index = index + 1
            title_found = True
        # Data rows are the ones strictly after the latest title row.
        if title_found and index >= title_index:
            file_list.append(temp)
    return file_list, title

def list_of_groups(self, list_info, per_list_len):
    """Split a sequence into consecutive groups of ``per_list_len`` items.

    :param list_info: sequence to split (must support ``len`` and slicing)
    :param per_list_len: number of items per group
    :return: list of groups; every full group is a list, and any leftover
        items are appended last as a slice of ``list_info``
    """
    shared_iter = iter(list_info)
    # Zipping the same iterator with itself yields only the full groups.
    groups = [list(chunk) for chunk in zip(*[shared_iter] * per_list_len)]
    leftover = len(list_info) % per_list_len
    if leftover != 0:
        groups.append(list_info[-leftover:])
    return groups

def untar_file(zip_file_path, unzip_file_path):
    """Extract every member of a tar archive into a folder.

    :param zip_file_path: path of the .tar archive
    :param unzip_file_path: destination folder; created (including missing
        parent folders) when it does not exist
    :return: list of member names contained in the archive
    """
    # The context manager closes the archive even if extraction fails.
    with tarfile.open(zip_file_path) as tar:
        name_list = tar.getnames()
        # makedirs also copes with nested destinations, unlike os.mkdir.
        os.makedirs(unzip_file_path, exist_ok=True)
        # NOTE(security): member names are not validated here; only extract
        # archives from trusted sources, or pass an extraction filter where
        # the running Python version supports one.
        tar.extractall(unzip_file_path)
    return name_list

def upzip_file_new(self, zip_path=None, unzip_path=None):
    """Extract a zip archive, repairing GBK-encoded member names.

    The ``zipfile`` module decodes names without the UTF-8 flag as cp437,
    which garbles names written by tools that stored them as GBK. Such names
    are re-encoded to their original bytes and decoded as GBK.

    :param zip_path: path of the zip archive
    :param unzip_path: destination folder; created (including missing
        parent folders) when it does not exist
    :return: list with one path per archive member (``unzip_path`` joined
        with the repaired member name)
    :raises ValueError: if a member name would be written outside
        ``unzip_path``
    """
    paths = []
    os.makedirs(unzip_path, exist_ok=True)
    root = os.path.abspath(unzip_path)
    with zipfile.ZipFile(file=zip_path, mode='r') as zf:
        for info in zf.infolist():
            old_name = info.filename
            if info.flag_bits & 0x800:
                # Bit 11 set: the name was stored as UTF-8 and is already
                # decoded correctly; re-encoding it as cp437 would fail.
                new_name = old_name
            else:
                try:
                    new_name = old_name.encode('cp437').decode('gbk')
                except (UnicodeEncodeError, UnicodeDecodeError):
                    # Not a GBK name after all; keep what zipfile decoded.
                    new_name = old_name
            new_path = os.path.join(unzip_path, new_name)
            # Refuse names such as '../x' or absolute paths that would
            # escape the destination folder.
            if os.path.commonpath([root, os.path.abspath(new_path)]) != root:
                raise ValueError('unsafe path in zip archive: %r' % new_name)
            paths.append(new_path)
            if info.is_dir():
                os.makedirs(new_path, exist_ok=True)
                continue
            # Archives do not always carry explicit entries for parent
            # folders, so create them before writing the file. Deciding by
            # is_dir() instead of size keeps empty files as files.
            parent = os.path.dirname(new_path)
            if parent:
                os.makedirs(parent, exist_ok=True)
            with open(file=new_path, mode='wb') as f:
                f.write(zf.read(info))
    return paths

FOAF-lambda

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
批量解压zip

:param zip_file_path: .tar 压缩文件。:return 解压 zip 文件,返回所有解压文件夹下的路径。:param unzip_file_path: 解压后的路径。:param index_keys: 判断标题的关键字。:param index_keys: 判断标题的关键字。:param filepath: 文件夹路径。:param filepath: 文件路径。:param sheet_name: 表名。:unzip_path 解压文件路径。:zip_path 压缩文件路径。
复制链接

扫一扫