def file_encoding(filepath):
    """Detect the text encoding of a file with ``chardet``.

    The whole file is read in binary mode and handed to ``chardet.detect``.

    :param filepath: path of the file to inspect
    :return: ``'UTF-8-SIG'`` when detection fails or when the detected
        encoding name starts with ``utf-``; otherwise the encoding name
        reported by ``chardet``
    """
    with open(filepath, 'rb') as f:
        detected = chardet.detect(f.read())
    encoding = detected['encoding']
    # UTF-8-SIG decodes UTF-8 both with and without a BOM, so it doubles as
    # the default when nothing could be detected.
    # NOTE(review): the 'utf-' prefix also maps UTF-16/UTF-32 results to
    # UTF-8-SIG, which cannot decode them - confirm this is intended.
    if not encoding or encoding.lower().startswith('utf-'):
        return 'UTF-8-SIG'
    return encoding


def zip_file(dir_path, save_path):
    """Compress a file, or a directory with everything below it, into a zip.

    Nothing is written (and ``None`` is returned) when ``dir_path`` does not
    exist. Archive member names are derived from the given paths by
    ``ZipFile.write``.

    :param dir_path: path of the file or directory to compress
    :param save_path: path of the zip archive to create (overwritten)
    """
    import zipfile

    if not os.path.exists(dir_path):
        return
    archive_abs = os.path.abspath(save_path)
    # The context manager closes the archive even if a write fails.
    with zipfile.ZipFile(save_path, 'w') as archive:
        archive.write(dir_path, compress_type=zipfile.ZIP_DEFLATED)
        if not os.path.isdir(dir_path):
            return
        # ZipFile.write() on a directory only stores the directory entry, so
        # the tree has to be walked to store its contents as well.
        for root, dirs, files in os.walk(dir_path):
            dirs.sort()
            for name in dirs:
                # Explicit entries keep empty sub-directories in the archive.
                archive.write(os.path.join(root, name))
            for name in sorted(files):
                member = os.path.join(root, name)
                # Do not pack the archive into itself when it lives in the tree.
                if os.path.abspath(member) == archive_abs:
                    continue
                archive.write(member, compress_type=zipfile.ZIP_DEFLATED)


def upzip_file(zip_path=None, unzip_path=None):
    """Extract every member of a zip archive into a directory.

    The target directory, including missing parents, is created on demand.

    :param zip_path: path of the zip archive
    :param unzip_path: directory to extract into
    :return: list with one path per archive member, built by joining
        ``unzip_path`` and the member name, with backslashes replaced by ``/``
    """
    with zipfile.ZipFile(zip_path) as archive:
        os.makedirs(unzip_path, exist_ok=True)
        names = archive.namelist()
        archive.extractall(unzip_path)
    return [os.path.join(unzip_path, name).replace('\\', '/') for name in names]
def get_files(path, file_list=None):
    """Recursively collect the paths of all regular files below a directory.

    :param path: directory to scan
    :param file_list: optional list to append the results to; a new list is
        created when omitted
    :return: the list of file paths (the same object as ``file_list`` when
        one was passed in)
    """
    import os

    if file_list is None:
        file_list = []
    for entry in os.listdir(path):
        entry_path = os.path.join(path, entry)
        if os.path.isdir(entry_path):
            get_files(entry_path, file_list)
        elif os.path.isfile(entry_path):
            file_list.append(entry_path)
    return file_list


def str_to_num(strings):
    """Extract the first number from a value and drop its comma separators.

    :param strings: value to search; it is converted with ``str()`` first
    :return: the first numeric match as a string with commas removed;
        the integer ``0`` when there is no match and the value is falsy;
        otherwise ``str(strings)`` unchanged
    """
    import re

    # Raw string: the pattern relies on backslash escapes that are invalid
    # in a normal string literal and trigger a warning there.
    matches = re.findall(r'(\d*-?\,?\d*\,?\d*\,?\d*\,?\d+\.?\d*)', str(strings))
    if matches:
        return matches[0].replace(',', '')
    if not strings:
        return 0
    return str(strings)
def read_csv(self, filepath=None, index_keys=None):
    """Read a CSV file and return the rows that follow its title row.

    The encoding comes from ``fileTools.file_encoding``. The file is first
    read once completely to verify that encoding; if that fails the error is
    printed and ``UTF-8-SIG`` is used instead.

    A row is treated as the title row when any cleaned cell equals one of
    ``index_keys``. Rows before the first title row are skipped, and a later
    matching row replaces the title instead of being returned as data.

    :param filepath: path of the CSV file
    :param index_keys: keywords that identify the title row
    :return: tuple ``(rows, title)``; both are empty lists when no title
        row was found
    """
    if index_keys is None:
        index_keys = []

    def clean(cell):
        # Trim whitespace, drop embedded newlines and double quotes, turn
        # non-breaking spaces into plain ones; empty cells become None.
        if not isinstance(cell, str):
            return cell
        cell = cell.strip().replace('\n', '').replace('\xa0', ' ').replace('"', '').strip()
        return cell or None

    encodings = fileTools.file_encoding(filepath)
    if encodings == 'GB2312':
        # GBK is a superset of GB2312 and decodes more real-world files.
        encodings = 'gbk'
    try:
        # newline='' lets the csv module handle line endings itself.
        with open(filepath, encoding=encodings, newline='') as probe:
            for _ in csv.reader(probe, skipinitialspace=True):
                pass
    except (UnicodeError, LookupError, csv.Error) as e:
        print(e)
        encodings = 'UTF-8-SIG'

    file_list = []
    title_list = []
    title_found = False
    with open(filepath, encoding=encodings, newline='') as f:
        for row in csv.reader(f, skipinitialspace=True):
            cleaned = [clean(cell) for cell in row]
            if any(key in cleaned for key in index_keys):
                title_list = cleaned
                title_found = True
            elif title_found:
                file_list.append(cleaned)
    return file_list, title_list


def read_excel(self, filepath=None, sheet_name=None, index_keys=None):
    """Read one sheet of an Excel workbook and return the rows after its title.

    A row is treated as the title row when any of its cells, raw or cleaned,
    equals one of ``index_keys``. Rows before the first title row are
    skipped, and a later matching row replaces the title instead of being
    returned as data.

    :param filepath: path of the workbook, opened with ``xlrd``
    :param sheet_name: name of the sheet; the first sheet when omitted
    :param index_keys: keywords that identify the title row
    :return: tuple ``(rows, title)``; both are empty lists when no title
        row was found
    """
    if index_keys is None:
        index_keys = []

    def clean(cell):
        # Trim whitespace, drop embedded newlines, turn non-breaking spaces
        # into plain ones; empty cells become None. Non-strings pass through.
        if not isinstance(cell, str):
            return cell
        cell = cell.strip().replace('\n', '').replace('\xa0', ' ').strip()
        return cell or None

    wb = xlrd.open_workbook(filepath)
    if not sheet_name:
        sheet_name = wb.sheet_names()[0]
    sh = wb.sheet_by_name(sheet_name)

    file_list = []
    title = []
    title_found = False
    for r in range(sh.nrows):
        row = sh.row_values(r)
        cleaned = [clean(cell) for cell in row]
        # Cleaned cells are checked too so that a title cell padded with
        # whitespace is still recognised, as it is for CSV input.
        if any(key in row or key in cleaned for key in index_keys):
            title = cleaned
            title_found = True
        elif title_found:
            file_list.append(cleaned)
    return file_list, title
def list_of_groups(self, list_info, per_list_len):
    """Split a sequence into consecutive chunks of at most ``per_list_len`` items.

    Every chunk is a new list; only the last one may be shorter.

    :param list_info: items to split
    :param per_list_len: maximum number of items per chunk, must be positive
    :return: list of chunks, empty when ``list_info`` is empty
    :raises ValueError: if ``per_list_len`` is not positive
    """
    if per_list_len <= 0:
        raise ValueError('per_list_len must be a positive integer')
    items = list(list_info)
    return [items[start:start + per_list_len]
            for start in range(0, len(items), per_list_len)]
def untar_file(zip_file_path, unzip_file_path):
    """Extract every member of a tar archive into a directory.

    The target directory, including missing parents, is created on demand.

    :param zip_file_path: path of the tar archive
    :param unzip_file_path: directory to extract into
    :return: list of member names as stored in the archive
    """
    # The context manager closes the archive even if extraction fails.
    with tarfile.open(zip_file_path) as tar:
        name_list = tar.getnames()
        os.makedirs(unzip_file_path, exist_ok=True)
        # NOTE(review): members are extracted without an extraction filter;
        # for archives from untrusted sources consider tarfile's
        # filter='data' where the running Python supports it.
        for name in name_list:
            tar.extract(name, unzip_file_path)
    return name_list
def upzip_file_new(self, zip_path=None, unzip_path=None):
    """Extract a zip archive, repairing GBK member names stored as cp437.

    Names without the UTF-8 flag are decoded by ``zipfile`` as cp437; those
    are re-decoded as GBK so Chinese names written by Windows tools come out
    readable. Names carrying the UTF-8 flag, or that are not valid GBK, are
    kept as they are.

    :param zip_path: path of the zip archive
    :param unzip_path: directory to extract into, created when missing
    :return: list with one path per archive member, built by joining
        ``unzip_path`` and the (repaired) member name
    :raises ValueError: if a member name would be written outside
        ``unzip_path``
    """
    paths = []
    os.makedirs(unzip_path, exist_ok=True)
    root = os.path.abspath(unzip_path)
    with zipfile.ZipFile(file=zip_path, mode='r') as zf:
        for info in zf.infolist():
            new_name = info.filename
            # Bit 11 (0x800) of the general purpose flags marks a UTF-8
            # name, which is already decoded correctly.
            if not info.flag_bits & 0x800:
                try:
                    new_name = info.filename.encode('cp437').decode('gbk')
                except UnicodeError:
                    new_name = info.filename
            # Build the output path for this member.
            new_path = os.path.join(unzip_path, new_name)
            target = os.path.abspath(new_path)
            # Member names are written by hand here, so reject anything
            # ('..' segments, absolute paths) that leaves the target folder.
            if os.path.commonpath([root, target]) != root:
                raise ValueError('zip member escapes target directory: %r' % new_name)
            paths.append(new_path)
            # Decide by the entry type, not its size: an empty file has
            # size 0 but is still a file.
            if info.is_dir():
                os.makedirs(new_path, exist_ok=True)
            else:
                # Archives may omit explicit entries for parent directories.
                os.makedirs(os.path.dirname(target), exist_ok=True)
                with open(file=new_path, mode='wb') as f:
                    f.write(zf.read(info))
    return paths