去网上找的处理库,直接上代码了
import os
import re
import xlrd
import xlwt
def set_style(name=None, height=220, bold=False):
""" 设置表格样式
:param name:
:param height:
:param bold:
:return:
"""
style = xlwt.XFStyle()
font = xlwt.Font()
font.name = name
font.bold = bold
font.color_index = 4
font.height = height
style.font = font
return style
def process_field(field):
""" 处理表格中的具体字段
:param field:
:return:
"""
# pattern = re.compile(r'([\d]{4}-[\d]{2}-[\d]{2} [\d]{2}:[\d]{2}:[\d]{2})\.\d+')
# match = re.search(pattern, field)
# if match:
# ret = match.group(1)
# ret = ret.replace('-', '/')
# return ret
#
# return field
raise NotImplementedError
def read_write_file(read_file_name, write_file_name):
""" 读取表格并做处理再写入新的表格
:param read_file_name:
:param write_file_name:
:return:
"""
wb = xlrd.open_workbook(filename=read_file_name) # 打开文件
f = xlwt.Workbook()
sheet_read = wb.sheet_by_index(0) # 通过索引获取表格
sheet_write = f.add_sheet(sheet_read.name, cell_overwrite_ok=True)
rows = sheet_read.get_rows()
if debug:
print(sheet_read.name, sheet_read.nrows, sheet_read.ncols)
for n, row in enumerate(rows, 0):
fields = [process_field(field.value) if field.ctype == 1 else field.value for field in row]
for i in range(0, len(fields)):
sheet_write.write(n, i, fields[i], set_style('Calibri', 220, True))
# 以 .xlsx结尾的,不修改为xls打不开,没有详细研究,欢迎指出原因
write_file_name = ''.join([write_file_name[: -5], '.xls']) if write_file_name.endswith('.xlsx') else write_file_name
f.save(write_file_name)
def process_source_data(source_path, processed_path):
""" 检查输入路径,遍历目标文件
:param source_path:
:param processed_path:
:return:
"""
assert os.path.isdir(source_path), 'source_path {} is not exists or is not a dir'.format(source_path)
assert not os.path.isfile(processed_path), 'processed_path {} is a file'.format(processed_path)
if not os.path.exists(processed_path):
os.makedirs(processed_path)
for f in (item for item in os.listdir(source_path) if item.endswith(('.xls', '.xlsx'))):
read_write_file(os.path.join(source_path, f), os.path.join(processed_path, f))
if __name__ == '__main__':
debug = True
process_source_data('data_source', 'data_processed')