具体流程:使用库re 正则表达式 筛选出要处理的txt文件,再运用库os 读取txt文件,并运用库openpyxl,pythoncom,win32com导入数据到Excel文件中计算出数据后保存,最后再使用库threading 多线程加快数据处理速度。
效果: 开发出这个程序能将一天繁琐的手动处理Excel数据缩短为几分钟到十几分钟,并且为电脑自动完成。
# _*_ coding:utf-8 _*_
# 开发团队:admin
# 开发人员:admin
# 开发时间:2020/4/15 18:30
# 文件名称:__init__.py.py
# 开发工具:PyCharm
import datetime
import openpyxl
import os
import re
import pythoncom
import win32com
from win32com.client import constants as c # 旨在直接使用VBA常数
from threading import Thread, Lock
# path = r"D:\python_projects\Excel读取"
path = os.path.abspath(".")
TxtName = ""
# ExcelName = ""
num_times = 2
# t = ""
txt_name = ""
save_txt_times = 1
theadlist = []
bankLock = Lock()
def open_txt(txt_name):
global save_txt_times
# 加载 excel 文件
hz = openpyxl.load_workbook('J_V曲线分析.xlsx')
# 加载RawData标签
sheet1 = hz['RawData']
# 打开txt文件
# f_name = TxtName
f_name = "%s.txt" % txt_name
print(f_name)
with open(f_name, 'r+', encoding='utf-8') as f:
raw_data = [i[:-1].split(',') for i in f.readlines()]
# print(raw_data)
s = [s for s in raw_data if 'Potential/V' in s]
# print(s) # 双层列表
# print(raw_data.index(s[0])) # 列表索引值
raw_index = raw_data.index(s[0])
# print(raw_index) # 列表索引值
# print(raw_data[raw_data.index(s[0])]) # 单层列表
# print(raw_data[raw_index][0]) # 列表的第一个
raw_row = raw_data[raw_index][0]
# print(raw_row)
# print(raw_data[raw_data.index(s[0])][1].strip()) # 列表的第二个
raw_col = raw_data[raw_index][1].strip() # 列表的第二个
# print(raw_col)
sheet1['A3'] = raw_data[2][0]
sheet1['A18'] = raw_row
sheet1['B18'] = raw_col
cell_num = 20
for data in raw_data[raw_index + 2:]:
# print(data)
sheet1.cell(cell_num, 1).value = float(data[0])
sheet1.cell(cell_num, 2).value = float(data[1])
cell_num = cell_num + 1
# print(cell_num)
# 清空下面的内容
sheet1.delete_rows(cell_num, 300)
# 设置保存的名字
txt_fin_name = "%s.xlsx" % txt_name
# 保存
hz.save(txt_fin_name)
thread = Thread(target=deposit,
args=(save_txt_times, txt_name)
)
thread.start()
save_txt_times += 1
# 把线程对象都存储到 threadlist中
theadlist.append(thread)
def detect_txt():
global TxtName, txt_name
F = []
for root, dirs, files in os.walk(path):
# print root
# print dirs
for file in files:
# print file.decode('gbk') #文件名中有中文字符时转码
# 识别txt文件
if os.path.splitext(file)[1] == '.txt':
# t为文件名
txt_name = os.path.splitext(file)[0]
# print(t) # 打印所有py格式的文件名
# 正则表达式匹配开头为数字的txt文件
a = re.match(r'^\d', txt_name)
# print(a)
if a is not None:
# print(txt_name)
TxtName = file
# print(TxtName)
open_txt(txt_name)
F.append(txt_name) # 将所有的文件名添加到L列表中
return F, txt_name # 返回L列表
def ExcelSum(ExcelName, num_times, t):
# 加载 excel 文件
hz = openpyxl.load_workbook('数据汇总表格.xlsx')
sj = openpyxl.load_workbook(ExcelName, data_only=True)
# 得到sheet对象
sheet1 = hz['Sheet1']
sheetJV = sj['J-V']
Jsc = sheetJV['E2'].value
# print(Jsc, "jsc")
Voc = sheetJV['F2'].value
FF = sheetJV['G2'].value
PCE = sheetJV['H2'].value
# print(num_times)
sheet1.cell(num_times, 1).value = Jsc
sheet1.cell(num_times, 2).value = Voc
sheet1.cell(num_times, 3).value = FF
sheet1.cell(num_times, 4).value = PCE
sheet1.cell(num_times, 5).value = t
print(ExcelName)
# 指定不同的文件名,可以另存为别的文件
hz.save('数据汇总表格.xlsx')
def file_name():
# 等待保存完成
for thread in theadlist:
thread.join()
# global ExcelName, t
global num_times
F = []
for root, dirs, files in os.walk(path):
# print root
# print dirs
for file in files:
# print file.decode('gbk') #文件名中有中文字符时转码
if os.path.splitext(file)[1] == '.xlsx':
t = os.path.splitext(file)[0]
# print(t) # 打印所有py格式的文件名
# if t == re.match(r'[0-9]', t, re.M | re.I):
a = re.match(r'^\d', t)
# print(a)
if a != None:
# print(file)
ExcelName = file
# re_save(t)
ExcelSum(ExcelName, num_times, t)
num_times += 1
# F.append(t) # 将所有的文件名添加到L列表中
# return F, ExcelName, t # 返回L列表
def re_save(t):
current_address = os.path.abspath('.')
excel_address = os.path.join(current_address, "%s.xlsx") % t
# print(current_address)
xl_app = win32com.client.gencache.EnsureDispatch("Excel.Application") # 若想引用常数的话使用此法调用Excel
xl_app.Visible = False # 是否显示Excel文件
wb = xl_app.Workbooks.Open(excel_address)
# a = "%s.xlsx" % t
print('a')
wb.Save()
wb.Close()
# 定义一个函数,作为新线程执行的入口函数
def deposit(thead_idx, t):
bankLock.acquire()
pythoncom.CoInitialize()
current_address = os.path.abspath('.')
excel_address = os.path.join(current_address, "%s.xlsx") % t
# print(current_address)
# xl_app = win32com.client.gencache.EnsureDispatch("Excel.Application") # 若想引用常数的话使用此法调用Excel
xl_app = win32com.client.DispatchEx('Excel.Application') # 若想引用常数的话使用此法调用Excel
xl_app.Visible = False # 是否显示Excel文件
wb = xl_app.Workbooks.Open(excel_address)
# a = "%s.xlsx" % t
wb.Save()
wb.Close()
bankLock.release()
print(f'保存{t}结束')
print(f'子线程 {thead_idx} 结束')
def clear_up():
hz = openpyxl.load_workbook('数据汇总表格.xlsx') # 打开Excel表格
# 得到sheet对象
sheet1 = hz['Sheet1']
sheet1.delete_rows(2, 200) # 清空旧数据
hz.save('数据汇总表格.xlsx') # 保存,准备存入新数据
print()
def save_another():
hz = openpyxl.load_workbook('数据汇总表格.xlsx') # 打开Excel表格
# 得到sheet对象
sheet1 = hz['Sheet1']
time = datetime.datetime.now()
time_now = time.strftime("%Y-%m-%d") # 以当前日期保存Excel
hz.save('数据汇总表格%s.xlsx' % time_now)
# print()
# print('主线程结束')
def main():
detect_txt()
clear_up() # 清除旧数据
file_name()
save_another() # 插入另存为函数
if __name__ == '__main__':
print("by ZBC")
main()