# python 3.7
import os
import re
import shutil
import openpyxl
import numpy
import numpy as np
# import pyautogui
import time
import os
from openpyxl import Workbook
from openpyxl.drawing.image import Image
import pandas as pd
from matplotlib import pyplot as plt
from openpyxl.reader.excel import load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows
from scipy.stats import norm
# Image path; replace with the image you want to match.
image_path = "path/to/your/image.png"
txt_infor = [] # stripped lines of the text file most recently loaded by read_txt_file
match_list = [] # match expressions (match1..match6) of the rule row being processed
match_result_list = []
get_data_excel = r'D:\PycharmProjects\pythonProject\collectTxt\GetData.xlsx'
get_cllect_infor_excel = r'd:\CollectTestInfor.xlsx'
work_curr_dir = r'D:\PycharmProjects'
# NOTE(review): the raw strings below keep BOTH backslashes of every "\\" pair
# in the resulting path text — confirm the doubled separators are intended.
collect_txt_record_csv = r'D:\\123\\txt_record.csv'
collect_Sn_record_csv = r'D:\\123\\sn_record.csv'
result_step0_csv = r'D:\\123\\test_all_result0.csv'
result_step1_csv = r'D:\\123\\test_all_result1.csv'
result_step2_csv = r'D:\\123\\test_all_result2.csv'
result_step3_csv = r'D:\\123\\test_all_result3.csv'
result_step4_csv = r'D:\\123\\test_all_result4.csv'
result_step5_csv = r'D:\\123\\test_all_result5.xlsx'
result_step6_csv = r'D:\\123\\test_all_result6_End.xlsx'
Collect_step0_csv = r'D:\\123\\test_all_Collect0.csv'
Collect_step1_csv = r'D:\\123\\test_all_Collect1.csv'
Collect_step2_csv = r'D:\\123\\test_all_Collect2.csv'
txt_new_list = []
divlist = []
optionlist = []
option_temp_list = []
Setoptionlist = []
def define_match_infor():
    """Reset ``match_result_list`` to the labels "match1" .. "match5".

    Returns:
        The module-level ``match_list`` object (not the list that was filled).
    """
    # NOTE(review): this fills match_result_list but returns match_list —
    # confirm which of the two lists callers actually expect.
    match_result_list.clear()
    match_result_list.extend("match" + str(number) for number in range(1, 6))
    return match_list
def read_src_data_excel(excel_path):
    """Read the Excel file at ``excel_path`` with the openpyxl engine.

    Returns:
        The pandas DataFrame produced by ``pd.read_excel``.
    """
    return pd.read_excel(excel_path, engine='openpyxl')
def read_src_data_csv(excel_path):
    """Read the CSV file at ``excel_path`` into a pandas DataFrame.

    The parameter keeps its historical name; the file is parsed as CSV,
    not as an Excel workbook.
    """
    return pd.read_csv(excel_path)
def find_txt_by_dir(directory, key, regexp=''):
    """Recursively collect the files under ``directory`` whose name matches ``key``.

    The file previously defined this function twice; the second definition
    shadowed the first, so the suffix-matching path recursed into the
    three-argument version with two arguments and raised ``TypeError``.
    Both behaviours now live in one function.

    Args:
        directory: Root directory to walk (sub-directories included).
        key: When ``regexp`` is empty, a suffix passed to ``str.endswith``.
            Otherwise a regular expression searched for in the bare file name.
        regexp: Mode switch. The empty string (default) selects suffix
            matching; any other value selects regular-expression matching.

    Returns:
        A list of full paths of the matching files. The list is empty when
        ``directory`` does not exist.
    """
    if not os.path.exists(directory):
        print(directory + " ,dir not exits")
        return []

    if regexp == '':
        def is_wanted(name):
            return name.endswith(key)
    else:
        # Compile once instead of re-parsing the pattern for every file.
        pattern = re.compile(key)

        def is_wanted(name):
            return pattern.search(name) is not None

    file_names = [
        os.path.join(root, name)
        for root, _dirs, files in os.walk(directory)
        for name in files
        if is_wanted(name)
    ]
    print(file_names)
    return file_names
def save_data_to_csv(file, data):
    """Append every string in ``data`` to ``file`` (UTF-8), adding no separators.

    Callers supply their own commas and line breaks as list elements.
    """
    with open(file, 'a', encoding='utf-8') as handle:
        handle.writelines(data)
def save_cust_data_to_csv(file, data):
    """Overwrite ``file`` (UTF-8) with the strings in ``data``, adding no separators.

    Unlike ``save_data_to_csv`` the file is truncated first.
    """
    with open(file, 'w', encoding='utf-8') as handle:
        handle.writelines(data)
def read_txt_file(path):
    """Load a UTF-8 text file into the module-level ``txt_infor`` list.

    ``txt_infor`` is cleared first, even when the file is missing. Each line
    is stripped of surrounding whitespace, stored and echoed to stdout.

    Returns:
        True when the file exists and was read, False when it does not exist.
    """
    txt_infor.clear()
    if os.path.exists(path):
        with open(path, 'r', encoding='utf-8') as handle:
            for raw_line in handle:
                stripped = raw_line.strip()
                txt_infor.append(stripped)
                print(stripped)
        return True
    return False
def find_key_data(key):
    """Return the first line in ``txt_infor`` that contains ``key``.

    Both the key and each line are compared as strings. An empty string is
    returned when no line contains the key.
    """
    needle = str(key)
    return next((line for line in txt_infor if needle in str(line)), "")
def replace_key_with_blank(key, line):
    """Return ``line`` with every occurrence of ``key`` replaced by one space."""
    return line.replace(key, ' ')
def find_keyword(s, keyword):
    """Return the part of ``s`` that follows the first occurrence of ``keyword``.

    ``keyword`` is converted to a string before searching.

    Returns:
        The remainder of ``s`` after the keyword, or None when the keyword
        does not occur in ``s``.
    """
    needle = str(keyword)
    position = s.find(needle)
    if position == -1:
        return None
    return s[position + len(needle):]
def match_data(str_infor, regexp):
    """Return the first ``re.findall`` result of ``regexp`` in ``str_infor``.

    Returns an empty string when there is no match. With capture groups the
    result has whatever shape ``re.findall`` yields (e.g. a tuple).
    """
    found = re.findall(regexp, str_infor)
    return found[0] if found else ""
def match_string(pattern, string):
    """Return the first substring of ``string`` matched by ``pattern``.

    The outcome is also reported on stdout. An empty string is returned when
    the pattern does not match.
    """
    found = re.search(pattern, string)
    if found is None:
        print("未能找到匹配的字符串")
        return ""
    matched_str = found.group()
    print("匹配到的字符串为:", matched_str)
    return matched_str
def get_after_match_string(pattern, string):
    """Return the text of ``string`` that follows the first match of ``pattern``.

    The outcome is also reported on stdout. An empty string is returned when
    the pattern does not match.
    """
    found = re.search(pattern, string)
    if found is None:
        print("未能找到匹配的字符串")
        return ""
    # The end of the match equals its start plus the matched length.
    remaining_data = string[found.end():]
    print("匹配结果之后的数据为:", remaining_data)
    return remaining_data
def collect_infor_from_txt(txt_path, get_data_xlsx, out_data_xlsx):
    """Extract the configured values from one text log and append them as a CSV row.

    The rules workbook ``get_data_xlsx`` is read row by row. For each rule the
    first log line containing the rule's ``key`` is located, the text after
    the key is kept, and the expressions in columns ``match1``..``match6`` are
    applied one after another, each on the text left over by the previous one.
    The sentinel ``'No_Math'`` produces an empty CSV field.

    Args:
        txt_path: Text log to parse; also written as the last field of the row.
        get_data_xlsx: Workbook holding the extraction rules.
        out_data_xlsx: File the resulting row is appended to.

    Returns:
        False when the log does not exist or an expression fails to match.
        True otherwise, including when nothing was extracted and no row
        was written.
    """
    match_result_list.clear()
    # Load the log into txt_infor.
    if not read_txt_file(txt_path):
        print(txt_path + " ,file not exits")
        return False
    # Load the extraction rules.
    rules = read_src_data_excel(get_data_xlsx)
    for _, row in rules.iterrows():
        print(row['dataname'])
        match_list.clear()
        for expression in row['match1':'match6']:
            print(expression)
            match_list.append(expression)
        key_text = str(row['key'])
        print(key_text)
        if key_text == 'nan':
            # An empty key cell marks the end of the rule table.
            print(key_text + '....')
            break
        row_infor = find_key_data(row['key'])
        if row_infor == '':
            continue
        # Keep only the text that follows the key.
        row_infor = find_keyword(row_infor, row['key'])
        print('Line:' + row_infor)
        for regexp in match_list:
            if regexp == 'No_Math':
                # Log the rule's key. The previous code reused the stale loop
                # variable of the match-column loop, which raised TypeError
                # whenever the last match cell was not a string.
                print(key_text + ' No_Math')
                match_result_list.append(',')
                continue
            matchdata = match_string(regexp, row_infor)
            if matchdata == "":
                print(regexp, row_infor, " Fail")
                return False
            match_result_list.append(matchdata)
            match_result_list.append(',')
            # Continue matching behind what was just consumed.
            row_infor = get_after_match_string(regexp, row_infor)
    if len(match_result_list) == 0:
        return True
    match_result_list.append(txt_path)
    match_result_list.append('\n')
    print(match_result_list)
    save_data_to_csv(out_data_xlsx, match_result_list)
    return True
def collect_infor(txt_list, get_data_xlsx, out_data_xlsx):
    """Collect data from every log in ``txt_list`` and write the step-1 CSV.

    Each log is processed with ``collect_infor_from_txt``, which appends to
    ``out_data_xlsx``. Afterwards the accumulated rows are read back, prefixed
    with the header line derived from the rules workbook, and appended to
    ``result_step1_csv``.

    Args:
        txt_list: Paths of the text logs to process.
        get_data_xlsx: Rules workbook; also the source of the header line.
        out_data_xlsx: Intermediate file the raw rows are appended to.

    Returns:
        False as soon as one log fails to process, True otherwise.
    """
    # Snapshot the input first: if the caller hands in the module-level
    # txt_new_list itself, clearing it would otherwise empty the work list.
    pending = list(txt_list)
    txt_new_list.clear()
    for txt in pending:
        if not collect_infor_from_txt(txt, get_data_xlsx, out_data_xlsx):
            return False
        print(txt + " Finish")
        txt_new_list.append(txt + '\n')
    print(out_data_xlsx)
    read_txt_file(out_data_xlsx)
    # Build the header from the rules file that was actually used, not from
    # the hard-coded module constant, so header and rows always agree.
    txt_csv_data = [get_head(get_data_xlsx, 3)]
    print('=================================================================')
    for lines in txt_infor:
        txt_csv_data.append(lines + '\n')
        print(lines)
    print('=================================================================')
    save_data_to_csv(result_step1_csv, txt_csv_data)
    return True
def check_txt_by_record(txt_path_dir, key, get_txt_csv):
    """Return the log files under ``txt_path_dir`` that are not yet recorded.

    Files are located with ``find_txt_by_dir`` in regular-expression mode.
    When the record file ``get_txt_csv`` exists, every path listed in it (one
    per line, as written by ``save_data_to_csv``) is removed from the result.

    The previous implementation did the opposite: it returned recorded paths
    missing from the directory, so new files were never reported once a record
    existed. It also parsed the record with ``read_csv``, which consumed the
    first recorded path as a column header.

    Args:
        txt_path_dir: Directory searched recursively.
        key: Regular expression applied to each file name.
        get_txt_csv: Record file of already processed paths.

    Returns:
        A list of full paths still to be processed.
    """
    txt_list = find_txt_by_dir(txt_path_dir, key, 'rematch')  # e.g. '_Flw.txt'
    if not os.path.exists(get_txt_csv):
        return txt_list
    with open(get_txt_csv, 'r', encoding='utf-8') as record:
        recorded = {line.strip() for line in record if line.strip()}
    return [path for path in txt_list if path not in recorded]
def get_head(xlsx_path, cloum_index=2):
    """Build the CSV header line for the collected data.

    For every row of the rules workbook, the cell at column position
    ``cloum_index`` is emitted, followed by the five fixed labels
    ``Min, Max, value1, value2, value3``. Every field is terminated by a
    comma and the line ends with a newline.

    The file previously defined ``get_head`` twice, and the two-argument
    version shadowed the one-argument one, making the latter unusable. The
    default of 2 restores the one-argument call with its original column.

    Args:
        xlsx_path: Rules workbook to read.
        cloum_index: Zero-based position of the column holding the names.

    Returns:
        The header line as a single string.
    """
    data_unit = ['Min', 'Max', 'value1', 'value2', 'value3']
    pds = read_src_data_excel(xlsx_path)  # e.g. r'D:\\GetData.xlsx'
    data_head = []
    for _, row in pds.iterrows():
        # .iloc makes the positional lookup explicit; integer keys passed to
        # Series.__getitem__ are deprecated as positions in recent pandas.
        data_head.append(row.iloc[cloum_index])
        data_head.extend(data_unit)
    print(data_head)
    csv_head_str = ''.join(str(item) + ',' for item in data_head) + '\n'
    print('==============================' + csv_head_str)
    return csv_head_str
def get_head_list(xlsx_path, cloum_index):
    """Return the non-empty cells of one column of the rules workbook.

    Args:
        xlsx_path: Rules workbook to read.
        cloum_index: Zero-based position of the column to extract.

    Returns:
        A list of the cell values whose string form is not ``'nan'``.
    """
    pds = read_src_data_excel(xlsx_path)  # e.g. r'D:\\GetData.xlsx'
    # .iloc makes the positional lookup explicit; integer keys passed to
    # Series.__getitem__ are deprecated as positions in recent pandas.
    cells = (row.iloc[cloum_index] for _, row in pds.iterrows())
    data_head = [cell for cell in cells if str(cell) != 'nan']
    print(data_head)
    return data_head
def get_data_head(xlsx_path):
    """Build a CSV header line from the third column of the rules workbook.

    Rows are read in order and reading stops at the first empty cell. Every
    name is terminated by a comma and the line ends with a newline.

    Args:
        xlsx_path: Rules workbook to read, e.g. r'D:\\GetData.xlsx'.

    Returns:
        The header line as a single string.
    """
    pds = read_src_data_excel(xlsx_path)
    data_head = []
    for _, row in pds.iterrows():
        # .iloc makes the positional lookup explicit; integer keys passed to
        # Series.__getitem__ are deprecated as positions in recent pandas.
        name = row.iloc[2]
        if str(name) == 'nan':
            break
        data_head.append(name)
    print(data_head)
    csv_head_str = ''.join(str(item) + ',' for item in data_head) + '\n'
    print('==============================' + csv_head_str)
    return csv_head_str
def get_data_min(df, col_name):
    """Return the first-row value of the column right after ``col_name``.

    The function name indicates that this neighbouring column holds the
    minimum. Raises IndexError when ``col_name`` is not a column of ``df``.
    """
    print('------------------------------------------------------')
    matching = df.columns[df.columns == col_name].tolist()
    position = df.columns.get_loc(matching[0])
    value = df.iloc[0, position + 1]
    print(value)
    return value
def get_data_max(df, col_name):
    """Return the first-row value of the column two positions after ``col_name``.

    The function name indicates that this column holds the maximum. Raises
    IndexError when ``col_name`` is not a column of ``df``.
    """
    print('------------------------------------------------------')
    matching = df.columns[df.columns == col_name].tolist()
    position = df.columns.get_loc(matching[0])
    value = df.iloc[0, position + 2]
    print(value)
    return value
def get_colum_data(df, col_name):
    """Return the values of column ``col_name`` from the fourth row onward.

    The first three rows (positions 0-2) are skipped. Raises IndexError when
    ``col_name`` is not a column of ``df``.
    """
    print('-----------------get_colum_data-------------------------------------')
    matching = df.columns[df.columns == col_name].tolist()
    position = df.columns.get_loc(matching[0])
    first_data_row = 2 + 1
    tail = df.iloc[first_data_row:, position]
    print(tail)
    return list(tail)
def get_colum_data_low(df, col_name):
    """Return the first-row value of column ``col_name`` itself.

    Raises IndexError when ``col_name`` is not a column of ``df``.
    """
    matching = df.columns[df.columns == col_name].tolist()
    position = df.columns.get_loc(matching[0])
    value = df.iloc[0, position]
    print(value)
    return value
def get_colum_data_high(df, col_name):
    """Return the second-row value of column ``col_name`` itself.

    Raises IndexError when ``col_name`` is not a column of ``df``.
    """
    matching = df.columns[df.columns == col_name].tolist()
    position = df.columns.get_loc(matching[0])
    value = df.iloc[1, position]
    print(value)
    return value
def test1(txt_path):
    """Step 1: collect the data of all pending logs under ``txt_path``.

    Pending logs are obtained from ``check_txt_by_record``. After collection
    their paths are appended to the processed-file record, and every
    16-character alphanumeric run found in a path is appended to the
    serial-number record.

    Returns:
        False when there is nothing to process, otherwise None.
    """
    pending = check_txt_by_record(
        txt_path, '([A-Za-z0-9]{16}_[0-9]{6}.txt)', collect_txt_record_csv)
    if len(pending) == 0:
        print('No New Data')
        return False
    # NOTE(review): the result of collect_infor is ignored, so the files are
    # recorded as processed even when collection fails — confirm intent.
    collect_infor(pending, get_data_excel, result_step0_csv)
    sn_pattern = "[A-Za-z0-9]{16}"
    path_lines = [path + '\n' for path in pending]
    # Extract the barcodes from each path.
    sn_lines = [
        barcode + '\n'
        for path in pending
        for barcode in re.findall(sn_pattern, path)
    ]
    save_data_to_csv(collect_txt_record_csv, path_lines)
    save_data_to_csv(collect_Sn_record_csv, sn_lines)
def test2():
    """Step 2: keep only the configured test-item columns and save them.

    The item names are taken from the fourth column of the rules workbook
    ``get_data_excel``, stopping at the first empty cell. Those columns are
    selected from ``result_step1_csv`` and written to ``result_step2_csv``
    without the index.
    """
    pds = read_src_data_excel(get_data_excel)
    data_head = []
    for _, row in pds.iterrows():
        # .iloc makes the positional lookup explicit; integer keys passed to
        # Series.__getitem__ are deprecated as positions in recent pandas.
        item_name = row.iloc[3]
        if str(item_name) == 'nan':
            break
        data_head.append(item_name)  # test item name
    print(data_head)
    # Select the test-item columns from the step-1 table.
    pda = read_src_data_csv(result_step1_csv)
    print(pda)
    df1 = pda[data_head]
    print('--------------------------------------------------')
    print(df1)
    print('--------------------------------------------------')
    df1.to_csv(result_step2_csv, index=False)
def txt_data_class_by_process_name(proc_name, file_path):
    """Write the rows of ``file_path`` whose 'Process Name' equals ``proc_name``.

    The filtered rows go to ``output.csv`` in the current working directory,
    without the index.
    """
    print('txt_data_class_by_process_name')
    table = read_src_data_csv(file_path)
    selected = table['Process Name'] == proc_name
    table[selected].to_csv('output.csv', index=False)
def txt_data_class_by_date(date, file_path):
    """Write the rows of ``file_path`` whose 'date' column equals ``date``.

    The filtered rows go to ``output.csv`` in the current working directory,
    without the index.
    """
    print('txt_data_class_by_time')
    table = read_src_data_csv(file_path)
    selected = table['date'] == date
    table[selected].to_csv('output.csv', index=False)
def txt_data_class_by_online(online, file_path):
    """Write the rows of ``file_path`` whose 'date' column equals ``online``.

    The filtered rows go to ``output.csv`` in the current working directory,
    without the index.
    """
    # NOTE(review): both the log text and the filtered column ('date') match
    # txt_data_class_by_date — looks copy-pasted; confirm the intended column.
    print('txt_data_class_by_time')
    table = read_src_data_csv(file_path)
    selected = table['date'] == online
    table[selected].to_csv('output.csv', index=False)
def collect_txt_by_csv(des_dir):
    """Copy every log listed in ``result_step2_csv`` into a per-process folder.

    For each row, the file named in column ``src_path`` is copied to
    ``<des_dir>/<Process Name>/<file name>``. The target folder is created
    when needed. Rows without a source path are skipped.

    Args:
        des_dir: Root directory that receives the per-process folders.
    """
    pds = read_src_data_csv(result_step2_csv)
    for _, row in pds.iterrows():
        full_path = str(row['src_path'])
        dir_name = str(row['Process Name'])
        if full_path == 'nan':
            continue
        print(full_path)
        # NOTE(review): rows with an empty process name are still copied, into
        # a folder literally named "nan" (the original branch for this case
        # was a no-op) — confirm whether such rows should be skipped instead.
        print(dir_name)
        file_name = os.path.basename(full_path)
        # os.path.join instead of hand-built backslashes, which also copes
        # with a des_dir that already ends in a separator.
        target_dir = os.path.join(des_dir, dir_name)
        # exist_ok avoids the check-then-create race of the previous code.
        os.makedirs(target_dir, exist_ok=True)
        shutil.copy(full_path, os.path.join(target_dir, file_name))
        print(file_name)
        print(target_dir)
# Disabled scratch code, kept as a bare string literal so that it is never
# executed: a date/time sorting experiment and two sample copy calls.
'''
df = pd.DataFrame({'date': ['2022-01-01', '2022-01-03', '2022-01-02','2022-01-01', '2022-01-03', '2022-01-02'],
'time': ['10:30:00', '14:45:00', '09:15:00','12:30:00', '14:45:00', '13:15:00']})
# 将日期和时间列转换为 datetime 类型
df['datetime'] = pd.to_datetime(df['date'] + ' ' + df['time'])
# 按日期和时间排序
df = df.sort_values('datetime')
print(df)
path1 = r'Z:/new_file.bat'
path2 = r'D:\\123\\new_file_3.bat'
#shutil.copy(path1, path2)
#collect_txt_by_csv('D:\\123\\test_all_result2_Test.csv','D:\\123\\')
'''
import configparser
import os.path
import random
import sys
import tkinter
from tkinter import *
from tkinter.ttk import *
import collecttxt

# Settings shared with the GUI; filled from config.ini when run as a script.
ini_path = ''
app_name = ''
log_path = ''
log_path_list = []
config_path = ''


class RedirectText:
    """Minimal stream object that appends written text to a Tk text widget."""

    def __init__(self, text_widget):
        self.text_widget = text_widget

    def write(self, string):
        """Append ``string`` at the end of the wrapped widget."""
        self.text_widget.insert(tkinter.END, string)

    def flush(self):
        """Do nothing; text is inserted immediately.

        Present because code that treats ``sys.stdout`` as a stream, such as
        ``print(..., flush=True)`` or interpreter shutdown, calls ``flush``.
        """


# Auto-hide the scrollbar.
def scrollbar_autohide(bar, widget):
    """Show ``bar`` only while the pointer is over ``widget`` or ``bar``."""
    def show():
        bar.lift(widget)

    def hide():
        bar.lower(widget)

    hide()
    widget.bind("<Enter>", lambda e: show())
    bar.bind("<Enter>", lambda e: show())
    widget.bind("<Leave>", lambda e: hide())
    bar.bind("<Leave>", lambda e: hide())


class WinGUI(Tk):
    """Main window: a list of log directories, action buttons and a log view."""

    def __init__(self):
        super().__init__()
        self.__win()
        self.tk_button_run = self.__tk_button_run(self)
        self.tk_input_content = self.__tk_input_content(self)
        self.tk_button_add = self.__tk_button_add(self)
        self.tk_list_box_path = self.__tk_list_box_path(self)
        self.tk_button_update = self.__tk_button_update(self)
        self.tk_button_reset = self.__tk_button_reset(self)
        # self.tk_text_lrxoiufl = self.__tk_text_lrxoiufl(self)
        self.tk_button_delete = self.__tk_button_delete(self)
        self.show_log = self.__tk_text_url()
        # From here on everything printed appears in the log view.
        sys.stdout = RedirectText(self.show_log)

    def __win(self):
        """Set the title and a fixed 1000x800 geometry centred on the screen."""
        self.title(app_name)
        width = 1000
        height = 800
        screenwidth = self.winfo_screenwidth()
        screenheight = self.winfo_screenheight()
        geometry = '%dx%d+%d+%d' % (width, height, (screenwidth - width) / 2, (screenheight - height) / 2)
        self.geometry(geometry)
        self.resizable(width=False, height=False)

    def __tk_text_url(self):
        """Create the log text area with an auto-hiding vertical scrollbar."""
        text = Text(self)
        text.place(x=0, y=300, width=890, height=480)
        vbar = Scrollbar(self)
        text.configure(yscrollcommand=vbar.set)
        text.tag_config('link', foreground='blue', underline=True)
        vbar.config(command=text.yview)
        vbar.place(x=900, y=300, width=15, height=480)
        scrollbar_autohide(vbar, text)
        return text

    def scrollbar_autohide(self, vbar, hbar, widget):
        """Auto-hide the given scrollbars; either of them may be None."""
        def show():
            if vbar:
                vbar.lift(widget)
            if hbar:
                hbar.lift(widget)

        def hide():
            if vbar:
                vbar.lower(widget)
            if hbar:
                hbar.lower(widget)

        hide()
        widget.bind("<Enter>", lambda e: show())
        if vbar:
            vbar.bind("<Enter>", lambda e: show())
        if vbar:
            vbar.bind("<Leave>", lambda e: hide())
        if hbar:
            hbar.bind("<Enter>", lambda e: show())
        if hbar:
            hbar.bind("<Leave>", lambda e: hide())
        widget.bind("<Leave>", lambda e: hide())

    def v_scrollbar(self, vbar, widget, x, y, w, h, pw, ph):
        """Attach ``vbar`` to ``widget`` and place it along the right edge."""
        widget.configure(yscrollcommand=vbar.set)
        vbar.config(command=widget.yview)
        vbar.place(relx=(w + x) / pw, rely=y / ph, relheight=h / ph, anchor='ne')

    def h_scrollbar(self, hbar, widget, x, y, w, h, pw, ph):
        """Attach ``hbar`` to ``widget`` and place it along the bottom edge."""
        widget.configure(xscrollcommand=hbar.set)
        hbar.config(command=widget.xview)
        hbar.place(relx=x / pw, rely=(y + h) / ph, relwidth=w / pw, anchor='sw')

    def create_bar(self, master, widget, is_vbar, is_hbar, x, y, w, h, pw, ph):
        """Create the requested scrollbars for ``widget`` and auto-hide them."""
        vbar, hbar = None, None
        if is_vbar:
            vbar = Scrollbar(master)
            self.v_scrollbar(vbar, widget, x, y, w, h, pw, ph)
        if is_hbar:
            hbar = Scrollbar(master, orient="horizontal")
            self.h_scrollbar(hbar, widget, x, y, w, h, pw, ph)
        self.scrollbar_autohide(vbar, hbar, widget)

    def __tk_input_content(self, parent):
        """Create the entry used to type a directory path."""
        ipt = Entry(parent, )
        ipt.place(x=8, y=20, width=880, height=32)
        return ipt

    def __tk_list_box_path(self, parent):
        """Create the directory list, pre-filled from the '|'-separated ``log_path``."""
        lb = Listbox(parent)
        temp_list = []
        if log_path != "":
            temp_list = log_path.split('|')
        else:
            print('None Data')
        for index in temp_list:
            if index != '':
                lb.insert(END, index)
        lb.place(x=11, y=66, width=880, height=222)
        scrollbar = Scrollbar(self, orient=HORIZONTAL, command=lb.xview)
        scrollbar.pack(side=BOTTOM, fill=X)
        lb.config(xscrollcommand=scrollbar.set)
        return lb

    def delete_selected_rows(self):
        """Remove the selected entries from the directory list."""
        selected_indices = self.tk_list_box_path.curselection()
        # Delete from the bottom up so the remaining indices stay valid.
        for index in selected_indices[::-1]:
            self.tk_list_box_path.delete(index)

    def delete_all_rows(self):
        """Remove every entry from the directory list."""
        self.tk_list_box_path.delete(0, END)

    def update_rows(self):
        """Persist the directory list to the ``Log_path`` section of config.ini.

        The entries are joined with '|'. Nothing is written while the list is
        empty, so the previously stored value is kept in that case.
        """
        entries = self.tk_list_box_path.get(0, END)
        for entry in entries:
            print(entry)
        if not entries:
            return
        # A plain join keeps the separators between entries intact; the old
        # replace('||', '') clean-up could glue neighbouring entries together.
        write_ini_value(config_path, 'Log_path', '|'.join(entries))

    def add(self):
        """Append the typed path to the directory list when it exists."""
        txt = self.tk_input_content.get()
        if not os.path.exists(txt):
            print('not dir ....')
            return
        print('------------add----------- Pass')
        self.tk_list_box_path.insert(END, txt)
        # The former self.place(...) call was removed: a Tk root window has no
        # place() method, so every successful add ended in AttributeError.

    def run(self):
        """Run the collection pipeline for every listed directory."""
        for index in range(self.tk_list_box_path.size()):
            tmp_path = self.tk_list_box_path.get(index)
            print(tmp_path)
            collecttxt.test1(tmp_path)
            print(tmp_path)
        for _ in range(7):
            print('Run.....\r\n')
        collecttxt.test2()
        collecttxt.collect_txt_by_csv(os.getcwd())
        # Later stages (collecttxt.test3/test4/test4_html and the analyzedtata
        # test2..test6 steps) are currently disabled.

    def __tk_button_add(self, parent):
        btn = Button(parent, text="添加", command=self.add)
        btn.place(x=900, y=20, width=90, height=30)
        return btn

    def __tk_button_delete(self, parent):
        btn = Button(parent, text="删除", command=self.delete_selected_rows)
        btn.place(x=900, y=70, width=90, height=30)
        return btn

    def __tk_button_run(self, parent):
        btn = Button(parent, text="运行", command=self.run)
        btn.place(x=900, y=120, width=90, height=30)
        return btn

    def __tk_button_update(self, parent):
        btn = Button(parent, text="更新", command=self.update_rows)
        btn.place(x=900, y=170, width=90, height=30)
        return btn

    def __tk_button_reset(self, parent):
        btn = Button(parent, text="重置", command=self.delete_all_rows)
        btn.place(x=900, y=240, width=90, height=30)
        return btn

    def __tk_text_lrxoiufl(self, parent):
        text = Text(parent)
        text.place(x=9, y=296, width=571, height=227)
        return text


class Win(WinGUI):
    """Controller-driven variant of the main window."""

    def __init__(self, controller):
        self.ctl = controller
        super().__init__()
        self.__event_bind()
        self.__style_config()
        self.ctl.init(self)

    def __event_bind(self):
        pass

    def __style_config(self):
        pass


def read_ini_value(path, key):
    """Return the ``value`` option of section ``key`` from ``<path>config.ini``.

    Args:
        path: Directory prefix, including its trailing separator.
        key: Section name to read.
    """
    config = configparser.ConfigParser()
    config.read(path + 'config.ini')
    value = config.get(key, 'value')
    print(value)
    return value


def write_ini_value(path, key, value):
    """Set the ``value`` option of section ``key`` in the INI file ``path``.

    Unlike ``read_ini_value``, ``path`` is the full path of the INI file.

    Returns:
        True after the file has been rewritten.
    """
    config = configparser.ConfigParser()
    config.read(path)
    config.set(key, 'value', value)
    # Write the change back to the INI file.
    with open(path, 'w') as configfile:
        config.write(configfile)
    return True


if __name__ == "__main__":
    path_infor = os.getcwd()
    path_infor = path_infor + '\\'
    run_dir = path_infor
    ini_path = read_ini_value(path_infor, 'current_path')
    app_name = read_ini_value(path_infor, 'app_name')
    log_path = read_ini_value(path_infor, 'Log_path')
    print('++:' + path_infor)
    print('++:' + app_name)
    config_path = path_infor + 'config.ini'
    # current_path stores the working directory, so compare against that.
    # The old comparison with the config file path never matched and
    # rewrote config.ini on every start.
    if ini_path != path_infor:
        write_ini_value(config_path, 'current_path', path_infor)
    win = WinGUI()
    win.mainloop()
config.ini
[app_name]
value = app_run_txt
[current_path]
value = D:\PycharmProjects\pythonProject\collectTxt\
[Log_path]
value = D:\Testlog\H16X
index | Type | key | dataname | match1 | match2 | match3 | match4 | match5 | match6 |
10 | MatchData | Process Name: | Process Name | (\S+) | No_Math | No_Math | No_Math | No_Math | No_Math |