# encoding=utf-8
import re
import openpyxl
import sys
def llog_print_yzy(*msg):
    """Print a debug record made of a message and a trailing line number.

    Every positional argument except the last one is converted with ``str``
    and joined with ``', '`` to form the message. The last argument is
    reported in the ``[Line]`` field; callers are expected to pass the
    current line number there, e.g. ``sys._getframe().f_lineno``.

    Calling the function without any argument prints nothing instead of
    raising ``IndexError``.

    Args:
        *msg: Values to report, followed by the line number as the final
            argument.
    """
    if not msg:
        # Not even a line-number argument was supplied: nothing to report.
        return

    *parts, line_no = msg
    msg_str = ', '.join(str(part) for part in parts)
    # The '[Line]' field is built by concatenation so that no space separates
    # the label from its value; the other fields rely on print's separator.
    print('[Line]: ' + str(line_no), ', [message]: ', msg_str, end='\n\n')
# llog_print_yzy('fa啊发', 3, sys._getframe().f_lineno) # call
# ------------------------------------------
import shutil, os
# Directory tree that is scanned for log files.
file_path = r'D:\T\Documents\MobileFile\桌面\python学习\file_type'
# One accumulator string per output category; all of them start out empty.
GT_output = ATC_output = FUNC_output = INST_output = PREC_output = ''
''' 获取每一个log的内容 '''
def get_per_log(absolute_path):
    """Return the whole content of a UTF-8 text file as a single string.

    Args:
        absolute_path: Path of the log file to read.

    Returns:
        The file content as one ``str``, line endings included.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    # The context manager guarantees the handle is closed, even when reading
    # or decoding raises.
    with open(absolute_path, mode='r', encoding='utf-8') as txt_file:
        return txt_file.read()
# Matches a bracketed error tag ("[error", "[Error" or "[ERROR") together with
# the rest of its line.  The spellings are alternatives inside a group; written
# as a character class ("[error|Error|ERROR]") the pattern would accept "["
# followed by any single one of those letters, e.g. "[Running" or "[Event".
_ERROR_LINE_RE = re.compile(r'\[(?:error|Error|ERROR).+')

# Path fragments that identify the output category a log file belongs to.
_OUTPUT_MARKERS = ('golden_output', 'atc_output', 'func_output',
                   'inst_output', 'prec_output')

dirlist = os.walk(file_path)
for root, dirs, files in dirlist:
    for file in files:
        log_path = os.path.join(root, file)
        # Only paths containing '.log' are analysed.
        if '.log' not in log_path:
            continue
        matched = [marker for marker in _OUTPUT_MARKERS if marker in log_path]
        if not matched:
            continue

        # Read and scan the file once, then credit every matching category.
        str_txt_file = get_per_log(log_path)
        re_result = _ERROR_LINE_RE.findall(str_txt_file)
        # `file` is already the bare file name, independent of the platform's
        # path separator.
        entry = '[' + file + ']: ' + str(re_result)

        if 'golden_output' in matched:
            GT_output += entry
        if 'atc_output' in matched:
            ATC_output += entry
        if 'func_output' in matched:
            FUNC_output += entry
        if 'inst_output' in matched:
            INST_output += entry
        if 'prec_output' in matched:
            PREC_output += entry

# Report everything that was collected, followed by the current line number.
llog_print_yzy(GT_output, ATC_output, FUNC_output, INST_output, PREC_output, sys._getframe().f_lineno)
# The list is assumed to be sorted already. Because that is a string sort, every xlsx file name must follow the same pattern: 2023-01-12-xxx.xlsx (two-digit fields).
# logging.info(new_list)
# new_list_len = len(new_list) # 获取xlsx文件的总个数
#
# GT_data = new_list[new_list_len - 2] # 倒数第二个文件名 GT_data
# Result_data = new_list[new_list_len - 1] # 最后一个文件名 Result_data
# logging.info('上周的数据GT_data:%s ' % GT_data)
# logging.info('这周的数据Result_data:%s ' % Result_data)
#
# shutil.copyfile(GT_data, './表/GT_data.xlsx') # 只需要指定文件名即可
# shutil.copyfile(Result_data, './表/Result_data.xlsx')
# logging.info('已成功 把最新两周的数据,同步到了\'表\'的路径下面,并重命名')
#
#
# file_result_path = r'D:\T\Documents\MobileFile\桌面\python学习\file_type\data\result.xlsx'
# wb = openpyxl.load_workbook(file_result_path, data_only=True) # 指定路径,打开工作簿 如果有公式的话,默认显示公式;data_only只读取数据.
# ws = wb.active # 打开工作表
#
# logging.info('ws.max_row: %s' % (ws.max_row))
# # 一行一行地 获取表格里的网络名字
# for excel_row in range(1, ws.max_row + 1):
# excel_net_name = ws.cell(excel_row, 1).value # 获取excel里每一行的网络名字
# logging.info('excel_net_name: %s' % (excel_net_name))
#
# # 捕获到,可能会有空行的情况
# if excel_net_name != None:
# try:
# str_1_fxquant = excel_net_name + '/atc_output/fxquant/' + excel_net_name
# # logging.info(str_1_fxquant)
# # print(str(lines))
#
# re_result = re.findall(str_1_fxquant + '_fxquant_npuexe.json.+}', str(lines))
# # logging.info('%s' % (re_result))
#
# # () get到匹配的结果
# inference_lantency = re.findall(r'inference_lantency\\\': \\\'(\d+\.\d+)\\\'', str(re_result))
# throughput = re.findall(r'throughput\\\': \\\'(\d+\.\d+)\\\'', str(re_result))
# bandwidth = re.findall(r'bandwidth\\\': \\\'(\d+\.\d+)\\\'', str(re_result))
# power = re.findall(r'power\\\': \\\'(\d+\.\d+)\\\'', str(re_result))
# load_time = re.findall(r'load_time\\\': \\\'(\d+\.\d+)\\\'', str(re_result))
#
# # logging.info('inference_lantency: %s' % (inference_lantency[0]))
# # logging.info('throughput: %s' % (throughput[0]))
# # logging.info('bandwidth: %s' % (bandwidth[0])) # 获取百分比的数字
# # 如果是19的板子,带宽是9.9G 928的话,就是27.8G
# bandwidth_tmp = "%.2f" % (float(bandwidth[0]) * 9.9 / 100)
# # logging.info(bandwidth_tmp)
# bandwidth[0] = str(bandwidth_tmp) + '(' + bandwidth[0] + '%)'
#
# # logging.info('power: %s' % (power[0]))
# # logging.info('load_time: %s' % (load_time[0]))
#
# # 写到表格里:
# ws.cell(excel_row, 7, inference_lantency[0])
# ws.cell(excel_row, 8, throughput[0])
# ws.cell(excel_row, 9, bandwidth[0])
# ws.cell(excel_row, 10, power[0])
# ws.cell(excel_row, 11, load_time[0])
#
# ''' 下面是fxnuquant的数据 '''
# str_1_fxnuquant = excel_net_name + '/atc_output/fxnuquant/' + excel_net_name
# # logging.info(str_1_fxnuquant)
# # print(str(lines))
#
# re_result = re.findall(str_1_fxnuquant + '_fxnuquant_npuexe.json.+}', str(lines))
# # logging.info('%s' % (re_result))
#
# # () get到匹配的结果
# inference_lantency = re.findall(r'inference_lantency\\\': \\\'(\d+\.\d+)\\\'', str(re_result))
# throughput = re.findall(r'throughput\\\': \\\'(\d+\.\d+)\\\'', str(re_result))
# bandwidth = re.findall(r'bandwidth\\\': \\\'(\d+\.\d+)\\\'', str(re_result))
# power = re.findall(r'power\\\': \\\'(\d+\.\d+)\\\'', str(re_result))
# load_time = re.findall(r'load_time\\\': \\\'(\d+\.\d+)\\\'', str(re_result))
#
# # logging.info('inference_lantency: %s' % (inference_lantency[0]))
# # logging.info('throughput: %s' % (throughput[0]))
# # logging.info('bandwidth: %s' % (bandwidth[0])) # 获取百分比的数字
# # 如果是19的板子,带宽是9.9G 928的话,就是27.8G
# bandwidth_tmp = "%.2f" % (float(bandwidth[0]) * 9.9 / 100)
# # logging.info(bandwidth_tmp)
# bandwidth[0] = str(bandwidth_tmp) + '(' + bandwidth[0] + '%)'
#
# # logging.info('power: %s' % (power[0]))
# # logging.info('load_time: %s' % (load_time[0]))
#
# # 写到表格里:
# ws.cell(excel_row, 2, inference_lantency[0])
# ws.cell(excel_row, 3, throughput[0])
# ws.cell(excel_row, 4, bandwidth[0])
# ws.cell(excel_row, 5, power[0])
# ws.cell(excel_row, 6, load_time[0])
# except:
# # 如果有异常,就捕获,不影响其他net_name信息的获取
# logging.error(traceback.format_exc())
# wb.save(file_result_path)
# Source article title: yzy_智能日志分析 (yzy intelligent log analysis)
# Page footer: latest recommended article published on 2024-09-15 22:31:42