yzy_智能日志分析

最新推荐文章于 2024-09-15 22:31:42 发布
星月故里101101
最新推荐文章于 2024-09-15 22:31:42 发布
阅读量256
点赞数 4
文章标签： python
本文链接：https://blog.csdn.net/T______190__/article/details/136578655
版权
# encoding=utf-8
import re
import openpyxl
import sys


def llog_print_yzy(*msg):
    """Print a debug message together with a caller-supplied line number.

    The function accepts any number of positional arguments.  The final
    argument is printed as the line number (the call site in this file
    passes ``sys._getframe().f_lineno``); every argument before it is
    converted with ``str`` and joined with ``', '`` to form the message.

    The message is assembled into a single string first so that ``print``
    does not insert its own separator between the individual values.

    Raises:
        IndexError: if called without any argument, because there is no
            final element to use as the line number.
    """
    # Everything except the trailing line number belongs to the message.
    msg_str = ', '.join(str(part) for part in msg[:-1])

    # The output is followed by an empty line to keep successive messages apart.
    print('[Line]: ' + str(msg[-1]), ', [message]: ', msg_str, end='\n\n')


# llog_print_yzy('fa啊发', 3, sys._getframe().f_lineno)       # call


# ------------------------------------------
import shutil, os

# Root directory that the script walks recursively looking for log files.
file_path = r'D:\T\Documents\MobileFile\桌面\python学习\file_type'

# Text accumulators, one per output directory scanned further down
# (golden_output, atc_output, func_output, inst_output, prec_output).
# Strings are immutable, so sharing one initial '' through a chained
# assignment is safe: each later "+=" rebinds only its own name.
GT_output = ATC_output = FUNC_output = INST_output = PREC_output = ''

''' Read the content of each log file '''


def get_per_log(absolute_path):
    """Return the whole content of a UTF-8 text file as a single string.

    Args:
        absolute_path: path of the file to read.

    Returns:
        The file content as one ``str``; line endings are kept, exactly as
        if every line returned by ``readlines()`` had been concatenated.

    Raises:
        OSError: if the file cannot be opened.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    # The context manager guarantees the handle is closed even when reading
    # fails; the previous version never closed the file it opened.
    with open(absolute_path, mode='r', encoding='utf-8') as txt_file:
        return txt_file.read()


# Matches a bracketed severity tag ("[error", "[Error" or "[ERROR") plus the
# rest of that line.  The former pattern wrote the alternatives inside a
# character class, which matched "[" followed by ANY single one of the
# characters e, r, o, E, R, O or "|" (for example "[root]"), not the words.
_error_line_re = re.compile(r'(\[(?:error|Error|ERROR).+)')

# Directory marker -> text collected so far for that kind of output.
# Seeded from the module-level accumulators and written back to them after
# the walk, so the names used by the rest of the script stay valid.
_collected = {
    'golden_output': GT_output,
    'atc_output': ATC_output,
    'func_output': FUNC_output,
    'inst_output': INST_output,
    'prec_output': PREC_output,
}

dirlist = os.walk(file_path)
for root, dirs, files in dirlist:
    for file in files:
        full_path = os.path.join(root, file)

        # Only paths that contain '.log' are analysed.
        if '.log' not in full_path:
            continue

        # A path may contain more than one marker; every matching
        # accumulator receives the entry, as with the former separate checks.
        markers = [marker for marker in _collected if marker in full_path]
        if not markers:
            continue

        # Read and scan the file once, however many markers matched.
        str_txt_file = get_per_log(full_path)
        re_result = _error_line_re.findall(str_txt_file)

        # os.walk already yields the bare file name, so there is no need to
        # split the joined path on a Windows-only separator.
        entry = '[' + file + ']: ' + str(re_result)
        for marker in markers:
            _collected[marker] += entry

GT_output = _collected['golden_output']
ATC_output = _collected['atc_output']
FUNC_output = _collected['func_output']
INST_output = _collected['inst_output']
PREC_output = _collected['prec_output']

# The trailing argument is the current line number, printed as "[Line]".
llog_print_yzy(GT_output, ATC_output, FUNC_output, INST_output, PREC_output, sys._getframe().f_lineno)


''' Already sorted by default. Because this is a string sort, xlsx file names must uniformly follow the format 2023-01-12-xxx.xlsx (two-digit numbers) '''
# logging.info(new_list)

# new_list_len = len(new_list)  # 获取xlsx文件的总个数
#
# GT_data = new_list[new_list_len - 2]  # 倒数第二个文件名 GT_data
# Result_data = new_list[new_list_len - 1]  # 最后一个文件名 Result_data
# logging.info('上周的数据GT_data：%s ' % GT_data)
# logging.info('这周的数据Result_data：%s ' % Result_data)
#
# shutil.copyfile(GT_data, './表/GT_data.xlsx')  # 只需要指定文件名即可
# shutil.copyfile(Result_data, './表/Result_data.xlsx')
# logging.info('已成功 把最新两周的数据，同步到了\'表\'的路径下面，并重命名')
#
#
# file_result_path = r'D:\T\Documents\MobileFile\桌面\python学习\file_type\data\result.xlsx'
# wb = openpyxl.load_workbook(file_result_path, data_only=True)  # 指定路径，打开工作簿      如果有公式的话，默认显示公式；data_only只读取数据.
# ws = wb.active  # 打开工作表
#
# logging.info('ws.max_row: %s' % (ws.max_row))
# # 一行一行地 获取表格里的网络名字
# for excel_row in range(1, ws.max_row + 1):
#     excel_net_name = ws.cell(excel_row, 1).value  # 获取excel里每一行的网络名字
#     logging.info('excel_net_name: %s' % (excel_net_name))
#
#     #     捕获到，可能会有空行的情况
#     if excel_net_name != None:
#         try:
#             str_1_fxquant = excel_net_name + '/atc_output/fxquant/' + excel_net_name
#             # logging.info(str_1_fxquant)
#             # print(str(lines))
#
#             re_result = re.findall(str_1_fxquant + '_fxquant_npuexe.json.+}', str(lines))
#             # logging.info('%s' % (re_result))
#
#             # () get到匹配的结果
#             inference_lantency = re.findall(r'inference_lantency\\\': \\\'(\d+\.\d+)\\\'', str(re_result))
#             throughput = re.findall(r'throughput\\\': \\\'(\d+\.\d+)\\\'', str(re_result))
#             bandwidth = re.findall(r'bandwidth\\\': \\\'(\d+\.\d+)\\\'', str(re_result))
#             power = re.findall(r'power\\\': \\\'(\d+\.\d+)\\\'', str(re_result))
#             load_time = re.findall(r'load_time\\\': \\\'(\d+\.\d+)\\\'', str(re_result))
#
#             # logging.info('inference_lantency: %s' % (inference_lantency[0]))
#             # logging.info('throughput: %s' % (throughput[0]))
#             # logging.info('bandwidth: %s' % (bandwidth[0]))  # 获取百分比的数字
#             # 如果是19的板子，带宽是9.9G   928的话，就是27.8G
#             bandwidth_tmp = "%.2f" % (float(bandwidth[0]) * 9.9 / 100)
#             # logging.info(bandwidth_tmp)
#             bandwidth[0] = str(bandwidth_tmp) + '(' + bandwidth[0] + '%)'
#
#             # logging.info('power: %s' % (power[0]))
#             # logging.info('load_time: %s' % (load_time[0]))
#
#             #     写到表格里：
#             ws.cell(excel_row, 7, inference_lantency[0])
#             ws.cell(excel_row, 8, throughput[0])
#             ws.cell(excel_row, 9, bandwidth[0])
#             ws.cell(excel_row, 10, power[0])
#             ws.cell(excel_row, 11, load_time[0])
#
#             ''' 下面是fxnuquant的数据 '''
#             str_1_fxnuquant = excel_net_name + '/atc_output/fxnuquant/' + excel_net_name
#             # logging.info(str_1_fxnuquant)
#             # print(str(lines))
#
#             re_result = re.findall(str_1_fxnuquant + '_fxnuquant_npuexe.json.+}', str(lines))
#             # logging.info('%s' % (re_result))
#
#             # () get到匹配的结果
#             inference_lantency = re.findall(r'inference_lantency\\\': \\\'(\d+\.\d+)\\\'', str(re_result))
#             throughput = re.findall(r'throughput\\\': \\\'(\d+\.\d+)\\\'', str(re_result))
#             bandwidth = re.findall(r'bandwidth\\\': \\\'(\d+\.\d+)\\\'', str(re_result))
#             power = re.findall(r'power\\\': \\\'(\d+\.\d+)\\\'', str(re_result))
#             load_time = re.findall(r'load_time\\\': \\\'(\d+\.\d+)\\\'', str(re_result))
#
#             # logging.info('inference_lantency: %s' % (inference_lantency[0]))
#             # logging.info('throughput: %s' % (throughput[0]))
#             # logging.info('bandwidth: %s' % (bandwidth[0]))  # 获取百分比的数字
#             # 如果是19的板子，带宽是9.9G   928的话，就是27.8G
#             bandwidth_tmp = "%.2f" % (float(bandwidth[0]) * 9.9 / 100)
#             # logging.info(bandwidth_tmp)
#             bandwidth[0] = str(bandwidth_tmp) + '(' + bandwidth[0] + '%)'
#
#             # logging.info('power: %s' % (power[0]))
#             # logging.info('load_time: %s' % (load_time[0]))
#
#             #     写到表格里：
#             ws.cell(excel_row, 2, inference_lantency[0])
#             ws.cell(excel_row, 3, throughput[0])
#             ws.cell(excel_row, 4, bandwidth[0])
#             ws.cell(excel_row, 5, power[0])
#             ws.cell(excel_row, 6, load_time[0])
#         except:
#             # 如果有异常，就捕获，不影响其他net_name信息的获取
#             logging.error(traceback.format_exc())

# wb.save(file_result_path)