Python 将 log 中的毫秒级时间戳替换为 UTC 时间

_夜阑卧听风吹雨

已于 2024-07-23 19:11:31 修改

阅读量795

点赞数 6

文章标签： python mysql 开发语言

于 2024-05-30 13:34:44 首次发布

本文链接：https://blog.csdn.net/qq_32342949/article/details/139321713

版权

实现思路

1.获取当前工作目录下的所有 .txt 文件

2.读取文件每一行

3.找到时间戳（方括号中的 12 位十六进制毫秒时间戳，例如 [018FC6A1B2C3]），转换成 UTC 时间，生成新行

4.把每一行写入到新的文件 <原文件名>_out.log（例如 a.txt 对应 a.txt_out.log）

代码实现

# This is a sample Python script.

# Press Shift+F10 to execute it or replace it with your code.
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.

import os
import re
import codecs
import chardet
from datetime import datetime, timezone, timedelta


def timestamp_to_utc(timestamp_ms):
    """Format a millisecond Unix timestamp as a UTC date-time string.

    Args:
        timestamp_ms: Milliseconds elapsed since 1970-01-01 00:00:00 UTC.

    Returns:
        The UTC time formatted as ``YYYY-MM-DD HH:MM:SS:mmm``, where ``mmm``
        is the millisecond remainder zero-padded to three digits.
    """
    # Split into whole seconds and the millisecond remainder up front, so the
    # seconds part is never routed through a lossy float division.
    seconds, tail_ms = divmod(timestamp_ms, 1000)
    # Add the offset to a timezone-aware epoch. A naive
    # datetime.fromtimestamp() would yield the machine's *local* time, and
    # its utcoffset() is always None, so no UTC conversion would ever happen.
    epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
    utc_datetime = epoch + timedelta(seconds=seconds)
    return utc_datetime.strftime('%Y-%m-%d %H:%M:%S:') + str(tail_ms).zfill(3)


#  txt文件任何编码格式转为utf-8
def convert_to_utf8(input_file, output_file):
    """Re-encode a text file as UTF-8.

    The source encoding is guessed with ``chardet`` from the file's raw
    bytes. The whole input is read into memory before anything is written,
    so ``input_file`` and ``output_file`` may be the same path.

    Args:
        input_file: Path of the file to read.
        output_file: Path of the UTF-8 file to write.

    Raises:
        UnicodeDecodeError: If the bytes cannot be decoded with the detected
            (or fallback) encoding.
    """
    with open(input_file, 'rb') as f:
        raw_data = f.read()

    # chardet reports None when it cannot make a guess (e.g. an empty file).
    # Fall back to UTF-8 explicitly instead of letting the platform's locale
    # encoding be picked up implicitly.
    encoding = chardet.detect(raw_data)['encoding'] or 'utf-8'

    # Decode the bytes already in memory rather than reading the file again.
    content = raw_data.decode(encoding)

    # Write bytes so line endings pass through untouched.
    with open(output_file, 'wb') as f:
        f.write(content.encode('utf-8'))


# Matches a 12-hex-digit timestamp wrapped in square brackets.
# Compiled once instead of being rebuilt for every line.
_TIMESTAMP_PATTERN = re.compile(r"\[([0-9A-Fa-f]{12})\]")


def _replace_timestamp(match):
    """Return the bracketed UTC rendering of one matched hex timestamp."""
    timestamp = int(match.group(1), 16)
    # Values of 1000 or less are left exactly as they appear in the log.
    if timestamp <= 1000:
        return match.group(0)
    return "[" + timestamp_to_utc(timestamp) + "]"


# Work in the current working directory.
current_folder = os.getcwd()

# Collect every .txt file in that folder.
txt_files = [f for f in os.listdir(current_folder) if f.endswith('.txt')]

for txt_file in txt_files:
    # Normalise the file to UTF-8 in place so it can be read with one
    # known encoding below.
    convert_to_utf8(txt_file, txt_file)
    file_name_out = txt_file + '_out.log'
    with open(txt_file, 'r', encoding='utf-8') as source, \
            open(file_name_out, 'w', encoding='utf-8') as target:
        for line in source:
            # Each bracketed timestamp is converted on its own, so a line
            # holding several of them keeps their distinct values.
            target.write(_TIMESTAMP_PATTERN.sub(_replace_timestamp, line))