实现思路
1.获取当前工作目录下的所有 .txt 文件(必要时先转换为 UTF-8 编码)
2.逐行读取文件
3.找到方括号内的 12 位十六进制毫秒时间戳(例如 [018BCFE56800]),转换成 UTC 时间字符串,生成新行
4.把每一行写入到新的文件 <原文件名>_out.log(例如 a.txt → a.txt_out.log)
代码实现
# This is a sample Python script.
# Press Shift+F10 to execute it or replace it with your code.
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.
import os
import re
import codecs
import chardet
from datetime import datetime, timezone, timedelta
def timestamp_to_utc(timestamp_ms):
    """Format a Unix timestamp given in milliseconds as a UTC time string.

    Args:
        timestamp_ms: Milliseconds elapsed since 1970-01-01 00:00:00 UTC.
            Expected to be an integer; other numeric values are truncated
            to an integer first.

    Returns:
        The UTC time formatted as ``YYYY-MM-DD HH:MM:SS:mmm`` where ``mmm``
        is the zero-padded millisecond part of the timestamp.

    Raises:
        OverflowError: If the timestamp lies outside the range that
            ``datetime`` can represent.
    """
    # Split into whole seconds and the millisecond remainder using integer
    # arithmetic, so no precision is lost to floating point.
    whole_seconds, tail_ms = divmod(int(timestamp_ms), 1000)
    # Build the result from a timezone-aware UTC epoch. Calling
    # datetime.fromtimestamp() without a tz returns *naive local* time whose
    # utcoffset() is always None, so a later "convert to UTC" step would be a
    # no-op and the result would silently depend on the machine's timezone.
    epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
    utc_datetime = epoch + timedelta(seconds=whole_seconds)
    # Milliseconds are appended after a ':' separator, zero-padded to 3 digits.
    return utc_datetime.strftime('%Y-%m-%d %H:%M:%S') + ':' + str(tail_ms).zfill(3)
def convert_to_utf8(input_file, output_file):
    """Re-encode a text file from its detected encoding to UTF-8.

    The whole input is read into memory before the output is opened, so
    ``input_file`` and ``output_file`` may refer to the same path.

    Args:
        input_file: Path of the file to read.
        output_file: Path of the UTF-8 file to write; it is overwritten if
            it already exists.

    Raises:
        UnicodeDecodeError: If the file's bytes cannot be decoded with the
            detected (or fallback) encoding.
    """
    with open(input_file, 'rb') as f:
        raw_data = f.read()
    # chardet can report an encoding of None (for example on empty input).
    # Fall back to UTF-8 in that case instead of passing None on, which
    # would make the write step fail with a confusing TypeError.
    encoding = chardet.detect(raw_data)['encoding'] or 'utf-8'
    # Decode the bytes already in memory rather than reading the file again.
    content = raw_data.decode(encoding)
    # newline='' writes line endings exactly as they appear in the input.
    with open(output_file, 'w', encoding='utf-8', newline='') as f:
        f.write(content)
# Matches a bracketed 12-digit hexadecimal number, e.g. "[018BCFE56800]".
# Compiled once here instead of being rebuilt for every line.
_TIMESTAMP_PATTERN = re.compile(r"\[([0-9A-Fa-f]{12})\]")


def _replace_timestamp(match):
    """Return the replacement text for one bracketed hex timestamp match."""
    timestamp = int(match.group(1), 16)
    # Values of 1000 or less are not treated as timestamps; keep them as-is.
    if timestamp <= 1000:
        return match.group(0)
    try:
        return "[" + timestamp_to_utc(timestamp) + "]"
    except (OverflowError, ValueError, OSError):
        # Twelve hex digits can encode a moment beyond what datetime can
        # represent; leave such a value untouched rather than abort the run.
        return match.group(0)


def main():
    """Convert hex timestamps in every .txt file of the current directory.

    Each ``*.txt`` file in the current working directory is first rewritten
    in place as UTF-8, then copied line by line to ``<name>_out.log`` with
    every bracketed 12-digit hexadecimal millisecond timestamp replaced by
    its formatted UTC time.
    """
    current_folder = os.getcwd()
    # Only regular files: a directory whose name ends in ".txt" is skipped.
    txt_files = [
        name for name in os.listdir(current_folder)
        if name.endswith('.txt')
        and os.path.isfile(os.path.join(current_folder, name))
    ]
    for txt_file in txt_files:
        source_path = os.path.join(current_folder, txt_file)
        # Normalise the source file to UTF-8 in place so it can be read below.
        convert_to_utf8(source_path, source_path)
        file_name_out = source_path + '_out.log'
        with open(source_path, 'r', encoding='utf-8') as source, \
                open(file_name_out, 'w', encoding='utf-8') as target:
            for line in source:
                # Each match is converted from its own value, so a line
                # holding several timestamps keeps them distinct.
                target.write(_TIMESTAMP_PATTERN.sub(_replace_timestamp, line))


if __name__ == "__main__":
    main()