log 格式如下的处理方式:
[INFO][24 Mar 2024 20:05:50,148] Received request => COMMAND: CHECKIN
import re
import os
import csv
from datetime import datetime
# Layout of one log line: [LEVEL][timestamp] message
log_pattern = r'\[(\w+)\]\[(.+?)\] (.+)'
# Request message carrying the command, the host and the user id.
# NOTE(review): all three fields must sit on the same physical line as the
# log header for this to match -- confirm against real log files.
checkin_pattern = r'Received request \=\> COMMAND: (\w+)\s+HOST: (\w+)\s+USERID: (\w+)'
# Directory the CSV files are written to (created on demand).
output_dir = 'output'
# Directory scanned for *.log files.
input_dir = 'input'

# Compiled once so the per-line loop does not have to look them up again.
_REQUEST_LOG_RE = re.compile(log_pattern)
_REQUEST_CHECKIN_RE = re.compile(checkin_pattern)
_REQUEST_TIMESTAMP_FORMAT = '%d %b %Y %H:%M:%S,%f'
_REQUEST_CSV_HEADER = ['Timestamp', 'Host', 'User ID', 'Command']


def parse_request_line(line):
    """Extract one CSV row from a single log line.

    Args:
        line: One raw line of the log file.

    Returns:
        ``[timestamp, host, user_id, command]`` with ``timestamp`` as a
        ``datetime``, or ``None`` when the line is not a request line in the
        expected format (no ``[LEVEL][timestamp]`` header, no
        COMMAND/HOST/USERID message, or a timestamp that does not parse).
    """
    log_match = _REQUEST_LOG_RE.match(line)
    if not log_match:
        return None
    _level, timestamp_str, message = log_match.groups()

    checkin_match = _REQUEST_CHECKIN_RE.search(message)
    if not checkin_match:
        return None

    # Parse the timestamp only for lines that are actually exported, so a
    # malformed timestamp on an unrelated line cannot abort the conversion.
    try:
        timestamp = datetime.strptime(timestamp_str, _REQUEST_TIMESTAMP_FORMAT)
    except ValueError:
        return None

    command, host, user_id = checkin_match.groups()
    return [timestamp, host, user_id, command]


def convert_request_log(log_file_path, output_file_path):
    """Write the request lines of one log file to a CSV file.

    Args:
        log_file_path: Path of the log file to read.
        output_file_path: Path of the CSV file to create or overwrite.

    Returns:
        The number of data rows written (the header row is not counted).
    """
    written = 0
    # The log is opened first: if it cannot be read, no header-only CSV is
    # left behind.
    with open(log_file_path, 'r') as log_file, \
            open(output_file_path, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(_REQUEST_CSV_HEADER)
        for line in log_file:
            row = parse_request_line(line)
            if row is not None:
                writer.writerow(row)
                written += 1
    return written


def convert_checkin_logs(source_dir=input_dir, target_dir=output_dir):
    """Convert every ``*.log`` file in ``source_dir`` to a CSV in ``target_dir``.

    Args:
        source_dir: Directory scanned for log files.
        target_dir: Directory receiving one ``<name>.csv`` per log file; it is
            created when missing.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(target_dir, exist_ok=True)
    for filename in os.listdir(source_dir):
        if not filename.endswith('.log'):
            continue
        log_file_path = os.path.join(source_dir, filename)
        output_file_path = os.path.join(target_dir, f"{os.path.splitext(filename)[0]}.csv")
        convert_request_log(log_file_path, output_file_path)
        print(f'Log data saved to {output_file_path}')


if __name__ == '__main__':
    convert_checkin_logs()
以下格式的 log 的处理方式:
2:22:32 (FINLE) (@FINLE-SLOG@) Time: Wed Mar 20 2024 02:22:32 CST
21:23:47 (FINLE) IN: "ACE_platinum_base" H000001@HW-20230101AP
21:32:34 (FINLE) OUT: "ACE_platinum_base" H000001@HW-2023010AP
import re
import csv
from datetime import datetime
# Timestamp banner, e.g. "Time: Wed Mar 20 2024 02:22:32 CST".
# The group is: weekday, month, day, year, clock, zone name.  The year field
# was missing before, so the banner shown above could never match.
time_pattern = r'Time: (\w+\s+\w+\s+\d+\s+\d+\s+\d+:\d+:\d+\s+\w+)'
# Check-in / check-out record: IN|OUT: "<name>" <user>@<host>
# The host may contain '-' (e.g. HW-20230101AP), so it is matched up to the
# next whitespace instead of with \w+, which truncated it at the hyphen.
in_out_pattern = r'(IN|OUT): "(.+)" ([^\s@]+)@(\S+)'

_LICENSE_TIME_RE = re.compile(time_pattern)
_LICENSE_IN_OUT_RE = re.compile(in_out_pattern)
# Clock at the start of a line; the hour may have one digit and may be
# preceded by padding whitespace.
_LICENSE_CLOCK_RE = re.compile(r'\s*(\d{1,2}:\d{2}:\d{2})')
_LICENSE_CSV_HEADER = ['Timestamp', 'IN/OUT', 'User ID', 'Host', 'Database']


def parse_license_log(lines, feature='ACE_platinum_base'):
    """Collect the IN/OUT records of one feature from license log lines.

    Args:
        lines: Iterable of raw log lines (an open text file works).
        feature: Only records whose quoted name contains this text are kept.

    Returns:
        A list of ``[timestamp, io_type, user_id, host, db_name]`` rows in
        input order, where ``timestamp`` combines the date of the most recent
        ``Time:`` banner with the clock at the start of the record line.
        Records seen before the first banner, or without a leading clock, are
        skipped.

    Raises:
        ValueError: If a matched banner or clock holds values ``strptime``
            rejects (for example an unknown month name or hour 99).
    """
    # NOTE(review): rows keep the date of the latest banner; whether the log
    # emits a banner at every date change is not visible here -- confirm.
    rows = []
    current_date = None
    for line in lines:
        time_match = _LICENSE_TIME_RE.search(line)
        if time_match:
            # Only the calendar date is needed.  The zone name is left out on
            # purpose: strptime's %Z accepts only UTC, GMT and the machine's
            # own zone names, so "CST" failed on most machines.
            _weekday, month, day, year = time_match.group(1).split()[:4]
            current_date = datetime.strptime(f'{month} {day} {year}', '%b %d %Y').date()

        in_out_match = _LICENSE_IN_OUT_RE.search(line)
        if not in_out_match or current_date is None:
            continue
        io_type, db_name, user_id, host = in_out_match.groups()
        if feature not in db_name:
            continue

        # A fixed line[:8] slice breaks on single-digit hours ("2:22:32 ").
        clock_match = _LICENSE_CLOCK_RE.match(line)
        if not clock_match:
            continue
        clock = datetime.strptime(clock_match.group(1), '%H:%M:%S').time()
        rows.append([datetime.combine(current_date, clock), io_type, user_id, host, db_name])
    return rows


def convert_license_log(log_path='FINE.log', csv_path='output.csv', feature='ACE_platinum_base'):
    """Export the IN/OUT records of ``feature`` from ``log_path`` to ``csv_path``.

    Args:
        log_path: License log file to read.
        csv_path: CSV file to create or overwrite.
        feature: Passed through to :func:`parse_license_log`.
    """
    with open(log_path, 'r') as log_file:
        log_data = parse_license_log(log_file, feature)

    with open(csv_path, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(_LICENSE_CSV_HEADER)
        writer.writerows(log_data)
    print(f'Log data saved to {csv_path}')


if __name__ == '__main__':
    convert_license_log()
将 JSON 格式数据导出为 CSV:
import json
import csv
import os
# Directory the CSV files are written to (created on demand).
output_dir = 'output'
# Directory scanned for *.json files.
input_dir = 'input'


def normalize_records(data):
    """Return the parsed JSON payload as a sequence of records.

    A single top-level JSON object is treated as a one-record list; any other
    payload is returned unchanged.
    """
    if isinstance(data, dict):
        return [data]
    return data


def collect_fieldnames(records):
    """Return every key used by ``records``, in first-seen order.

    Taking the keys of the first record only makes ``csv.DictWriter`` raise
    ``ValueError`` as soon as a later record carries an extra key.
    """
    seen = set()
    ordered = []
    for record in records:
        for key in record:
            if key not in seen:
                seen.add(key)
                ordered.append(key)
    return ordered


def convert_json_file(json_file_path, output_file_path):
    """Convert one JSON file (array of objects, or one object) to CSV.

    Args:
        json_file_path: JSON file to read.
        output_file_path: CSV file to create or overwrite.

    Returns:
        The number of data rows written.  With no records, or records without
        any keys, the CSV file is created empty and 0 is returned.
    """
    # Read as bytes so the json module detects UTF-8/16/32 (and a BOM) itself
    # instead of depending on the locale's default text encoding.
    with open(json_file_path, 'rb') as jsonfile:
        data = json.load(jsonfile)

    records = normalize_records(data)
    fieldnames = collect_fieldnames(records)

    with open(output_file_path, 'w', newline='') as csvfile:
        if not fieldnames:
            return 0
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        # Keys missing from a record are written as empty cells (restval).
        writer.writerows(records)
    return len(records)


def convert_json_files(source_dir=input_dir, target_dir=output_dir):
    """Convert every ``*.json`` file in ``source_dir`` to a CSV in ``target_dir``.

    Args:
        source_dir: Directory scanned for JSON files.
        target_dir: Directory receiving one ``<name>.csv`` per JSON file; it
            is created when missing.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(target_dir, exist_ok=True)
    for filename in os.listdir(source_dir):
        if not filename.endswith('.json'):
            continue
        json_file_path = os.path.join(source_dir, filename)
        output_file_path = os.path.join(target_dir, f"{os.path.splitext(filename)[0]}.csv")
        convert_json_file(json_file_path, output_file_path)
        print(f'JSON数据已成功转换为CSV文件: {output_file_path}')


if __name__ == '__main__':
    convert_json_files()