json转jsonl
import json
def json_to_jsonl(json_file_path, jsonl_file_path):
    """Convert a JSON document into a JSON Lines file.

    Args:
        json_file_path: Path of the UTF-8 JSON file to read.
        jsonl_file_path: Path of the JSON Lines file to write; an existing
            file is overwritten.

    A top-level array is written one element per line. Any other top-level
    value (for example a single object) is written as exactly one line
    rather than being iterated, which for an object would emit only its
    key names.
    """
    with open(json_file_path, 'r', encoding='utf-8') as json_file:
        data = json.load(json_file)

    # Normalise to a list of records so a lone object becomes one line.
    records = data if isinstance(data, list) else [data]

    with open(jsonl_file_path, 'w', encoding='utf-8') as jsonl_file:
        jsonl_file.writelines(
            json.dumps(record, ensure_ascii=False) + '\n' for record in records
        )
if __name__ == "__main__":
    # Script entry point: convert the fixed input file into JSON Lines.
    json_file_path, jsonl_file_path = "json.json", "output_jsonl.jsonl"
    json_to_jsonl(json_file_path, jsonl_file_path)
jsonl转json文件
import json
def convert_jsonl_to_json(jsonl_file_path, json_file_path):
    """Re-shape JSON Lines records into a JSON array of dialog entries.

    Each non-blank input line is decoded as JSON and turned into an entry
    with a 1-based sequential ``id``, an empty ``part_id``, the record's
    ``prompt`` value (or ``""`` when absent) as ``dialog``, and the fixed
    ``target`` label.

    Args:
        jsonl_file_path: Path of the UTF-8 JSON Lines file to read.
        json_file_path: Path of the JSON file to write; an existing file is
            overwritten.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    new_data = []
    with open(jsonl_file_path, 'r', encoding='utf-8') as jsonl_file:
        for line in jsonl_file:
            # Blank lines (commonly a trailing one) are not records; feeding
            # them to json.loads would raise JSONDecodeError.
            if not line.strip():
                continue
            data = json.loads(line)
            new_data.append({
                # Numbered by emitted entries so skipped lines leave no gaps.
                "id": len(new_data) + 1,
                "part_id": "",
                "dialog": data.get("prompt", ""),
                "target": "其他",
            })

    with open(json_file_path, 'w', encoding='utf-8') as json_file:
        json.dump(new_data, json_file, ensure_ascii=False, indent=2)
if __name__ == "__main__":
    # Script entry point: convert the fixed JSON Lines file into a JSON array.
    jsonl_file_path, json_file_path = "train.jsonl", "output.json"
    convert_jsonl_to_json(jsonl_file_path, json_file_path)
json转表格
import pandas as pd
import json
def json_to_excel(json_file_path, excel_file_path):
    """Load a JSON file and save its contents as an Excel workbook.

    The decoded JSON is passed through ``pd.json_normalize`` to build a
    DataFrame, which is then written without the index column.

    Args:
        json_file_path: Path of the UTF-8 JSON file to read.
        excel_file_path: Path of the Excel file to write.
    """
    with open(json_file_path, 'r', encoding='utf-8') as source:
        records = json.load(source)
    pd.json_normalize(records).to_excel(excel_file_path, index=False)
if __name__ == "__main__":
    # Script entry point: export the fixed JSON file to an Excel workbook.
    json_file_path, excel_file_path = "new.json", "output.xlsx"
    json_to_excel(json_file_path, excel_file_path)
读取log文件,匹配写入json
import re
import json
def parse_log(log_file_path):
    """Collect request/output pairs from a log file.

    A line matching ``INFO - req_data: {...}`` is paired with the line
    immediately after it, provided that line contains ``INFO - output:``.
    Lines are stripped of surrounding whitespace before matching. The
    request payload is kept as raw text; it is not decoded.

    Args:
        log_file_path: Path of the UTF-8 log file to read.

    Returns:
        A list of ``{"INFO": <raw {...} text>, "output": <text after the
        output marker>}`` dicts, in file order. Requests without a
        following output line are skipped.
    """
    request_pattern = re.compile(r"INFO - req_data: ({.*})")
    output_marker = "INFO - output:"

    with open(log_file_path, 'r', encoding='utf-8') as file:
        lines = [raw.strip() for raw in file]

    log_entries = []
    # The "" sentinel stands in for "the line after the last one", so a
    # request on the final line is skipped instead of raising IndexError.
    for line, output_line in zip(lines, lines[1:] + [""]):
        match = request_pattern.search(line)
        if match is None or output_marker not in output_line:
            continue
        # partition() never fails; an empty output leaves the stripped line
        # ending in "output:", which made split("INFO - output: ")[1] raise.
        output_text = output_line.partition(output_marker)[2]
        if output_text.startswith(" "):
            # Drop only the single separator space after the marker.
            output_text = output_text[1:]
        log_entries.append({"INFO": match.group(1), "output": output_text})
    return log_entries
def save_to_json(log_entries, output_json_path):
    """Serialize ``log_entries`` to ``output_json_path`` as UTF-8 JSON.

    The output uses a 2-space indent and keeps non-ASCII characters
    unescaped. An existing file is overwritten.
    """
    with open(output_json_path, mode='w', encoding='utf-8') as sink:
        json.dump(log_entries, sink, indent=2, ensure_ascii=False)
if __name__ == "__main__":
    # Script entry point: parse the fixed log file and store the pairs.
    log_file_path, output_json_path = "run.log", "25.json"
    log_entries = parse_log(log_file_path)
    save_to_json(log_entries, output_json_path)
=================================================
import re
import json
def parse_log(log_file_path):
    """Collect ``user_info``/output pairs from a log file.

    A line matching ``INFO - req_data: {...}`` has its quoted strings
    rewritten to double quotes and is then decoded as JSON. When the line
    immediately after it contains ``INFO - output:``, the decoded
    ``user_info`` value (``""`` when absent) is paired with the output text.
    Lines are stripped of surrounding whitespace before matching.

    Args:
        log_file_path: Path of the UTF-8 log file to read.

    Returns:
        A list of ``{"INFO": <user_info value>, "output": <text after the
        output marker>}`` dicts, in file order. Payloads that fail to
        decode are printed with the error and skipped; requests without a
        following output line are skipped silently.
    """
    request_pattern = re.compile(r"INFO - req_data: ({.*})")
    quoted_pattern = re.compile(r"(['\"])([^'\"]*?)\1")
    output_marker = "INFO - output:"

    with open(log_file_path, 'r', encoding='utf-8') as file:
        lines = [raw.strip() for raw in file]

    log_entries = []
    # The "" sentinel stands in for "the line after the last one", so a
    # request on the final line is skipped instead of raising IndexError.
    for line, output_line in zip(lines, lines[1:] + [""]):
        match = request_pattern.search(line)
        if match is None:
            continue
        # Only quoted strings are rewritten; anything else in the payload
        # that is not valid JSON still fails below and is reported.
        json_str = quoted_pattern.sub(r'"\2"', match.group(1))
        try:
            req_data = json.loads(json_str)
        except json.JSONDecodeError as e:
            print(line)
            print(f"Error decoding JSON: {e}")
            continue
        if output_marker not in output_line:
            continue
        # partition() never fails; an empty output leaves the stripped line
        # ending in "output:", which made split("INFO - output: ")[1] raise.
        output_text = output_line.partition(output_marker)[2]
        if output_text.startswith(" "):
            # Drop only the single separator space after the marker.
            output_text = output_text[1:]
        log_entries.append(
            {"INFO": req_data.get("user_info", ""), "output": output_text}
        )
    return log_entries
def save_to_json(log_entries, output_json_path):
    """Serialize ``log_entries`` to ``output_json_path`` as UTF-8 JSON.

    The output uses a 2-space indent and keeps non-ASCII characters
    unescaped. An existing file is overwritten.
    """
    with open(output_json_path, mode='w', encoding='utf-8') as sink:
        json.dump(log_entries, sink, indent=2, ensure_ascii=False)
if __name__ == "__main__":
    # Script entry point: parse the fixed log file and store the pairs.
    log_file_path, output_json_path = "a.log", "b.json"
    log_entries = parse_log(log_file_path)
    save_to_json(log_entries, output_json_path)
==================================================
import json
def process_json(json_file_path, new_json_file_path):
    """Truncate each entry's ``INFO`` text at the ``'system_info'`` marker.

    For every entry whose ``INFO`` string contains ``'system_info'``, the
    text before the first occurrence is kept together with the entry's
    ``output`` (``""`` when absent). Entries without the marker are left
    out of the result.
    NOTE(review): the nesting was reconstructed from unindented source;
    confirm that dropping marker-less entries is intended.

    Args:
        json_file_path: Path of the UTF-8 JSON file to read.
        new_json_file_path: Path of the JSON file to write; an existing
            file is overwritten.
    """
    marker = "'system_info'"
    with open(json_file_path, 'r', encoding='utf-8') as json_file:
        records = json.load(json_file)

    new_entries = []
    for record in records:
        head, found, _ = record.get("INFO", "").partition(marker)
        if not found:
            continue
        new_entries.append({"INFO": head, "output": record.get("output", "")})

    with open(new_json_file_path, 'w', encoding='utf-8') as new_json_file:
        json.dump(new_entries, new_json_file, ensure_ascii=False, indent=2)
if __name__ == "__main__":
    # Script entry point: trim the INFO fields of the fixed input file.
    json_file_path, new_json_file_path = "a.json", "b.json"
    process_json(json_file_path, new_json_file_path)
统计字典分类数量
import json
from collections import Counter
def count_target_categories(json_file_path, category_key="语言"):
    """Count how often each category value occurs in a JSON array of objects.

    The total number of entries is printed as ``sum: N`` before counting.

    Args:
        json_file_path: Path of the UTF-8 JSON file to read.
        category_key: Key whose value is tallied for each entry. Defaults
            to ``"语言"``, the key this function previously hard-coded.

    Returns:
        A ``collections.Counter`` mapping each value found under
        ``category_key`` to its number of occurrences; entries lacking the
        key are counted under ``""``.
    """
    with open(json_file_path, 'r', encoding='utf-8') as json_file:
        data = json.load(json_file)
    print("sum:", len(data))
    return Counter(entry.get(category_key, "") for entry in data)
if __name__ == "__main__":
    # Script entry point: tally the fixed input file and print one
    # "category: count" line per distinct value.
    json_file_path = "download_25.json"
    target_count = count_target_categories(json_file_path)
    for category, count in target_count.items():
        print(f"{category}: {count}")