JSON 与 JSONL 互转，以及从 log 文件中提取内容写入 JSON

小炫y

已于 2024-02-19 16:13:08 修改

阅读量662

点赞数 8

文章标签： python

于 2024-02-06 11:44:55 首次发布

本文链接：https://blog.csdn.net/weixin_44740756/article/details/136044874

版权

json转jsonl

import json

def json_to_jsonl(json_file_path, jsonl_file_path):
    """Convert a JSON file into a JSON Lines file.

    A top-level JSON array is written one element per line. Any other
    top-level value (for example a single object) is written as exactly one
    line; iterating it directly would emit only a dict's keys or a string's
    characters and silently lose data.

    Args:
        json_file_path: Path of the UTF-8 encoded JSON file to read.
        jsonl_file_path: Path of the JSONL file to write (overwritten).
    """
    with open(json_file_path, 'r', encoding='utf-8') as json_file:
        data = json.load(json_file)

    # Normalize to a list of records so a lone object becomes one line.
    records = data if isinstance(data, list) else [data]

    with open(jsonl_file_path, 'w', encoding='utf-8') as jsonl_file:
        jsonl_file.writelines(
            json.dumps(entry, ensure_ascii=False) + '\n' for entry in records
        )

if __name__ == "__main__":
    # Example run: convert "json.json" into "output_jsonl.jsonl".
    json_file_path, jsonl_file_path = "json.json", "output_jsonl.jsonl"
    json_to_jsonl(json_file_path, jsonl_file_path)

jsonl转json文件（提取每行的 prompt 字段，生成带 id 的记录列表）

import json

def convert_jsonl_to_json(jsonl_file_path, json_file_path):
    """Convert a JSONL file of prompts into a JSON array of dialog records.

    Each non-blank input line is parsed as a JSON object. Its "prompt" value
    (empty string when missing) becomes the "dialog" field of a new record
    with a 1-based sequential "id", an empty "part_id" and a fixed "target".
    Blank lines, such as a trailing empty line, are skipped instead of
    raising ``json.JSONDecodeError``.

    Args:
        jsonl_file_path: Path of the UTF-8 encoded JSONL file to read.
        json_file_path: Path of the JSON file to write (overwritten).

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    new_data = []

    with open(jsonl_file_path, 'r', encoding='utf-8') as jsonl_file:
        for line in jsonl_file:
            # Blank lines carry no record; json.loads would reject them.
            if not line.strip():
                continue

            data = json.loads(line)
            new_data.append({
                # Ids count only the records actually emitted.
                "id": len(new_data) + 1,
                "part_id": "",
                "dialog": data.get("prompt", ""),
                "target": "其他",
            })

    with open(json_file_path, 'w', encoding='utf-8') as json_file:
        json.dump(new_data, json_file, ensure_ascii=False, indent=2)

if __name__ == "__main__":
    # Example run: convert "train.jsonl" into "output.json".
    jsonl_file_path, json_file_path = "train.jsonl", "output.json"
    convert_jsonl_to_json(jsonl_file_path, json_file_path)

json转Excel表格

import pandas as pd
import json
def json_to_excel(json_file_path, excel_file_path):
    """Write the contents of a JSON file to an Excel workbook.

    The parsed JSON is passed through ``pd.json_normalize`` to build a
    DataFrame, which is then saved with ``DataFrame.to_excel`` without the
    index column.

    Args:
        json_file_path: Path of the UTF-8 encoded JSON file to read.
        excel_file_path: Path of the Excel file to write.
    """
    with open(json_file_path, mode='r', encoding='utf-8') as source:
        records = json.load(source)

    pd.json_normalize(records).to_excel(excel_file_path, index=False)

if __name__ == "__main__":
    # Example run: export "new.json" to "output.xlsx".
    json_file_path, excel_file_path = "new.json", "output.xlsx"
    json_to_excel(json_file_path, excel_file_path)

读取log文件，匹配 req_data 行及其下一行的 output，写入json

import re
import json

def parse_log(log_file_path):
    """Extract request/output pairs from a log file.

    A pair is a line containing ``INFO - req_data: {...}`` immediately
    followed by a line containing ``INFO - output:``. The request payload is
    kept as the raw matched text (it is not parsed as JSON).

    Args:
        log_file_path: Path of the UTF-8 encoded log file to read.

    Returns:
        A list of dicts with keys "INFO" (raw ``{...}`` text of the request)
        and "output" (text following the output marker, possibly empty).
    """
    req_pattern = re.compile(r"INFO - req_data: ({.*})")
    output_marker = "INFO - output:"
    log_entries = []

    with open(log_file_path, 'r', encoding='utf-8') as file:
        # Strip surrounding whitespace and newlines once for every line.
        lines = [raw.strip() for raw in file]

    for index, line in enumerate(lines):
        if "INFO - req_data: " not in line:
            continue

        match = req_pattern.search(line)
        if not match:
            continue

        # A request on the very last line has no following output line.
        if index + 1 >= len(lines):
            break

        output_line = lines[index + 1]
        if output_marker not in output_line:
            continue

        # Stripping removes the trailing space of an empty "output: " line,
        # so split on the marker without the space and drop at most one
        # separator space afterwards.
        tail = output_line.partition(output_marker)[2]
        output_text = tail[1:] if tail.startswith(" ") else tail

        log_entries.append({"INFO": match.group(1), "output": output_text})

    return log_entries


def save_to_json(log_entries, output_json_path):
    """Serialize ``log_entries`` to ``output_json_path`` as indented JSON.

    The file is written as UTF-8 with a two-space indent, and non-ASCII
    characters are kept as-is rather than escaped.
    """
    with open(output_json_path, mode='w', encoding='utf-8') as sink:
        json.dump(
            log_entries,
            sink,
            indent=2,
            ensure_ascii=False,
        )

if __name__ == "__main__":
    # Example run: parse "run.log" and store the extracted pairs in "25.json".
    log_file_path, output_json_path = "run.log", "25.json"

    log_entries = parse_log(log_file_path)
    save_to_json(log_entries, output_json_path)





=================================================
另一种写法：将 req_data 解析为 JSON，仅保留其中的 user_info 字段
import re
import json


def parse_log(log_file_path):
    """Extract ``user_info``/output pairs from a log file.

    A pair is a line containing ``INFO - req_data: {...}`` immediately
    followed by a line containing ``INFO - output:``. The request payload has
    its quoted strings rewritten to double quotes and is then parsed as JSON;
    only its "user_info" value is kept. Payloads that still fail to parse are
    reported on stdout and skipped.

    Args:
        log_file_path: Path of the UTF-8 encoded log file to read.

    Returns:
        A list of dicts with keys "INFO" (the payload's "user_info" value, or
        "" when absent) and "output" (text following the output marker,
        possibly empty).
    """
    req_pattern = re.compile(r"INFO - req_data: ({.*})")
    quoted_pattern = re.compile(r"(['\"])([^'\"]*?)\1")
    output_marker = "INFO - output:"
    log_entries = []

    with open(log_file_path, 'r', encoding='utf-8') as file:
        # Strip surrounding whitespace and newlines once for every line.
        lines = [raw.strip() for raw in file]

    for index, line in enumerate(lines):
        if "INFO - req_data: " not in line:
            continue

        match = req_pattern.search(line)
        if not match:
            continue

        # Rewrite every quoted string with double quotes so that
        # single-quoted payloads become parseable JSON.
        json_str = quoted_pattern.sub(r'"\2"', match.group(1))

        try:
            req_data = json.loads(json_str)
        except json.JSONDecodeError as e:
            print(line)
            print(f"Error decoding JSON: {e}")
            continue

        # A request on the very last line has no following output line.
        if index + 1 >= len(lines):
            break

        output_line = lines[index + 1]
        if output_marker not in output_line:
            continue

        # Stripping removes the trailing space of an empty "output: " line,
        # so split on the marker without the space and drop at most one
        # separator space afterwards.
        tail = output_line.partition(output_marker)[2]
        output_text = tail[1:] if tail.startswith(" ") else tail

        log_entries.append(
            {"INFO": req_data.get("user_info", ""), "output": output_text}
        )

    return log_entries


def save_to_json(log_entries, output_json_path):
    """Serialize ``log_entries`` to ``output_json_path`` as indented JSON.

    The file is written as UTF-8 with a two-space indent, and non-ASCII
    characters are kept as-is rather than escaped.
    """
    with open(output_json_path, mode='w', encoding='utf-8') as sink:
        json.dump(
            log_entries,
            sink,
            indent=2,
            ensure_ascii=False,
        )


if __name__ == "__main__":
    # Example run: parse "a.log" and store the extracted pairs in "b.json".
    log_file_path, output_json_path = "a.log", "b.json"

    log_entries = parse_log(log_file_path)
    save_to_json(log_entries, output_json_path)



==================================================
对 json 文件做后处理：将 INFO 字段截断到 'system_info' 之前，并丢弃不含该标记的记录

import json

def process_json(json_file_path, new_json_file_path):
    """Trim the "INFO" field of each record at the ``'system_info'`` marker.

    Records whose "INFO" text contains ``'system_info'`` are kept with "INFO"
    cut off just before the first occurrence of the marker; their "output"
    value is carried over (empty string when missing). Records without the
    marker are left out of the result.

    Args:
        json_file_path: Path of the UTF-8 encoded JSON file to read.
        new_json_file_path: Path of the JSON file to write (overwritten).
    """
    marker = "'system_info'"

    with open(json_file_path, mode='r', encoding='utf-8') as source:
        records = json.load(source)

    trimmed = []
    for record in records:
        head, found, _ = record.get("INFO", "").partition(marker)
        if not found:
            # Marker absent: this record is not written to the output.
            continue
        trimmed.append({"INFO": head, "output": record.get("output", "")})

    with open(new_json_file_path, mode='w', encoding='utf-8') as sink:
        json.dump(trimmed, sink, ensure_ascii=False, indent=2)

if __name__ == "__main__":
    # Example run: post-process "a.json" into "b.json".
    json_file_path, new_json_file_path = "a.json", "b.json"
    process_json(json_file_path, new_json_file_path)

统计 JSON 记录中各分类（按“语言”字段）的数量


import json
from collections import Counter

def count_target_categories(json_file_path, category_key="语言"):
    """Count how many records of a JSON file fall into each category.

    The file is expected to hold a JSON array of objects. The total number of
    records is printed, then each record is tallied under the value stored at
    ``category_key``; records lacking the key are tallied under "".

    Args:
        json_file_path: Path of the UTF-8 encoded JSON file to read.
        category_key: Name of the field whose values define the categories.
            Defaults to "语言".

    Returns:
        A ``collections.Counter`` mapping each category value to its count.
    """
    with open(json_file_path, 'r', encoding='utf-8') as json_file:
        data = json.load(json_file)

    print("sum:", len(data))

    return Counter(entry.get(category_key, "") for entry in data)

if __name__ == "__main__":
    # Example run: print one "category: count" line per category found in
    # "download_25.json".
    json_file_path = "download_25.json"
    target_count = count_target_categories(json_file_path)

    for category, count in target_count.items():
        print(f"{category}: {count}")