# 传染病数据处理 (infectious disease data processing)

"""
输入一个JSON list ,返回的格式为对话训练数据
"""
import json
import pandas as pd
import re
import uuid

# Maps English field paths of an infectious-disease record to their Chinese
# labels. Despite the name, the lookup direction is English -> Chinese.
# Dotted keys name nested fields; keys without a dot are top-level sections.
# Each key appears exactly once (a repeated "referenceSource" entry was removed;
# it carried the same value, so the resulting mapping is unchanged).
zh_en = {
  "diagnosisMethods": "诊断方法",
  "infectiousDiseaseFeatures": "传染病特征",
  "infectiousDiseaseSymptom": "传染病症状",
  "infectiousDiseaseDefinition": "传染病定义",
  "infectiousDiseaseName": "传染病名称",
  "aliases": "别名",
  "protectiveMeasures": "防护措施",
  "referenceSource": "参考来源",
  "infectiousDiseaseFeatures.locality.description": "传染病特征.地区描述",
  "infectiousDiseaseFeatures.seasonality": "传染病特征.季节性描述.月份.月份",
  "infectiousDiseaseFeatures.popularity.levels.level": "传染病特征.流行程度.等级.等级",
  "diagnosisMethods.diagnosisMethod": "诊断方法.诊断方法",
  "infectiousDiseaseFeatures.infectiousSources.infectiousSource": "传染病特征.传染源.传染源",
  "aliases.alias": "别名.别名",
  "infectiousDiseaseDefinition.diseaseType": "传染病定义.疾病类型",
  "infectiousDiseaseFeatures.infectivityDesc": "传染病特征.传染性描述",
  "infectiousDiseaseFeatures.infectivity": "传染病特征.传染性",
  "infectiousDiseaseSymptom.infectionSymptom": "传染病症状.感染症状",
  "protectiveMeasures.disinfect": "防护措施.消毒",
  "infectiousDiseaseFeatures.susceptiblePopulation": "传染病特征.易感人群",
  "infectiousDiseaseDefinition.pathogen": "传染病定义.病原体",
  "protectiveMeasures.preventiveMeasure": "防护措施.预防措施",
  "infectiousDiseaseFeatures.popularity.description": "传染病特征.流行程度描述",
  "infectiousDiseaseSymptom.complication": "传染病症状.并发症",
  "infectiousDiseaseFeatures.transmissionRoutes.transmissionRoute": "传染病特征.传播途径.传播途径",
  "infectiousDiseaseSymptom.infectionImmunity": "传染病症状.感染免疫",
  "infectiousDiseaseSymptom.treatment": "传染病症状.治疗",
  "infectiousDiseaseDefinition.diseaseDefinition": "传染病定义.疾病定义",
}
def read_csv(path="/home/wangyp/Big_Model/infectious_disease/data/kk.csv"):
    """Load the disease records stored as JSON strings in a CSV file.

    Args:
        path: CSV file to read. It must have an ``info_json`` column whose
            cells are JSON documents with an ``infectiousDiseaseSpecXml``
            root key. Defaults to the original hard-coded location.

    Returns:
        A list with the value found under ``infectiousDiseaseSpecXml`` for
        every row whose ``info_json`` cell is not empty, in file order.

    Raises:
        KeyError: if a row's JSON has no ``infectiousDiseaseSpecXml`` key.
        json.JSONDecodeError: if a cell is not valid JSON.
    """
    df = pd.read_csv(path, encoding="utf8")
    # Empty cells are read as NaN (a float), which json.loads cannot parse;
    # drop them instead of crashing on the first blank row.
    raw_documents = df["info_json"].dropna().tolist()
    return [json.loads(raw)["infectiousDiseaseSpecXml"] for raw in raw_documents]

def transfer_json2conversitions(json_list):
    """Print the disease name of every record, or a notice when it is absent.

    Each record is expected to wrap its payload under the
    ``infectiousDiseaseSpecXml`` key. Records that lack the wrapper, whose
    wrapper is not a dict, or whose payload has no ``infectiousDiseaseName``
    print the "field not found" notice instead of raising.

    Args:
        json_list: iterable of record dicts.

    Returns:
        None. The function only prints; it builds no training data yet.
    """
    for json_item in json_list:
        spec = json_item.get("infectiousDiseaseSpecXml") if isinstance(json_item, dict) else None
        # A missing or null wrapper used to raise AttributeError on the chained .get().
        name = spec.get("infectiousDiseaseName") if isinstance(spec, dict) else None
        if name is not None:
            print(name)
        else:
            print("没有获取到这个字段")


def print_json_fields(json_obj, prefix=''):
    """Print the dotted path of every scalar field reachable through dicts.

    Dict keys are appended to ``prefix``; list positions are appended as
    their numeric index. Only scalar values stored directly under a dict key
    are printed: scalars that are direct list elements, and any non-container
    passed as ``json_obj``, produce no output.

    Args:
        json_obj: parsed JSON value (dict, list or scalar).
        prefix: path accumulated so far, including its trailing dot.
    """
    if isinstance(json_obj, dict):
        for name, child in json_obj.items():
            path = prefix + name
            if not isinstance(child, (dict, list)):
                print(path)
                continue
            print_json_fields(child, path + '.')
        return

    if not isinstance(json_obj, list):
        return

    for position, element in enumerate(json_obj):
        print_json_fields(element, prefix + str(position) + '.')


def get_all_field(path="/home/wangyp/Big_Model/infectious_disease/data/f.txt"):
    """Return the distinct field paths listed in a text file, without list indices.

    The file holds one dotted field path per line (the format printed by
    ``print_json_fields``), where list positions appear as purely numeric
    segments such as ``a.0.b``. Those segments are removed so that
    ``a.0.b`` and ``a.1.b`` collapse to ``a.b``.

    Args:
        path: text file to read. Defaults to the original hard-coded location.

    Returns:
        A set of index-free field paths. Blank lines are ignored.
    """
    # A numeric segment is a digit run that starts the path or follows a dot,
    # and is itself followed by a literal dot. The previous pattern left the
    # dot unescaped, so it also removed digits inside names ("h1n1.x" -> "hx").
    index_segment = re.compile(r'(?<![^.])\d+\.')
    with open(path, 'r', encoding='utf8') as f:
        stripped_lines = (line.strip() for line in f)
        return {index_segment.sub('', line) for line in stripped_lines if line}


# Explanatory paragraph appended to every epidemic-level answer.
_POPULARITY_LEVELS_NOTE = "\n“传染病的流行程度可以根据其在人群中传播的广泛性和速度分为四个级别:‘暴发’、‘散发’、‘流行’和‘大流行’。‘暴发’是指在特定时间和地点内病例数量的突然增加;‘散发’指的是病例以较低频率和较小规模发生;‘流行’意味着病例在一定区域内广泛传播;而‘大流行’则是指疾病在全球范围内广泛传播,影响到大量人群。”"


def _is_blank(value):
    """Return True for the two "missing" markers used by the data: None and ""."""
    return value is None or value == ""


def _dig(record, path):
    """Follow ``path`` through nested dicts; return None if any level is absent or not a dict."""
    node = record
    for key in path:
        if not isinstance(node, dict):
            return None
        node = node.get(key)
    return node


def _strip_quotes(text):
    """Remove every single and double quote character from ``text``."""
    return text.replace('"', "").replace("'", "")


def _text_or_empty(value):
    """Return ``value`` as text, mapping None to the empty string."""
    return "" if value is None else str(value)


def _scalar_answer(path, strip_quotes=True):
    """Build an answer function that returns the value stored at ``path``."""
    def build(item, disease_name):
        value = _dig(item, path)
        if not strip_quotes:
            return value
        # Quote stripping only makes sense for text; anything else is skipped.
        return _strip_quotes(value) if isinstance(value, str) else None
    return build


def _levels_answer(item, disease_name):
    """Answer listing the epidemic levels (a single value or a list of strings)."""
    level = _dig(item, ("infectiousDiseaseFeatures", "popularity", "levels", "level"))
    if _is_blank(level):
        return None
    if isinstance(level, list):
        if not all(isinstance(entry, str) for entry in level):
            return None  # cannot be joined; skip the record rather than fail
        listed = "、".join(level)
    else:
        listed = str(level)
    return "该传染病的级别有:" + listed + _POPULARITY_LEVELS_NOTE


def _describe_check(method):
    """Format one diagnosis method as "<checkSubject>: <description>"."""
    return _text_or_empty(method.get("checkSubject")) + ": " + _text_or_empty(method.get("description"))


def _diagnosis_answer(item, disease_name):
    """Answer describing the diagnosis method(s): one dict or a list of dicts."""
    method = _dig(item, ("diagnosisMethods", "diagnosisMethod"))
    if isinstance(method, dict):
        body = _describe_check(method)
    elif isinstance(method, list):
        body = "".join(_describe_check(entry) + "\n" for entry in method if isinstance(entry, dict))
        if not body:
            return None
    else:
        return None
    return "该疾病的诊断方法如下\n:" + body


def _source_answer(item, disease_name):
    """Answer naming the infection source(s): one dict or a list of dicts."""
    source = _dig(item, ("infectiousDiseaseFeatures", "infectiousSources", "infectiousSource"))
    if isinstance(source, dict):
        name = source.get("infectiousSourceName")
        if name is None:
            return None
        return "主要的感染源是" + _text_or_empty(name) + _text_or_empty(source.get("description"))
    if isinstance(source, list):
        parts = []
        for entry in source:
            if not isinstance(entry, dict):
                continue
            name = entry.get("infectiousSourceName")
            if name is not None:
                parts.append(_text_or_empty(name))
            description = entry.get("description")
            if description is not None:
                parts.append(_text_or_empty(description) + "\n")
        return "".join(parts)
    return None


def _alias_answer(item, disease_name):
    """Answer listing the aliases, or "无" when the record has none."""
    alias = _dig(item, ("aliases", "alias"))
    if isinstance(alias, list):
        if not all(isinstance(entry, str) for entry in alias):
            return None
        alias = ",".join(alias)
    if _is_blank(alias):
        return "无"
    if not isinstance(alias, str):
        return None
    return disease_name + "的别名是" + alias


def _transmission_answer(item, disease_name):
    """Answer listing the transmission route(s): a string or a list of strings."""
    route = _dig(item, ("infectiousDiseaseFeatures", "transmissionRoutes", "transmissionRoute"))
    if isinstance(route, str):
        return _strip_quotes(route)
    if isinstance(route, list) and all(isinstance(entry, str) for entry in route):
        return ",".join(_strip_quotes(entry) for entry in route)
    return None


# field name -> (question template, function(item, disease_name) -> answer or None)
_FIELD_SPECS = {
    "infectiousDiseaseFeatures.locality.description": (
        "{name}该传染病在哪些地区容易流行?",
        _scalar_answer(("infectiousDiseaseFeatures", "locality", "description"), strip_quotes=False),
    ),
    "infectiousDiseaseFeatures.seasonality": (
        "{name}该传染病的季节性特征是什么?它与哪些自然气候条件有关?在一年中的哪些月份更为常见?",
        # The answer is the seasonality value itself (it used to be the locality description).
        _scalar_answer(("infectiousDiseaseFeatures", "seasonality"), strip_quotes=False),
    ),
    "infectiousDiseaseFeatures.popularity.levels.level": (
        "{name}这个传染病的流行程度有哪些不同的级别?",
        _levels_answer,
    ),
    "diagnosisMethods.diagnosisMethod": (
        "{name}该传染病的诊断方法是什么?",
        _diagnosis_answer,
    ),
    "infectiousDiseaseFeatures.infectiousSources.infectiousSource": (
        "{name}这种传染病主要的传染源是什么?",
        _source_answer,
    ),
    "aliases.alias": (
        "{name}这个疾病的别名是什么?",
        _alias_answer,
    ),
    "infectiousDiseaseDefinition.diseaseType": (
        # This field used to be asked with the "definition" question, which
        # belongs to diseaseDefinition below.
        "{name}该传染病属于哪种疾病类型?",
        _scalar_answer(("infectiousDiseaseDefinition", "diseaseType"), strip_quotes=False),
    ),
    "infectiousDiseaseFeatures.infectivityDesc": (
        "简单描述一下传染病{name}",
        _scalar_answer(("infectiousDiseaseFeatures", "infectivityDesc"), strip_quotes=False),
    ),
    "infectiousDiseaseFeatures.infectivity": (
        "{name}该传染病传染强度怎么样?",
        _scalar_answer(("infectiousDiseaseFeatures", "infectivity"), strip_quotes=False),
    ),
    "infectiousDiseaseSymptom.infectionSymptom": (
        "{name}该传染病传染后有什么样的症状?",
        _scalar_answer(("infectiousDiseaseSymptom", "infectionSymptom")),
    ),
    "protectiveMeasures.disinfect": (
        "{name}该传染病有哪些防护措施?",
        _scalar_answer(("protectiveMeasures", "disinfect")),
    ),
    "infectiousDiseaseFeatures.susceptiblePopulation": (
        "{name}该传染病那些人容易感染?",
        _scalar_answer(("infectiousDiseaseFeatures", "susceptiblePopulation")),
    ),
    "infectiousDiseaseDefinition.pathogen": (
        "{name}该传染病的病原体有什么特征?",
        _scalar_answer(("infectiousDiseaseDefinition", "pathogen")),
    ),
    "protectiveMeasures.preventiveMeasure": (
        "{name}该传染病的可以采取哪些预防防护措施?",
        _scalar_answer(("protectiveMeasures", "preventiveMeasure")),
    ),
    "infectiousDiseaseFeatures.popularity.description": (
        "{name}该传染病主要在哪些地区流行?在哪些国家或地区更为常见?请简单描述其流行性。",
        _scalar_answer(("infectiousDiseaseFeatures", "popularity", "description")),
    ),
    "referenceSource": (
        "{name}该传染病参考文献或者来源是什么?",
        _scalar_answer(("referenceSource",)),
    ),
    "infectiousDiseaseSymptom.complication": (
        "{name}该传染病的并发症状有哪些?",
        _scalar_answer(("infectiousDiseaseSymptom", "complication")),
    ),
    "infectiousDiseaseFeatures.transmissionRoutes.transmissionRoute": (
        "{name}该传染病的传播路径有哪些?",
        _transmission_answer,
    ),
    "infectiousDiseaseSymptom.infectionImmunity": (
        "感染了{name}这种传染病后,患者康复了还会再次感染吗?",
        _scalar_answer(("infectiousDiseaseSymptom", "infectionImmunity")),
    ),
    "infectiousDiseaseSymptom.treatment": (
        "{name}这种传染病怎么治疗?",
        _scalar_answer(("infectiousDiseaseSymptom", "treatment")),
    ),
    "infectiousDiseaseDefinition.diseaseDefinition": (
        # This field used to reuse the treatment question.
        "{name}该传染病的疾病定义是什么?",
        _scalar_answer(("infectiousDiseaseDefinition", "diseaseDefinition")),
    ),
}


def gen_conversation_from_fields(field_name, json_list):
    """Build one question/answer conversation per record for a single field.

    For every record that has a disease name and a usable value for
    ``field_name``, a two-turn conversation is produced: the user asks a
    fixed question about the disease and the assistant answers with the
    field's content.

    Records are skipped, never raised on, when the disease name is missing,
    when any level of the nested path is absent or null, or when the value
    has a shape the field does not support. The one exception is
    ``aliases.alias``, which answers "无" when a record has no alias.

    Args:
        field_name: dotted field path selecting the question and the value
            to read, e.g. ``"infectiousDiseaseSymptom.treatment"``.
        json_list: iterable of disease record dicts.

    Returns:
        A list of dicts shaped as
        ``{"id": <uuid4 string>, "conversations": [user turn, assistant turn]}``.
        The list is empty when ``field_name`` is not a supported field.
    """
    conversations = []
    spec = _FIELD_SPECS.get(field_name)
    if spec is None:
        return conversations

    question_template, build_answer = spec
    for item in json_list:
        if not isinstance(item, dict):
            continue
        disease_name = item.get("infectiousDiseaseName")
        if not isinstance(disease_name, str):
            continue  # a question cannot be phrased without the disease name
        answer = build_answer(item, disease_name)
        if _is_blank(answer):
            continue
        conversations.append({
            "id": str(uuid.uuid4()),
            "conversations": [
                {"from": "user", "value": question_template.format(name=disease_name)},
                {"from": "assistant", "value": answer},
            ],
        })
    return conversations


def get_diagnosisMethod_answer(jsonbody, key):
    """Return the answer text for ``key``; only "diagnosisMethods" is supported.

    For ``key == "diagnosisMethods"`` the answer is built from the record's
    ``diagnosisMethods.diagnosisMethod`` value, which may be one dict or a
    list of dicts. Each dict is rendered as
    ``"<checkSubject>: <description>"``; list entries are each followed by a
    newline. Missing or null ``checkSubject``/``description`` values are
    rendered as empty text.

    Args:
        jsonbody: disease record dict.
        key: top-level field name being answered.

    Returns:
        The formatted text, or None when ``key`` is any other field, when the
        record has no ``diagnosisMethods`` section, or when the method value
        is neither a dict nor a list.
    """
    if key != "diagnosisMethods":
        return None

    methods = jsonbody.get("diagnosisMethods")
    if not isinstance(methods, dict):
        # Missing or null section: previously an AttributeError.
        return None
    diagnosisMethod = methods.get("diagnosisMethod")

    def describe(entry):
        # dict.get(k, "") still returns None for an explicit null, so map it here.
        subject = entry.get("checkSubject")
        description = entry.get("description")
        subject_text = "" if subject is None else str(subject)
        description_text = "" if description is None else str(description)
        return subject_text + ": " + description_text

    if isinstance(diagnosisMethod, dict):
        return describe(diagnosisMethod)
    if isinstance(diagnosisMethod, list):
        return "".join(describe(entry) + "\n" for entry in diagnosisMethod if isinstance(entry, dict))
    return None


def translate_en_zh(json_list, output_path="/home/wangyp/Big_Model/infectious_disease/data/t.json"):
    """Turn top-level record fields into Chinese Q/A conversations and save them.

    For each record, every top-level key that has a Chinese label in
    ``zh_en`` and for which ``get_diagnosisMethod_answer`` can produce an
    answer becomes one conversation: the user asks what the disease's
    <label> is and the assistant replies with the answer text.

    Fields without an answer are skipped. They used to be emitted with the
    literal text "None" as the answer, because the missing answer was passed
    through ``str()``. Records without a disease name are skipped as well,
    since the question cannot be phrased.

    Args:
        json_list: iterable of disease record dicts.
        output_path: file that receives the conversations as an indented
            JSON array. Defaults to the original hard-coded location.

    Returns:
        None. The result is written to ``output_path``.
    """
    conversation_list = []
    for record in json_list:
        disease_name = record.get("infectiousDiseaseName")
        if disease_name is None:
            continue
        for key, value in record.items():
            label = zh_en.get(key)
            if label is None or value is None:
                continue
            answer = get_diagnosisMethod_answer(record, key)
            if answer is None:
                continue
            question = disease_name + "这个疾病的" + label + "是什么?"
            reply = disease_name + "的" + label + "是" + str(answer)
            conversation_list.append({
                "id": str(uuid.uuid4()),
                "conversations": [
                    {"from": "user", "value": question},
                    {"from": "assistant", "value": reply},
                ],
            })

    with open(output_path, 'w', encoding='utf8') as f:
        json.dump(conversation_list, f, ensure_ascii=False, indent=4)


def GPT_QA():
    """Placeholder with no implementation yet; calling it does nothing."""
    return None



def merge_data(output_path='/home/wangyp/Big_Model/infectious_disease/data/t.json'):
    """Generate conversations for every supported field and save them.

    Reads the disease records with ``read_csv``, builds the conversations of
    each field with ``gen_conversation_from_fields``, prints how many were
    produced, and writes them to ``output_path`` as JSON Lines (one JSON
    object per line).

    The records used to be written with ``str()``, i.e. as Python dict
    literals with single quotes, which ``json.loads`` cannot read back.
    ``add_q`` parses this file line by line with ``json.loads``.

    Args:
        output_path: destination file. Defaults to the original hard-coded
            location.

    Returns:
        None.
    """
    records = read_csv()
    fields = [
        "infectiousDiseaseFeatures.locality.description",
        "infectiousDiseaseFeatures.seasonality",
        "infectiousDiseaseFeatures.popularity.levels.level",
        "diagnosisMethods.diagnosisMethod",
        "infectiousDiseaseFeatures.infectiousSources.infectiousSource",
        "aliases.alias",
        "infectiousDiseaseDefinition.diseaseType",
        "infectiousDiseaseFeatures.infectivityDesc",
        "infectiousDiseaseFeatures.infectivity",
        "infectiousDiseaseSymptom.infectionSymptom",
        "protectiveMeasures.disinfect",
        "infectiousDiseaseFeatures.susceptiblePopulation",
        "infectiousDiseaseDefinition.pathogen",
        "protectiveMeasures.preventiveMeasure",
        "infectiousDiseaseFeatures.popularity.description",
        "referenceSource",
        "infectiousDiseaseSymptom.complication",
        "infectiousDiseaseFeatures.transmissionRoutes.transmissionRoute",
        "infectiousDiseaseSymptom.infectionImmunity",
        "infectiousDiseaseSymptom.treatment",
        "infectiousDiseaseDefinition.diseaseDefinition",
    ]
    conversations = []
    for field_name in fields:
        conversations.extend(gen_conversation_from_fields(field_name=field_name, json_list=records))
    print(len(conversations))
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write("\n".join(json.dumps(conversation, ensure_ascii=False) for conversation in conversations))



def add_q(question_path='/mnt/sdd/big_model_data/data_multi_chat/question.json',
          source_path='/home/wangyp/Big_Model/infectious_disease/data/t.json',
          output_path='/home/wangyp/Big_Model/infectious_disease/data/t2.json'):
    """Append extra conversation turns to existing conversations, by position.

    The i-th entry of the question file has its ``conversations`` list
    appended to the i-th conversation of the source file. Source
    conversations beyond the end of the question file are kept unchanged.
    The first three merged records are printed, and the full result is
    written to ``output_path`` as an indented JSON array.

    The result used to be written as Python dict reprs joined by ",\\n",
    which is not valid JSON.

    Args:
        question_path: JSON file holding a list of objects, each with a
            ``conversations`` list of extra turns.
        source_path: JSON Lines file with one conversation object per line;
            blank lines are ignored.
        output_path: destination file for the merged conversations.

    Returns:
        None.
    """
    with open(question_path, 'r', encoding='utf-8') as f:
        extra_turns = json.load(f)

    with open(source_path, 'r', encoding='utf-8') as f:
        records = [json.loads(line) for line in f if line.strip()]

    merged = []
    for index, record in enumerate(records):
        if index < len(extra_turns):
            record['conversations'] += extra_turns[index]['conversations']
        merged.append(record)

    print(merged[:3])
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(merged, f, ensure_ascii=False, indent=4)



if __name__ == '__main__':
    # merge_data() writes the t.json file that add_q() reads. It is left
    # disabled here, so t.json must already exist when the script runs.
    # merge_data()
    # Append the extra question turns to the conversations in t.json and write t2.json.
    add_q()





评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值