读取 XML 格式的文件,提取情感信息,并存储为 JSONL 格式文件,代码如下:
# Converts the weibo emotion-classification XML corpus into a JSONL
# instruction dataset: one record per sentence marked opinionated="Y".
import xml.etree.ElementTree as ET
import json

# Maps the corpus' English emotion labels to the Chinese labels that are
# written to the "output" field of each record.
output_type = {
    'like': '喜欢',
    'happiness': '高兴',
    'surprise': '惊讶',
    'disgust': '厌恶',  # was '恐惧' (fear), which mislabeled every disgust sentence
    'fear': '害怕',
    'anger': '生气',
    'sadness': '忧伤',
    'none': '无',
}


def extract_records(root):
    """Build one record per opinionated sentence found under *root*.

    Visits every ``sentence`` element that is a direct child of a ``weibo``
    element directly below *root*, keeps those whose ``opinionated``
    attribute equals ``'Y'`` and converts each into a dict with the keys
    ``instruction``, ``input`` and ``output`` (in that order).

    Args:
        root: Root element of the parsed corpus.

    Returns:
        A list of record dicts in document order.

    Raises:
        KeyError: If an opinionated sentence has a missing or unknown
            ``emotion-1-type`` attribute.
    """
    records = []
    for weibo in root.findall('weibo'):
        for sentence_elem in weibo.findall('sentence'):
            if sentence_elem.get('opinionated') != 'Y':
                continue

            emotion = sentence_elem.get('emotion-1-type')
            if emotion not in output_type:
                # Fail loudly instead of writing a mislabeled record; the
                # message names the offending value to ease debugging.
                raise KeyError(
                    f'missing or unknown emotion-1-type {emotion!r} '
                    f'on an opinionated sentence'
                )

            records.append({
                # NOTE(review): the prompt asks for 积极/消极 while "output"
                # holds fine-grained emotion labels - confirm which is intended.
                'instruction': '请分析评论的情感色彩,回复积极或者消极',
                # Element.text is None for an empty element.
                'input': (sentence_elem.text or '').strip(),
                'output': output_type[emotion],
            })
    return records


def main():
    """Parse the corpus file, convert it and write ``outputweibo.jsonl``."""
    # Third-party dependency, imported first so a missing package fails
    # before any work is done, while keeping extract_records importable
    # without it.
    import jsonlines

    root = ET.parse('EmotionClassficationTest.xml').getroot()
    sentences_list = extract_records(root)

    with jsonlines.open('outputweibo.jsonl', mode='w') as file:
        file.write_all(sentences_list)

    print('处理完成,结果已保存至 outputweibo.jsonl')


if __name__ == '__main__':
    main()
# Converts the weibo emotion-classification XML corpus into a JSONL
# instruction dataset: one record per sentence marked opinionated="Y".
import xml.etree.ElementTree as ET
import json

# Maps the corpus' English emotion labels to the Chinese labels that are
# written to the "output" field of each record.
output_type = {
    'like': '喜欢',
    'happiness': '高兴',
    'surprise': '惊讶',
    'disgust': '厌恶',  # was '恐惧' (fear), which mislabeled every disgust sentence
    'fear': '害怕',
    'anger': '生气',
    'sadness': '忧伤',
    'none': '无',
}


def extract_records(root):
    """Build one record per opinionated sentence found under *root*.

    Visits every ``sentence`` element that is a direct child of a ``weibo``
    element directly below *root*, keeps those whose ``opinionated``
    attribute equals ``'Y'`` and converts each into a dict with the keys
    ``instruction``, ``input`` and ``output`` (in that order).

    Args:
        root: Root element of the parsed corpus.

    Returns:
        A list of record dicts in document order.

    Raises:
        KeyError: If an opinionated sentence has a missing or unknown
            ``emotion-1-type`` attribute.
    """
    records = []
    for weibo in root.findall('weibo'):
        for sentence_elem in weibo.findall('sentence'):
            if sentence_elem.get('opinionated') != 'Y':
                continue

            emotion = sentence_elem.get('emotion-1-type')
            if emotion not in output_type:
                # Fail loudly instead of writing a mislabeled record; the
                # message names the offending value to ease debugging.
                raise KeyError(
                    f'missing or unknown emotion-1-type {emotion!r} '
                    f'on an opinionated sentence'
                )

            records.append({
                # NOTE(review): the prompt asks for 积极/消极 while "output"
                # holds fine-grained emotion labels - confirm which is intended.
                'instruction': '请分析评论的情感色彩,回复积极或者消极',
                # Element.text is None for an empty element.
                'input': (sentence_elem.text or '').strip(),
                'output': output_type[emotion],
            })
    return records


def main():
    """Parse the corpus file, convert it and write ``outputweibo.jsonl``."""
    # Third-party dependency, imported first so a missing package fails
    # before any work is done, while keeping extract_records importable
    # without it.
    import jsonlines

    root = ET.parse('EmotionClassficationTest.xml').getroot()
    sentences_list = extract_records(root)

    with jsonlines.open('outputweibo.jsonl', mode='w') as file:
        file.write_all(sentences_list)

    print('处理完成,结果已保存至 outputweibo.jsonl')


if __name__ == '__main__':
    main()