使用第三方的模型标注自然语言
1、二分类模型,代码如下:
# Label review sentiment as 积极 (positive) or 消极 (negative) with a hosted chat
# model and save the results as instruction/input/output records in a JSON
# Lines file.
import os
import time

import jsonlines
import pandas as pd
import requests

API_URL = "https://o7i0y198m25cz3xf.aistudio-hub.baidu.com/chat/completions"

# Access token: see https://aistudio.baidu.com/index/accessToken
# NOTE(review): a literal token is embedded in the source. Prefer supplying it
# through the AISTUDIO_ACCESS_TOKEN environment variable; the embedded value is
# kept only as a fallback so the script behaves as before, and should be
# rotated and removed.
ACCESS_TOKEN = os.environ.get(
    "AISTUDIO_ACCESS_TOKEN", "a2dc61f2f9e49e96681af067d9969325919fee7b"
)
headers = {
    "Authorization": ACCESS_TOKEN,
    "Content-Type": "application/json",
}

# Seconds to wait for the service; without a timeout a stalled connection
# would block the labelling run forever.
REQUEST_TIMEOUT = 60

INPUT_CSV = 'train11-2.csv'
OUTPUT_JSONL = 'outputjsonl222222.jsonl'

# The original loop stopped once more than 10 records had been collected,
# i.e. after 11 rows. Raise this to label more of the file.
MAX_SAMPLES = 11

INSTRUCTION = '请分析评论的情感色彩,仅回复积极或者消极'
POSITIVE = '积极'
NEGATIVE = '消极'


def query(payload):
    """Send *payload* to the chat endpoint and return the decoded JSON body.

    Raises:
        requests.HTTPError: the service answered with a 4xx/5xx status.
        requests.Timeout: no answer arrived within REQUEST_TIMEOUT seconds.
    """
    response = requests.post(
        API_URL, headers=headers, json=payload, timeout=REQUEST_TIMEOUT
    )
    response.raise_for_status()
    return response.json()


def extract_reply(response_json):
    """Return the text under the 'result' key of a decoded API response.

    Raises:
        RuntimeError: the response has no string 'result' (for example an
            error payload), so there is nothing to derive a label from.
    """
    reply = response_json.get('result') if isinstance(response_json, dict) else None
    if not isinstance(reply, str):
        raise RuntimeError('Unexpected API response: {!r}'.format(response_json))
    return reply


def classify(reply):
    """Return 积极 if that word occurs in *reply*, otherwise 消极.

    Any reply that does not contain 积极 (including off-topic answers) is
    labelled 消极, exactly as in the original script.
    """
    return POSITIVE if POSITIVE in reply else NEGATIVE


def main():
    """Read the CSV, label the first MAX_SAMPLES reviews, and write JSONL."""
    data = pd.read_csv(INPUT_CSV)
    # Drop rows without a label; rows without text are dropped too, because
    # a NaN text cannot be concatenated onto the prompt.
    data = data.dropna(subset=['label', 'text_a']).reset_index(drop=True)
    print(data.head(5))

    # Records are written as they are produced so that a failure part-way
    # through does not discard the answers already obtained.
    with jsonlines.open(OUTPUT_JSONL, 'w') as writer:
        for text in data['text_a'].head(MAX_SAMPLES):
            text = str(text)
            response = query(
                {"messages": [{"role": "user", "content": INSTRUCTION + text}]}
            )
            label = classify(extract_reply(response))
            print(text, label)
            # NOTE(review): the key is spelled 'instrution' in the original
            # output and is kept unchanged; confirm whether 'instruction'
            # was intended before renaming it.
            writer.write({'instrution': INSTRUCTION, 'input': text, 'output': label})


if __name__ == "__main__":
    main()
# Label review sentiment as 积极 (positive) or 消极 (negative) with a hosted chat
# model and save the results as instruction/input/output records in a JSON
# Lines file.
import os
import time

import jsonlines
import pandas as pd
import requests

API_URL = "https://o7i0y198m25cz3xf.aistudio-hub.baidu.com/chat/completions"

# Access token: see https://aistudio.baidu.com/index/accessToken
# NOTE(review): a literal token is embedded in the source. Prefer supplying it
# through the AISTUDIO_ACCESS_TOKEN environment variable; the embedded value is
# kept only as a fallback so the script behaves as before, and should be
# rotated and removed.
ACCESS_TOKEN = os.environ.get(
    "AISTUDIO_ACCESS_TOKEN", "a2dc61f2f9e49e96681af067d9969325919fee7b"
)
headers = {
    "Authorization": ACCESS_TOKEN,
    "Content-Type": "application/json",
}

# Seconds to wait for the service; without a timeout a stalled connection
# would block the labelling run forever.
REQUEST_TIMEOUT = 60

INPUT_CSV = 'train11-2.csv'
OUTPUT_JSONL = 'outputjsonl222222.jsonl'

# The original loop stopped once more than 10 records had been collected,
# i.e. after 11 rows. Raise this to label more of the file.
MAX_SAMPLES = 11

INSTRUCTION = '请分析评论的情感色彩,仅回复积极或者消极'
POSITIVE = '积极'
NEGATIVE = '消极'


def query(payload):
    """Send *payload* to the chat endpoint and return the decoded JSON body.

    Raises:
        requests.HTTPError: the service answered with a 4xx/5xx status.
        requests.Timeout: no answer arrived within REQUEST_TIMEOUT seconds.
    """
    response = requests.post(
        API_URL, headers=headers, json=payload, timeout=REQUEST_TIMEOUT
    )
    response.raise_for_status()
    return response.json()


def extract_reply(response_json):
    """Return the text under the 'result' key of a decoded API response.

    Raises:
        RuntimeError: the response has no string 'result' (for example an
            error payload), so there is nothing to derive a label from.
    """
    reply = response_json.get('result') if isinstance(response_json, dict) else None
    if not isinstance(reply, str):
        raise RuntimeError('Unexpected API response: {!r}'.format(response_json))
    return reply


def classify(reply):
    """Return 积极 if that word occurs in *reply*, otherwise 消极.

    Any reply that does not contain 积极 (including off-topic answers) is
    labelled 消极, exactly as in the original script.
    """
    return POSITIVE if POSITIVE in reply else NEGATIVE


def main():
    """Read the CSV, label the first MAX_SAMPLES reviews, and write JSONL."""
    data = pd.read_csv(INPUT_CSV)
    # Drop rows without a label; rows without text are dropped too, because
    # a NaN text cannot be concatenated onto the prompt.
    data = data.dropna(subset=['label', 'text_a']).reset_index(drop=True)
    print(data.head(5))

    # Records are written as they are produced so that a failure part-way
    # through does not discard the answers already obtained.
    with jsonlines.open(OUTPUT_JSONL, 'w') as writer:
        for text in data['text_a'].head(MAX_SAMPLES):
            text = str(text)
            response = query(
                {"messages": [{"role": "user", "content": INSTRUCTION + text}]}
            )
            label = classify(extract_reply(response))
            print(text, label)
            # NOTE(review): the key is spelled 'instrution' in the original
            # output and is kept unchanged; confirm whether 'instruction'
            # was intended before renaming it.
            writer.write({'instrution': INSTRUCTION, 'input': text, 'output': label})


if __name__ == "__main__":
    main()
2、多分类模型,代码如下:
# Label review sentiment as 积极 / 消极 / 中性 with a hosted chat model and save
# the results as instruction/input/output records in a JSON Lines file.
import os
import time

import jsonlines
import pandas as pd
import requests

API_URL = "https://o7i0y198m25cz3xf.aistudio-hub.baidu.com/chat/completions"

# Access token: see https://aistudio.baidu.com/index/accessToken
# NOTE(review): a literal token is embedded in the source. Prefer supplying it
# through the AISTUDIO_ACCESS_TOKEN environment variable; the embedded value is
# kept only as a fallback so the script behaves as before, and should be
# rotated and removed.
ACCESS_TOKEN = os.environ.get(
    "AISTUDIO_ACCESS_TOKEN", "a2dc61f2f9e49e96681af067d9969325919fee7b"
)
headers = {
    "Authorization": ACCESS_TOKEN,
    "Content-Type": "application/json",
}

# Seconds to wait for the service; without a timeout a stalled connection
# would block the labelling run forever.
REQUEST_TIMEOUT = 60

INPUT_CSV = 'train11-2.csv'
OUTPUT_JSONL = 'outputjsonl222222.jsonl'

# The original loop stopped once more than 10 records had been collected,
# i.e. after 11 rows. Raise this to label more of the file.
MAX_SAMPLES = 11

# Candidate labels. Only the values are used below: they are offered to the
# model in the prompt and searched for in its reply, in this order.
output_type = {0: '积极', 1: '消极', 2: '中性'}


def query(payload):
    """Send *payload* to the chat endpoint and return the decoded JSON body.

    Raises:
        requests.HTTPError: the service answered with a 4xx/5xx status.
        requests.Timeout: no answer arrived within REQUEST_TIMEOUT seconds.
    """
    response = requests.post(
        API_URL, headers=headers, json=payload, timeout=REQUEST_TIMEOUT
    )
    response.raise_for_status()
    return response.json()


def extract_reply(response_json):
    """Return the text under the 'result' key of a decoded API response.

    Raises:
        RuntimeError: the response has no string 'result' (for example an
            error payload), so there is nothing to derive a label from.
    """
    reply = response_json.get('result') if isinstance(response_json, dict) else None
    if not isinstance(reply, str):
        raise RuntimeError('Unexpected API response: {!r}'.format(response_json))
    return reply


def return_type(output, t=('积极', '消极', '中性')):
    """Return the first label in *t* that occurs in *output*.

    Labels are tried in the order given, so when a reply mentions several of
    them the earliest entry of *t* wins.

    Args:
        output: The model's reply text.
        t: Candidate labels, in priority order. The default is a tuple so
            the default value cannot be mutated between calls.

    Returns:
        The matching label, or 'X' when no label occurs in *output* or
        *output* is not a string.
    """
    if not isinstance(output, str):
        return 'X'
    for s in t:
        if s in output:
            return s
    return 'X'


def main():
    """Read the CSV, label the first MAX_SAMPLES reviews, and write JSONL."""
    data = pd.read_csv(INPUT_CSV)
    # Drop rows without a label; rows without text are dropped too, because
    # a NaN text cannot be concatenated onto the prompt.
    data = data.dropna(subset=['label', 'text_a']).reset_index(drop=True)
    print(data.head(5))

    labels = list(output_type.values())
    # The prompt does not depend on the row, so build it once.
    instruction = '请分析评论的情感色彩,仅回复{}'.format("或者".join(labels))

    # Records are written as they are produced so that a failure part-way
    # through does not discard the answers already obtained.
    with jsonlines.open(OUTPUT_JSONL, 'w') as writer:
        for text in data['text_a'].head(MAX_SAMPLES):
            text = str(text)
            response = query(
                {"messages": [{"role": "user", "content": instruction + text}]}
            )
            label = return_type(extract_reply(response), t=labels)
            print(text, label)
            # NOTE(review): the key is spelled 'instrution' in the original
            # output and is kept unchanged; confirm whether 'instruction'
            # was intended before renaming it.
            writer.write({'instrution': instruction, 'input': text, 'output': label})


if __name__ == "__main__":
    main()
# Label review sentiment as 积极 / 消极 / 中性 with a hosted chat model and save
# the results as instruction/input/output records in a JSON Lines file.
import os
import time

import jsonlines
import pandas as pd
import requests

API_URL = "https://o7i0y198m25cz3xf.aistudio-hub.baidu.com/chat/completions"

# Access token: see https://aistudio.baidu.com/index/accessToken
# NOTE(review): a literal token is embedded in the source. Prefer supplying it
# through the AISTUDIO_ACCESS_TOKEN environment variable; the embedded value is
# kept only as a fallback so the script behaves as before, and should be
# rotated and removed.
ACCESS_TOKEN = os.environ.get(
    "AISTUDIO_ACCESS_TOKEN", "a2dc61f2f9e49e96681af067d9969325919fee7b"
)
headers = {
    "Authorization": ACCESS_TOKEN,
    "Content-Type": "application/json",
}

# Seconds to wait for the service; without a timeout a stalled connection
# would block the labelling run forever.
REQUEST_TIMEOUT = 60

INPUT_CSV = 'train11-2.csv'
OUTPUT_JSONL = 'outputjsonl222222.jsonl'

# The original loop stopped once more than 10 records had been collected,
# i.e. after 11 rows. Raise this to label more of the file.
MAX_SAMPLES = 11

# Candidate labels. Only the values are used below: they are offered to the
# model in the prompt and searched for in its reply, in this order.
output_type = {0: '积极', 1: '消极', 2: '中性'}


def query(payload):
    """Send *payload* to the chat endpoint and return the decoded JSON body.

    Raises:
        requests.HTTPError: the service answered with a 4xx/5xx status.
        requests.Timeout: no answer arrived within REQUEST_TIMEOUT seconds.
    """
    response = requests.post(
        API_URL, headers=headers, json=payload, timeout=REQUEST_TIMEOUT
    )
    response.raise_for_status()
    return response.json()


def extract_reply(response_json):
    """Return the text under the 'result' key of a decoded API response.

    Raises:
        RuntimeError: the response has no string 'result' (for example an
            error payload), so there is nothing to derive a label from.
    """
    reply = response_json.get('result') if isinstance(response_json, dict) else None
    if not isinstance(reply, str):
        raise RuntimeError('Unexpected API response: {!r}'.format(response_json))
    return reply


def return_type(output, t=('积极', '消极', '中性')):
    """Return the first label in *t* that occurs in *output*.

    Labels are tried in the order given, so when a reply mentions several of
    them the earliest entry of *t* wins.

    Args:
        output: The model's reply text.
        t: Candidate labels, in priority order. The default is a tuple so
            the default value cannot be mutated between calls.

    Returns:
        The matching label, or 'X' when no label occurs in *output* or
        *output* is not a string.
    """
    if not isinstance(output, str):
        return 'X'
    for s in t:
        if s in output:
            return s
    return 'X'


def main():
    """Read the CSV, label the first MAX_SAMPLES reviews, and write JSONL."""
    data = pd.read_csv(INPUT_CSV)
    # Drop rows without a label; rows without text are dropped too, because
    # a NaN text cannot be concatenated onto the prompt.
    data = data.dropna(subset=['label', 'text_a']).reset_index(drop=True)
    print(data.head(5))

    labels = list(output_type.values())
    # The prompt does not depend on the row, so build it once.
    instruction = '请分析评论的情感色彩,仅回复{}'.format("或者".join(labels))

    # Records are written as they are produced so that a failure part-way
    # through does not discard the answers already obtained.
    with jsonlines.open(OUTPUT_JSONL, 'w') as writer:
        for text in data['text_a'].head(MAX_SAMPLES):
            text = str(text)
            response = query(
                {"messages": [{"role": "user", "content": instruction + text}]}
            )
            label = return_type(extract_reply(response), t=labels)
            print(text, label)
            # NOTE(review): the key is spelled 'instrution' in the original
            # output and is kept unchanged; confirm whether 'instruction'
            # was intended before renaming it.
            writer.write({'instrution': instruction, 'input': text, 'output': label})


if __name__ == "__main__":
    main()