1-8大模型标注自然语言

使用第三方大模型的接口,为自然语言文本(评论)自动标注情感类别。

1、二分类模型,代码如下:

import requests import pandas as pd import jsonlines import time API_URL = "https://o7i0y198m25cz3xf.aistudio-hub.baidu.com/chat/completions" headers = { # 请前往 https://aistudio.baidu.com/index/accessToken 查看 访问令牌 "Authorization": "a2dc61f2f9e49e96681af067d9969325919fee7b", "Content-Type": "application/json" } def query(payload): response = requests.post(API_URL, headers=headers, json=payload) return response.json() # 读取csv文件 data = pd.read_csv('train11-2.csv') # 删除label列的所有空行 data = data[data['label'].notna()] # 重置索引 data = data.reset_index(drop=True) print(data.head(5)) x = [] output_type = {0: '积极', 1: '消极'} # t0=time.time() for i in range(0, len(data)): value = {} value['instrution'] = '请分析评论的情感色彩,仅回复积极或者消极' value['input'] = data['text_a'][i] output = query({"messages": [{"role": "user", "content": value['instrution']+value['input']}]}) pass # output = query({"messages": [{"role": "user", "content": "北京有啥好玩的地方"}]}) print(value['input'],['积极' if '积极' in output['result'] else '消极'][0]) value['output'] = ['积极' if '积极' in output['result'] else '消极'][0] x.append(value) if len(x)>10: # # t1 = time.time() # print(t1-t0) break with jsonlines.open('outputjsonl222222.jsonl', 'w') as file: file.write_all(x)

import requests
import pandas as pd
import jsonlines
import time
import os

# Chat-completions endpoint that every request in this script is POSTed to.
API_URL = "https://o7i0y198m25cz3xf.aistudio-hub.baidu.com/chat/completions"

# The access token can be looked up at https://aistudio.baidu.com/index/accessToken.
# It is read from the AISTUDIO_ACCESS_TOKEN environment variable (a name chosen
# by this script) so that the credential does not have to live in the source.
# NOTE(review): the fallback literal is a token committed in plain text; it is
# kept only so the script behaves as before when the variable is unset --
# rotate it and drop the fallback.
headers = {
    "Authorization": os.environ.get(
        "AISTUDIO_ACCESS_TOKEN",
        "a2dc61f2f9e49e96681af067d9969325919fee7b",
    ),
    "Content-Type": "application/json",
}


def query(payload, timeout=60):
    """POST ``payload`` to ``API_URL`` and return the decoded JSON reply.

    Args:
        payload: JSON-serialisable request body, e.g. ``{"messages": [...]}``.
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` waits indefinitely, so one stalled request would
            hang the whole annotation run.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.exceptions.Timeout: The server did not answer in time.
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx
            status.
    """
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    # Fail here with the HTTP status instead of letting the caller trip over
    # an error body that lacks the fields it expects.
    response.raise_for_status()
    return response.json()


# Load the CSV and keep only the rows whose `label` cell is filled in.
data = pd.read_csv('train11-2.csv')
data = data[data['label'].notna()]

# Renumber the remaining rows from 0 after the filter.
data = data.reset_index(drop=True)
print(data.head(5))

# The prompt prefix is identical for every comment, so it is built once.
instruction = '请分析评论的情感色彩,仅回复积极或者消极'

x = []
for text in data['text_a']:
    output = query({"messages": [{"role": "user", "content": instruction + text}]})
    # Any reply that does not contain '积极' is recorded as '消极'.
    label = '积极' if '积极' in output['result'] else '消极'
    print(text, label)
    # NOTE(review): 'instrution' looks like a typo for 'instruction'; the key is
    # kept as-is because it is part of the records written to the output file.
    x.append({'instrution': instruction, 'input': text, 'output': label})

    # Stop after 11 records have been collected.
    if len(x) > 10:
        break

# Write the collected records as JSON Lines.
with jsonlines.open('outputjsonl222222.jsonl', 'w') as file:
    file.write_all(x)

2、多分类模型,代码如下:

import requests import pandas as pd import jsonlines import time API_URL = "https://o7i0y198m25cz3xf.aistudio-hub.baidu.com/chat/completions" headers = { # 请前往 https://aistudio.baidu.com/index/accessToken 查看 访问令牌 "Authorization": "a2dc61f2f9e49e96681af067d9969325919fee7b", "Content-Type": "application/json" } def query(payload): response = requests.post(API_URL, headers=headers, json=payload) return response.json() def return_type(output, t=['积极','消极','中性']): for s in t: if s in output: return s # if s not in output: # continue # else: return 'X' if __name__ == "__main__": # 读取csv文件 data = pd.read_csv('train11-2.csv') # 删除label列的所有空行 data = data[data['label'].notna()] # 重置索引 data = data.reset_index(drop=True) print(data.head(5)) x = [] #output_type = {0: '积极', 1: '消极'} output_type = {0: '积极', 1: '消极', 2:'中性'} t0=time.time() for i in range(0, len(data)): value = {} value['instrution'] = '请分析评论的情感色彩,仅回复{}'.format("或者".join(list(output_type.values()))) value['input'] = data['text_a'][i] output = query({"messages": [{"role": "user", "content": value['instrution']+value['input']}]}) # print(list(output_type.values())) output = return_type(output['result'], t=list(output_type.values())) print(value['input'], output) value['output'] = output x.append(value) if len(x)>10: # t1 = time.time() # print(t1-t0) break with jsonlines.open('outputjsonl222222.jsonl', 'w') as file: file.write_all(x)

import requests
import pandas as pd
import jsonlines
import time
import os

# Chat-completions endpoint that every request in this script is POSTed to.
API_URL = "https://o7i0y198m25cz3xf.aistudio-hub.baidu.com/chat/completions"

# The access token can be looked up at https://aistudio.baidu.com/index/accessToken.
# It is read from the AISTUDIO_ACCESS_TOKEN environment variable (a name chosen
# by this script) so that the credential does not have to live in the source.
# NOTE(review): the fallback literal is a token committed in plain text; it is
# kept only so the script behaves as before when the variable is unset --
# rotate it and drop the fallback.
headers = {
    "Authorization": os.environ.get(
        "AISTUDIO_ACCESS_TOKEN",
        "a2dc61f2f9e49e96681af067d9969325919fee7b",
    ),
    "Content-Type": "application/json",
}


def query(payload, timeout=60):
    """POST ``payload`` to ``API_URL`` and return the decoded JSON reply.

    Args:
        payload: JSON-serialisable request body, e.g. ``{"messages": [...]}``.
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` waits indefinitely, so one stalled request would
            hang the whole annotation run.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.exceptions.Timeout: The server did not answer in time.
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx
            status.
    """
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    # Fail here with the HTTP status instead of letting the caller trip over
    # an error body that lacks the fields it expects.
    response.raise_for_status()
    return response.json()

def return_type(output, t=('积极', '消极', '中性')):
    """Return the first label from ``t`` that occurs in ``output``.

    Args:
        output: Text to search for a label.
        t: Candidate labels, checked in order; when several of them occur in
            ``output`` the earliest entry of ``t`` wins. The default is a
            tuple so that no mutable object is shared between calls.

    Returns:
        The matching label, or ``'X'`` when none of the labels occurs in
        ``output``.
    """
    return next((label for label in t if label in output), 'X')




if __name__ == "__main__":
    # Load the CSV and keep only the rows whose `label` cell is filled in.
    data = pd.read_csv('train11-2.csv')
    data = data[data['label'].notna()]

    # Renumber the remaining rows from 0 after the filter.
    data = data.reset_index(drop=True)
    print(data.head(5))

    # The label names drive both the prompt text and the reply matching.
    output_type = {0: '积极', 1: '消极', 2: '中性'}
    labels = list(output_type.values())

    # The prompt prefix is identical for every comment, so it is built once.
    instruction = '请分析评论的情感色彩,仅回复{}'.format("或者".join(labels))

    x = []
    for text in data['text_a']:
        reply = query({"messages": [{"role": "user", "content": instruction + text}]})
        # return_type yields 'X' when the reply contains none of the labels.
        label = return_type(reply['result'], t=labels)
        print(text, label)
        # NOTE(review): 'instrution' looks like a typo for 'instruction'; the key
        # is kept as-is because it is part of the records written to the file.
        x.append({'instrution': instruction, 'input': text, 'output': label})

        # Stop after 11 records have been collected.
        if len(x) > 10:
            break

    # Write the collected records as JSON Lines.
    with jsonlines.open('outputjsonl222222.jsonl', 'w') as file:
        file.write_all(x)
  • 5
    点赞
  • 10
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值