Automatically Calling a Python API for Sentiment Testing

# -*- coding: utf-8 -*-

“”"
Created on Mon Oct 22 17:47:24 2018

@author: Python_test
“”"
import requests
import pandas as pd
import jieba
#import pkuseg
#seg = pkuseg.pkuseg()
# Read the Excel file to be processed
f2 = pd.read_excel('test_20191210.xlsx', sheet_name="test")
#mat = “”"
#我都唔明呢間學校嘅校方高層個腦諗乜,你點樣投誠都唔夠濠江中學咁根正苗紅㗎啦粵華之秘相信咁多位粵華仔都知道,由今個學期開始,每週一嘅週會都要升「中華人民共和國」嘅國旗。跟住係週會結束之際,係偉大嘅祖国嘅国旗之下,公然宣佈高二級要去天主教嘅教堂念玫瑰經。咁站係偉大祖国嘅立場,係咪犯左「煽動巔覆國家政權罪」呢?一邊係度愛国愛黨,一邊又係度信祖国唔鍾意嘅宗教😢,學校會唔會
#"""

query = “”"
query(
KaTeX parse error: Expected '}', got 'EOF' at end of input: … sentences:sentences
)
}
“”"
# The API requires login, so we need the URL that carries the latest access token
url = "http://python-api.access_token"  # site-specific; set your own
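# A minimal sketch of fetching a fresh token first and attaching it to each
# request. This is not part of the original post: the /login path and the
# "access_token" field name are assumptions, so adapt them to your API.
#def get_token(login_url, username, password):
#    resp = requests.post(login_url, json={"username": username, "password": password})
#    resp.raise_for_status()
#    return resp.json()["access_token"]
#
#token = get_token("http://python-api/login", "user", "pass")
#headers = {"Authorization": "Bearer " + token}
#result = requests.post(url, json=data, headers=headers)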

for i in range(len(f2)):
    sen = f2.iloc[i, 0]
    variables = {
        "sentences": sen
    }
    data = {
        "query": query,
        "variables": variables
    }
    try:
        result = requests.post(url, json=data)
        #print(result)
        output = result.json()
        #print(result.json())

        a = output['data']['sentiment']['data']['result']
        #b = output['data']['sentiment']['data']['score']
        #c = output['data']['sentiment']['data']['proportion']
        d = output['data']['sentiment']['data']['words']
        f2.loc[i, 'result'] = str(a)
        #f2.loc[i, 'score'] = str(b)
        #f2.loc[i, 'proportion'] = str(c)
        f2.loc[i, 'words'] = str(d)

    except Exception as e:
        print(str(e))
        #fx.loc[i, 'runtime'] = float(t)  # produces a float directly
        #a = [t]
        #a += a
    print(i)

print(output)
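# A possible hardening of the loop above (a sketch, not in the original code):
# give the POST a timeout and fail fast on HTTP errors before parsing JSON.
#result = requests.post(url, json=data, timeout=10)
#result.raise_for_status()  # raises on 4xx/5xx instead of failing later in .json()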

# Save the results to Excel

#f2.to_excel("ers_时事sentiment(0103~0109)情感素材收集_James.xlsx", index=False)
# Segment with the pkuseg segmenter plus a custom dictionary
#f4 = pd.read_csv("ers_sentiment_dict.txt")['ciyu'].tolist()
f3 = pd.read_csv("ers_sentiment_dict.txt")

l = []
for i in range(len(f3)):
    test = f3.iloc[i][0].split(" ")[0]
    l.append(test)

f4 = l
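# The loop above can also be written as a single vectorized expression,
# assuming the dictionary file is space-separated with the word first:
#f4 = f3.iloc[:, 0].str.split(" ").str[0].tolist()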
#f3 = pd.read_csv("universal_20191021.txt")

# Get the dictionary word list

##seg = pkuseg.pkuseg(user_dict=f3)
#f5 =pd.read_csv("") #
def addDict(words: list):
    """
    jieba.add_word(word, freq=None, tag=None)
    freq and tag can be omitted; freq defaults to a calculated value
    that ensures the word can be cut out.
    """
    if words:
        for w in words:
            if w:
                jieba.add_word(w)

keys = pd.read_csv("universal_20191204.txt", header=None)
for i in range(len(keys)):
    word = keys.loc[i][0].split(" ")[0]
    addDict([word])  # wrap in a list to match the function signature
    print("Adding word:", i, word)
jieba.load_userdict("universal_20191204.txt")  ## the frequency must exceed 10000000 for the word to win segmentation
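# jieba.load_userdict expects one entry per line in the form "word [freq] [tag]",
# with freq and tag optional. If a dictionary word still gets split, its
# frequency can be raised explicitly (a sketch; the sample word is illustrative):
#jieba.suggest_freq("根正苗紅", tune=True)  # tune the internal dict so the word is kept whole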
keys.to_excel("test.xlsx")
f1 = pd.read_excel("test.xlsx")
f1 = f1.rename(columns={0: "rawdata"})
f2 = pd.DataFrame(f1, columns=["rawdata", "Allcutword", "Lcutword", "compare"])
for i in range(len(keys)):
    try:
        sentence = keys.loc[i][0]
        Allcutword = jieba.lcut(sentence, cut_all=True)
        Lcutword = jieba.lcut(sentence)
        f2.loc[i, "Allcutword"] = str(Allcutword)
        f2.loc[i, "Lcutword"] = str(Lcutword)
        print("Segmented {}:".format(i), sentence)
    except:
        print("Error at {t}:".format(t=i), sentence)

f2.to_excel("切词测试.xlsx")
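# The two modes compared above differ as follows (illustrative example):
#jieba.lcut("中国科学技术大学", cut_all=True)  # full mode: every dictionary word found, overlapping
#jieba.lcut("中国科学技术大学")                # precise mode: one best, non-overlapping segmentation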

# Alternative helper that takes a plain list of words:
#def addDict(dict_list):
#    """
#    jieba.add_word(word, freq=None, tag=None)
#    freq and tag can be omitted; freq defaults to a calculated value
#    that ensures the word can be cut out.
#    """
#    if dict_list:
#        for i in dict_list:
#            if i:
#                jieba.add_word(i)  # jieba.add_word and suggest_freq both forcibly raise the word frequency

#addDict("universal_20191030.txt")
#f5 = pd.read_csv("universal_20191030.txt")
#for i in range(len(f5)):
#    words = f5.iloc[i][0].split(" ")[0]
#    jieba.add_word(words)

#jieba.add_word("universal_20191030.txt")
#s = "衷心感谢"
#test1 = jieba.cut(s)
#for i in test1:
#    print(i)

#test2 = jieba.lcut(s)
#f2 = pd.read_excel("sentiment(924~1023)result训练集James.xlsx")
#f2['cutword'] = f2["content"].apply(lambda x: seg.cut(x))  # segment with pkuseg
f2['cutword'] = f2["contents"].apply(lambda x: jieba.lcut(x))  # segment with jieba

#xx = jieba.lcut("共築中國夢奮進新時代,返工頂你個肺")
f2['newword'] = f2['cutword'].apply(lambda x: [i for i in x if i not in f4])

#e = ['cutword']
#f2.loc['cutword'] = str(e)
f2.to_excel("testsentiment-result3.xlsx", index=False)
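# Note on the filter above: "i not in f4" scans the whole list for every token.
# For a large dictionary, a set makes each membership test O(1) (a sketch):
#stopset = set(f4)
#f2['newword'] = f2['cutword'].apply(lambda x: [i for i in x if i not in stopset])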
