Automatically Calling a Python API for Sentiment Testing

# -*- coding: utf-8 -*-

“”"
Created on Mon Oct 22 17:47:24 2018

@author: Python_test
“”"
import requests
import pandas as pd
import jieba
#import pkuseg
#seg = pkuseg.pkuseg()
# Read the Excel file to be processed
f2 = pd.read_excel('test_20191210.xlsx', sheet_name="test")
#mat = “”"
#我都唔明呢間學校嘅校方高層個腦諗乜,你點樣投誠都唔夠濠江中學咁根正苗紅㗎啦粵華之秘相信咁多位粵華仔都知道,由今個學期開始,每週一嘅週會都要升「中華人民共和國」嘅國旗。跟住係週會結束之際,係偉大嘅祖国嘅国旗之下,公然宣佈高二級要去天主教嘅教堂念玫瑰經。咁站係偉大祖国嘅立場,係咪犯左「煽動巔覆國家政權罪」呢?一邊係度愛国愛黨,一邊又係度信祖国唔鍾意嘅宗教😢,學校會唔會
#"""

query = “”"
query(
KaTeX parse error: Expected '}', got 'EOF' at end of input: … sentences:sentences
)
}
“”"
# The API requires login, so we need the URL that carries the latest access token
url = "http://python-api.access_token"  # site-specific; set your own
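# A minimal sketch of fetching a fresh token first and attaching it to each
# request. This is not part of the original post: the /login path and the
# "access_token" field name are assumptions, so adapt them to your API.
#def get_token(login_url, username, password):
#    resp = requests.post(login_url, json={"username": username, "password": password})
#    resp.raise_for_status()
#    return resp.json()["access_token"]
#
#token = get_token("http://python-api/login", "user", "pass")
#headers = {"Authorization": "Bearer " + token}
#result = requests.post(url, json=data, headers=headers)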

for i in range(len(f2)):
    sen = f2.iloc[i, 0]
    variables = {
        "sentences": sen
    }
    data = {
        "query": query,
        "variables": variables
    }
    try:
        result = requests.post(url, json=data)
        #print(result)
        output = result.json()
        #print(result.json())

        a = output['data']['sentiment']['data']['result']
        #b = output['data']['sentiment']['data']['score']
        #c = output['data']['sentiment']['data']['proportion']
        d = output['data']['sentiment']['data']['words']
        f2.loc[i, 'result'] = str(a)
        #f2.loc[i, 'score'] = str(b)
        #f2.loc[i, 'proportion'] = str(c)
        f2.loc[i, 'words'] = str(d)

    except Exception as e:
        print(str(e))
        #fx.loc[i, 'runtime'] = float(t)  # produces a float directly
        #a = [t]
        #a += a
    print(i)

print(output)
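# A possible hardening of the loop above (a sketch, not in the original code):
# give the POST a timeout and fail fast on HTTP errors before parsing JSON.
#result = requests.post(url, json=data, timeout=10)
#result.raise_for_status()  # raises on 4xx/5xx instead of failing later in .json()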

# Save the results to Excel

#f2.to_excel("ers_时事sentiment(0103~0109)情感素材收集_James.xlsx", index=False)
# Segment with the pkuseg segmenter plus a custom dictionary
#f4 = pd.read_csv("ers_sentiment_dict.txt")['ciyu'].tolist()
f3 = pd.read_csv("ers_sentiment_dict.txt")

l = []
for i in range(len(f3)):
    test = f3.iloc[i][0].split(" ")[0]
    l.append(test)

f4 = l
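# The loop above can also be written as a single vectorized expression,
# assuming the dictionary file is space-separated with the word first:
#f4 = f3.iloc[:, 0].str.split(" ").str[0].tolist()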
#f3 = pd.read_csv("universal_20191021.txt")

# Get the dictionary word list

##seg = pkuseg.pkuseg(user_dict=f3)
#f5 =pd.read_csv("") #
def addDict(words: list):
    """
    jieba.add_word(word, freq=None, tag=None)
    freq and tag can be omitted; freq defaults to a calculated value
    that ensures the word can be cut out.
    """
    if words:
        for w in words:
            if w:
                jieba.add_word(w)

keys = pd.read_csv("universal_20191204.txt", header=None)
for i in range(len(keys)):
    word = keys.loc[i][0].split(" ")[0]
    addDict([word])  # wrap in a list to match the function signature
    print("Adding word:", i, word)
jieba.load_userdict("universal_20191204.txt")  ## the frequency must exceed 10000000 for the word to win segmentation
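# jieba.load_userdict expects one entry per line in the form "word [freq] [tag]",
# with freq and tag optional. If a dictionary word still gets split, its
# frequency can be raised explicitly (a sketch; the sample word is illustrative):
#jieba.suggest_freq("根正苗紅", tune=True)  # tune the internal dict so the word is kept whole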
keys.to_excel("test.xlsx")
f1 = pd.read_excel("test.xlsx")
f1 = f1.rename(columns={0: "rawdata"})
f2 = pd.DataFrame(f1, columns=["rawdata", "Allcutword", "Lcutword", "compare"])
for i in range(len(keys)):
    try:
        sentence = keys.loc[i][0]
        Allcutword = jieba.lcut(sentence, cut_all=True)
        Lcutword = jieba.lcut(sentence)
        f2.loc[i, "Allcutword"] = str(Allcutword)
        f2.loc[i, "Lcutword"] = str(Lcutword)
        print("Segmented {}:".format(i), sentence)
    except:
        print("Error at {t}:".format(t=i), sentence)

f2.to_excel("切词测试.xlsx")
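# The two modes compared above differ as follows (illustrative example):
#jieba.lcut("中国科学技术大学", cut_all=True)  # full mode: every dictionary word found, overlapping
#jieba.lcut("中国科学技术大学")                # precise mode: one best, non-overlapping segmentation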

# Alternative helper that takes a plain list of words:
#def addDict(dict_list):
#    """
#    jieba.add_word(word, freq=None, tag=None)
#    freq and tag can be omitted; freq defaults to a calculated value
#    that ensures the word can be cut out.
#    """
#    if dict_list:
#        for i in dict_list:
#            if i:
#                jieba.add_word(i)  # jieba.add_word and suggest_freq both forcibly raise the word frequency

#addDict("universal_20191030.txt")
#f5 = pd.read_csv("universal_20191030.txt")
#for i in range(len(f5)):
#    words = f5.iloc[i][0].split(" ")[0]
#    jieba.add_word(words)

#jieba.add_word("universal_20191030.txt")
#s = "衷心感谢"
#test1 = jieba.cut(s)
#for i in test1:
#    print(i)

#test2 = jieba.lcut(s)
#f2 = pd.read_excel("sentiment(924~1023)result训练集James.xlsx")
#f2['cutword'] = f2["content"].apply(lambda x: seg.cut(x))  # segment with pkuseg
f2['cutword'] = f2["contents"].apply(lambda x: jieba.lcut(x))  # segment with jieba

#xx = jieba.lcut("共築中國夢奮進新時代,返工頂你個肺")
f2['newword'] = f2['cutword'].apply(lambda x: [i for i in x if i not in f4])

#e = ['cutword']
#f2.loc['cutword'] = str(e)
f2.to_excel("testsentiment-result3.xlsx", index=False)
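# Note on the filter above: "i not in f4" scans the whole list for every token.
# For a large dictionary, a set makes each membership test O(1) (a sketch):
#stopset = set(f4)
#f2['newword'] = f2['cutword'].apply(lambda x: [i for i in x if i not in stopset])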
