目标:
1.掌握SDK文档的使用
2.强化tkinter图形界面的编辑
ttk.Treeview()创建列表,以及树状结构
3.递归函数的使用
百度参考文档:https://ai.baidu.com/docs#/NLP-Python-SDK/top
图形界面
源代码
# 导包
from aip import AipNlp #百度文本分析api
from tkinter import * #python界面开发
from tkinter import ttk
from tkinter.filedialog import askopenfilename,askdirectory
class Text_analysis():
    """Tkinter demo window driving the Baidu AI NLP SDK client (``AipNlp``).

    Each ``*_analysis`` / ``word_*`` / ``document_*`` method is a button
    callback: it reads its input from a widget, calls one method of
    ``self.client`` and writes the answer back into another widget.
    ``interface()`` builds all widgets and enters the Tk main loop.
    """

    # Part-of-speech tags of the dependency parser.  Not read by any method;
    # kept as reference data from the original implementation.
    _POSTAG = {
        'Ag': '形语素', 'g': '语素', 'ns': '地名', 'u': '助词',
        'a': '形容词', 'h': '前接成分', 'nt': '机构团体', 'vg': '动语素',
        'ad': '副形词', 'i': '成语', 'nz': '其他专名', 'v': '动词',
        'an': '名形词', 'j': '简称略语', 'o': '拟声词', 'vd': '副动词',
        'b': '区别词', 'k': '后接成分', 'p': '介词', 'vn': '名动词',
        'c': '连词', 'l': '习用语', 'q': '量词', 'w': '标点符号',
        'dg': '副语素', 'm': '数词', 'r': '代词', 'x': '非语素字',
        'd': '副词', 'Ng': '名语素', 's': '处所词', 'y': '语气词',
        'e': '叹词', 'n': '名词', 'tg': '时语素', 'z': '状态词',
        'f': '方位词', 'nr': '人名', 't': '时间词', 'un': '未知词',
    }

    # Dependency relation tag -> label shown next to each word in the tree.
    _DEPREL = {
        'ATT': '定中关系', 'QUN': '数量关系', 'COO': '并列关系',
        'APP': '同位关系', 'ADJ': '附加关系', 'VOB': '动宾关系',
        'POB': '介宾关系', 'SBV': '主谓关系', 'SIM': '比拟关系',
        'TMP': '时间关系', 'LOC': '处所关系', 'DE': '“的”字结构',
        'DI': '“地”字结构', 'DEI': '“得”字结构', 'SUO': '“所”字结构',
        'BA': '“把”字结构', 'BEI': '“被”字结构', 'ADV': '状中结构',
        'CMP': '动补结构', 'DBL': '兼语结构', 'CNJ': '关联词',
        'CS': '关联结构', 'MT': '语态结构', 'VV': '连谓结构',
        'HED': '核心', 'FOB': '前置宾语', 'DOB': '双宾语', 'TOP': '主题',
        'IS': '独立结构', 'IC': '独立分句', 'DC': '依存分句',
        'VNV': '叠词关系', 'YGC': '一个词', 'WP': '标点',
    }

    # Lexer part-of-speech tag -> label for the lexical analysis table.
    _LEXER_POS = {
        'n': '普通名词', 'f': '方位名词', 's': '处所名词', 't': '时间名词',
        'nr': '人名', 'ns': '地名', 'nt': '机构团体名', 'nw': '作品名',
        'nz': '其他专名', 'v': '普通动词', 'vd': '动副词', 'vn': '名动词',
        'a': '形容词', 'ad': '副形词', 'an': '名形词', 'd': '副词',
        'm': '数量词', 'q': '量词', 'r': '代词', 'p': '介词',
        'c': '连词', 'u': '助词', 'xc': '其他虚词', 'w': '标点符号',
    }

    # Lexer named-entity tag -> label (used when an item has no 'pos').
    _LEXER_NE = {'PER': '人名', 'LOC': '地名', 'ORG': '机构名', 'TIME': '时间'}

    # Combobox label -> "mode" option of the dependency parser.
    _PARSER_MODES = {'Web模型': 0, 'Query模型': 1}

    # Combobox label -> "type" option of comment-tag extraction.  The
    # combobox is filled from these keys so label and lookup cannot drift
    # apart (the original list offered '美食餐厅', which is not a key here).
    _INDUSTRY_TYPES = {
        '酒店': 1, 'KTV': 2, '丽人': 3, '美食餐饮': 4, '旅游': 5,
        '健康': 6, '教育': 7, '商业': 8, '房产': 9, '汽车': 10,
        '生活': 11, '购物': 12, '3C': 13,
    }

    def __init__(self):
        """Create the SDK client, the Tk root window and the Tk variables."""
        import os  # function-scope import: file-level import header unchanged

        # NOTE(review): credentials are hard-coded in the source.  They can
        # now be overridden through the environment; the literals remain only
        # as backward-compatible defaults and should be rotated/removed.
        self.APP_ID = os.environ.get('BAIDU_AIP_APP_ID', '175243421')
        self.API_KEY = os.environ.get(
            'BAIDU_AIP_API_KEY', 'QvXanIzUbqWTmQQhkqDGG3WY')
        self.SECRET_KEY = os.environ.get(
            'BAIDU_AIP_SECRET_KEY', 'LtbuUbj7yk9NxX86QN1iO3XcAc7gl4ln')
        self.client = AipNlp(self.APP_ID, self.API_KEY, self.SECRET_KEY)

        self.myWindow = Tk()
        self.text_1 = StringVar()    # sentence analysed in panel 1
        self.text_2 = StringVar()    # 'ppl' value of the language model
        self.text_3 = StringVar()    # short-text similarity score
        self.text_4 = StringVar()    # word whose vector is requested
        self.text_5 = StringVar()    # word 1 of the similarity pair
        self.text_6 = StringVar()    # word 2 of the similarity pair
        self.text_7 = StringVar()    # word similarity score
        self.text_8 = StringVar()    # article title
        self.model = StringVar()     # dependency parser mode (combobox)
        self.model_1 = StringVar()   # short-text similarity model (combobox)
        self.industry = StringVar()  # industry for comment extraction

        self.tree_list = []   # 'head_<id>' keys already placed in tree_3
        self.tree_dict = {}   # 'head_<id>' -> Treeview item id in tree_3

    # ------------------------------------------------------------------
    # helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _api_failed(result):
        """Print ``result`` and return True when it carries 'error_code'."""
        if 'error_code' in result:
            print(result)
            return True
        return False

    @staticmethod
    def _text_content(widget):
        """Return the full content of a Text widget.

        'end-1c' drops the newline Tk always keeps at the end of the buffer.
        The original read ``get(1.0, 5.0)``, which cut everything from the
        fifth line on.
        """
        return widget.get('1.0', 'end-1c')

    @staticmethod
    def _clear_rows(tree_widget):
        """Delete every top-level row (and its children) of a Treeview."""
        for row in tree_widget.get_children():
            tree_widget.delete(row)

    @classmethod
    def _lexical_label(cls, result):
        """Return the label for one lexer item.

        Uses the 'pos' tag when present, otherwise the 'ne' tag.  An unknown
        tag is shown as-is and an item with neither tag yields '' (the
        original raised or reused the previous row's label in these cases).
        """
        pos_tag = result.get('pos') or ''
        if pos_tag:
            return cls._LEXER_POS.get(pos_tag, pos_tag)
        ne_tag = result.get('ne') or ''
        if ne_tag:
            return cls._LEXER_NE.get(ne_tag, ne_tag)
        return ''

    @staticmethod
    def _next_attachable(pending, placed):
        """Return the first item of ``pending`` that can be inserted now.

        While nothing is placed only an item with ``head == 0`` qualifies
        (the root); afterwards an item qualifies once its head is placed.
        Returns None when no item qualifies.
        """
        for item in pending:
            if not placed:
                if item['head'] == 0:
                    return item
            elif 'head_' + str(item['head']) in placed:
                return item
        return None

    @staticmethod
    def _make_table(parent, height, columns):
        """Create a headings-only Treeview inside ``parent`` and grid it.

        :param columns: sequence of ``(column id, heading text, width)``.
        """
        table = ttk.Treeview(
            parent, height=height,
            columns=tuple(col_id for col_id, _, _ in columns),
            show='headings')
        for col_id, heading, width in columns:
            table.column(col_id, width=width, anchor='center')
            table.heading(col_id, text=heading)
        table.grid(row=0, column=0, sticky='EW')
        return table

    # ------------------------------------------------------------------
    # button callbacks
    # ------------------------------------------------------------------
    def tree(self, value):
        """Insert dependency-parse items into ``self.tree_3`` as a tree.

        Every item is a dict with 'id', 'head', 'word' and 'deprel'.  The
        item with ``head == 0`` becomes the root; any other item is appended
        under the node of its head once that node exists.  Inserted items are
        removed from ``value`` (the list is mutated, as before).

        The original recursed once per inserted item and never terminated
        when no remaining item could be attached; this loop stops instead
        and leaves such items in ``value``.

        :param value: list of parser items; modified in place.
        :return: None
        """
        while value:
            item = self._next_attachable(value, self.tree_list)
            if item is None:
                break  # nothing left that can be attached
            key = 'head_' + str(item['id'])
            relation = item['deprel']
            label = (str(item['word']) + '('
                     + self._DEPREL.get(relation, str(relation)) + ')')
            if self.tree_list:
                parent = self.tree_dict['head_' + str(item['head'])]
                position = 'end'
            else:
                parent = ''
                position = item['id']
            self.tree_list.append(key)
            self.tree_dict[key] = self.tree_3.insert(
                parent, position, text=label, open=True)
            value.remove(item)
        return None

    def lexical_analysis(self):
        """Run lexical analysis on ``text_1`` and list the items in tree_1.

        :return: None
        """
        self._clear_rows(self.tree_1)
        results = self.client.lexer(self.text_1.get())
        if self._api_failed(results):
            return
        for i, result in enumerate(results['items']):
            self.tree_1.insert('', i, values=(
                str(i + 1),
                result['item'],
                self._lexical_label(result),
                '/'.join(result['basic_words']),
            ))

    def syntax_analysis(self):
        """Run dependency parsing on ``text_1`` and rebuild tree_3.

        :return: None
        """
        self.tree_list.clear()
        self.tree_dict.clear()
        self._clear_rows(self.tree_3)
        text = self.text_1.get()
        options = {"mode": self._PARSER_MODES[self.com_1.get()]}
        results = self.client.depParser(text, options)
        if self._api_failed(results):
            return
        self.tree(results["items"])

    def language_model(self):
        """Query the DNN language model for ``text_1``.

        Writes 'ppl' into ``text_2`` and one row per word into tree_2.

        :return: None
        """
        self._clear_rows(self.tree_2)
        results = self.client.dnnlm(self.text_1.get())
        if self._api_failed(results):
            return
        self.text_2.set(results['ppl'])
        for i, result in enumerate(results['items']):
            self.tree_2.insert(
                '', i, values=(str(i + 1), result["word"], result["prob"]))

    def text_similar(self):
        """Compute the similarity of the two short texts into ``text_3``.

        :return: None
        """
        text1 = self._text_content(self.text_21)
        text2 = self._text_content(self.text_22)
        options = {"model": self.com.get()}
        result = self.client.simnet(text1, text2, options)
        if self._api_failed(result):
            return
        self.text_3.set(result['score'])

    def commentary_get(self):
        """Extract comment opinions from text_31 and print them in text_32.

        :return: None
        """
        sentiments = {0: '消极', 1: '中性', 2: '积极'}
        self.text_32.delete(1.0, 'end')  # clear the previous output
        text = self._text_content(self.text_31)
        options = {"type": self._INDUSTRY_TYPES[self.com_31.get()]}
        result = self.client.commentTag(text, options)
        if self._api_failed(result):
            return
        last = len(result['items']) - 1
        for i, item in enumerate(result['items']):
            summary = ('观点倾向:' + str(sentiments[item['sentiment']]) + '\n'
                       + '短句摘要:' + item['abstract'] + '\n'
                       + '匹配属性词:' + item['prop'] + '\n'
                       + '匹配描述词:' + item['adj'] + '\n')
            self.text_32.insert('insert', summary)
            if i < last:
                # separator between entries, none after the last one
                self.text_32.insert('insert', '-' * 50 + '\n')

    def emotion_analysis(self):
        """Classify the sentiment of text_31 and print it in text_32.

        :return: None
        """
        self.text_32.delete(1.0, 'end')  # clear the previous output
        sentiments = {0: '负向', 1: '中性', 2: '正向'}
        text = self._text_content(self.text_31)
        result = self.client.sentimentClassify(text)
        if self._api_failed(result):
            return
        first = result['items'][0]
        self.text_32.insert(
            'insert', '情感分析:{} '.format(sentiments[first['sentiment']]))
        self.text_32.insert('insert', '\n')
        self.text_32.insert('insert', '置信度:{}'.format(first['confidence']))

    def word_vector(self):
        """Fetch the embedding of ``text_4`` and list it in text_41.

        The output box is cleared first, like the other result boxes; the
        original kept appending to the previous vector.

        :return: None
        """
        self.text_41.delete(1.0, 'end')
        result = self.client.wordEmbedding(self.text_4.get())
        if self._api_failed(result):
            return
        for component in result['vec']:
            self.text_41.insert('insert', component)
            self.text_41.insert('insert', '\n')

    def word_similar(self):
        """Compute the similarity of ``text_5`` and ``text_6`` into text_7.

        :return: None
        """
        word1 = self.text_5.get()
        word2 = self.text_6.get()
        result = self.client.wordSimEmbedding(word1, word2)
        if self._api_failed(result):
            return
        self.text_7.set(result['score'])

    def document_label(self):
        """Request article tags for title/content and list them in text_62.

        :return: None
        """
        self.text_62.delete(1.0, 'end')
        title = self.text_8.get()
        content = self._text_content(self.text_61)
        result = self.client.keyword(title, content)
        if self._api_failed(result):
            return
        for item in result['items']:
            self.text_62.insert(
                'insert', str(item['tag']) + ':' + str(item['score']))
            self.text_62.insert('insert', '\n')

    def document_class(self):
        """Request the article topic and print the first level-1/2 tags.

        A level whose tag list is empty prints only its heading (the
        original raised IndexError).

        :return: None
        """
        self.text_62.delete(1.0, 'end')
        title = self.text_8.get()
        content = self._text_content(self.text_61)
        result = self.client.topic(title, content)
        if self._api_failed(result):
            return
        sections = (('一级分类结果', 'lv1_tag_list'),
                    ('二级分类结果', 'lv2_tag_list'))
        for heading, key in sections:
            self.text_62.insert('insert', heading)
            self.text_62.insert('insert', '\n')
            tags = result['item'].get(key) or []
            if tags:
                top = tags[0]
                self.text_62.insert(
                    'insert', ' ' + str(top['tag']) + ':' + str(top['score']))
                self.text_62.insert('insert', '\n')

    # ------------------------------------------------------------------
    # user interface
    # ------------------------------------------------------------------
    def interface(self):
        """Build the window with its six panels and run the Tk main loop.

        :return: None (returns once the main loop ends)
        """
        self.myWindow.title('自然语言处理演示程序')
        # geometry is width x height + x offset + y offset
        self.myWindow.geometry("1200x600+150+100")

        def make_frame(title, width, height, **grid_options):
            frame = LabelFrame(
                self.myWindow, text=title, width=width, height=height)
            frame.grid(ipadx=5, padx=10, pady=10, **grid_options)
            return frame

        # six labelled containers
        frm_1 = make_frame('语法分析/依存句法分析/DNN语言模型', 280, 550,
                           row=0, column=0, ipady=10, rowspan=3)
        frm_2 = make_frame('短文本相似度', 520, 150,
                           row=0, column=1, columnspan=2)
        frm_3 = make_frame('评论观点/情感倾向分析', 520, 180,
                           row=1, column=1, columnspan=2)
        frm_4 = make_frame('词向量', 260, 180,
                           row=2, column=1, columnspan=1)
        frm_5 = make_frame('词向量相似度', 260, 180,
                           row=2, column=2, columnspan=1)
        frm_6 = make_frame('文章标签/文章分类', 300, 550,
                           row=0, column=3, ipady=10, rowspan=3)

        self._build_analysis_panel(frm_1)
        self._build_similarity_panel(frm_2)
        self._build_comment_panel(frm_3)
        self._build_vector_panel(frm_4)
        self._build_word_similarity_panel(frm_5)
        self._build_article_panel(frm_6)

        # start the event loop of the main window
        self.myWindow.mainloop()

    def _build_analysis_panel(self, frm_1):
        """Panel 1: lexical analysis, dependency parsing, language model."""
        Entry(frm_1, textvariable=self.text_1, width=40).grid(
            row=0, column=0, sticky='EW', pady=5, padx=5, columnspan=3)
        self.text_1.set('我们在北航学习人工智能')  # default sentence
        Button(frm_1, text='词法分析', command=self.lexical_analysis,
               width=10, height=1).grid(
            row=1, column=0, sticky='E', pady=5, padx=5, columnspan=3)

        # The Text widgets below serve only as parents for the Treeviews
        # gridded inside them, as in the original layout.
        text1 = Text(frm_1, width=40, height=10)
        text1.grid(row=2, column=0, sticky='EW', pady=5, padx=5, columnspan=3)
        self.tree_1 = self._make_table(text1, 5, (
            ('col1', '序号', 40),
            ('col2', '分词', 90),
            ('col3', '词性', 90),
            ('col4', '基本词', 90),
        ))

        Label(frm_1, text="分析模型").grid(row=3, column=0, pady=5, padx=5)
        self.com_1 = ttk.Combobox(
            frm_1, textvariable=self.model,
            values=list(self._PARSER_MODES), width=10, height=1)
        self.com_1.current(0)
        self.com_1.grid(row=3, column=1, pady=5, padx=5)
        Button(frm_1, text='依存句法分析', command=self.syntax_analysis,
               width=10).grid(row=3, column=2, pady=5, padx=5)
        text_12 = Text(frm_1, width=40, height=8)
        text_12.grid(
            row=4, column=0, sticky='EW', pady=5, padx=5, columnspan=3)
        self.tree_3 = ttk.Treeview(
            text_12, height=5, show='tree', columns=('col1',))
        self.tree_3.column('col1', width=100, anchor='center')
        self.tree_3.grid(row=0, column=0, sticky='EW')

        Label(frm_1, text="句子通顺值").grid(row=5, column=0, pady=5, padx=5)
        Entry(frm_1, textvariable=self.text_2).grid(
            row=5, column=1, pady=5, padx=5)
        Button(frm_1, text='DNN语言模型', command=self.language_model,
               width=10).grid(row=5, column=2, pady=5, padx=5)
        text_13 = Text(frm_1, width=40, height=8)
        text_13.grid(
            row=6, column=0, sticky='EW', pady=5, padx=5, columnspan=3)
        self.tree_2 = self._make_table(text_13, 4, (
            ('col1', '序号', 40),
            ('col2', '分词', 130),
            ('col3', '概率值', 140),
        ))

    def _build_similarity_panel(self, frm_2):
        """Panel 2: two text boxes, model chooser and similarity result."""
        self.text_21 = Text(frm_2, width=25, height=8)
        self.text_21.grid(
            row=0, column=0, sticky='EW', pady=5, padx=5, columnspan=3)
        self.text_21.insert(
            INSERT,
            '创建好账号后,在正式调用AI能力之前,需要您创建一下应用,这个是调用服务的基础能力单元,不可或缺哦')
        self.text_22 = Text(frm_2, width=25, height=8)
        self.text_22.grid(
            row=0, column=4, sticky='EW', pady=5, padx=5, columnspan=3)
        self.text_22.insert(
            INSERT,
            '创建帐户后,需要在调用AI之前创建应用程序。这是呼叫服务的基本能力单位。这是不可缺少的')
        Label(frm_2, text="计算模型", width=8).grid(
            row=1, column=0, pady=5, padx=5)
        self.com = ttk.Combobox(
            frm_2, textvariable=self.model_1,
            values=['BOW', 'GRNN', 'CNN'], width=8)
        self.com.current(0)
        self.com.grid(row=1, column=1, pady=5, padx=5)
        Button(frm_2, text='短文本相似度', command=self.text_similar,
               width=11).grid(row=1, column=2, pady=5, padx=5, columnspan=2)
        Label(frm_2, text="结果", width=8).grid(
            row=1, column=4, pady=5, padx=5)
        Entry(frm_2, textvariable=self.text_3, width=17).grid(
            row=1, column=5, pady=5, padx=5, columnspan=2)

    def _build_comment_panel(self, frm_3):
        """Panel 3: comment input, industry chooser, two actions, output."""
        self.text_31 = Text(frm_3, width=65, height=4)
        self.text_31.grid(
            row=0, column=0, sticky='EW', pady=5, padx=5, columnspan=5)
        self.text_31.insert(INSERT, '个人觉得这车不错,外观漂亮年轻,动力和操控性都很好')
        Label(frm_3, text="行业:", width=4).grid(
            row=1, column=0, pady=5, padx=5, sticky='W')
        self.com_31 = ttk.Combobox(
            frm_3, textvariable=self.industry,
            values=list(self._INDUSTRY_TYPES), width=8)
        self.com_31.current(9)  # index 9 is '汽车'
        self.com_31.grid(row=1, column=1, pady=5, padx=5, sticky='EW')
        Button(frm_3, text='评论观点抽取', command=self.commentary_get,
               width=12).grid(row=1, column=3, pady=5, padx=5, sticky='EW')
        Button(frm_3, text='情感倾向分析', command=self.emotion_analysis,
               width=12).grid(row=1, column=4, pady=5, padx=5)
        self.text_32 = Text(frm_3, width=52, height=4)
        self.text_32.grid(
            row=2, column=0, sticky='EW', pady=5, padx=5, columnspan=5)

    def _build_vector_panel(self, frm_4):
        """Panel 4: word entry, action button and vector output."""
        Entry(frm_4, textvariable=self.text_4, width=18).grid(
            row=0, column=0, pady=5, padx=5)
        self.text_4.set('智能')
        Button(frm_4, text='词向量', command=self.word_vector,
               width=10).grid(row=0, column=1, pady=5, padx=5)
        self.text_41 = Text(frm_4, width=25, height=6)
        self.text_41.grid(
            row=1, column=0, sticky='EW', pady=5, padx=5, columnspan=2)

    def _build_word_similarity_panel(self, frm_5):
        """Panel 5: two word entries, action button and score entry."""
        Label(frm_5, text="词1", width=4).grid(
            row=0, column=0, pady=5, padx=5)
        Entry(frm_5, textvariable=self.text_5, width=20).grid(
            row=0, column=1, pady=5, padx=5, columnspan=2)
        self.text_5.set('打扰')
        Label(frm_5, text="词2", width=4).grid(
            row=1, column=0, pady=5, padx=5)
        Entry(frm_5, textvariable=self.text_6, width=20).grid(
            row=1, column=1, pady=5, padx=5, columnspan=2)
        self.text_6.set('打搅')
        Button(frm_5, text='词义相似度', command=self.word_similar,
               width=10).grid(row=2, column=2, pady=5, padx=5, sticky='EW')
        Label(frm_5, text="结果", width=4).grid(
            row=3, column=0, pady=5, padx=5)
        Entry(frm_5, textvariable=self.text_7, width=20).grid(
            row=3, column=1, pady=5, padx=5, columnspan=2)

    def _build_article_panel(self, frm_6):
        """Panel 6: article title/content, two actions and result box."""
        Label(frm_6, text="文章标题", width=8).grid(
            row=0, column=0, pady=5, padx=5, sticky='W')
        Entry(frm_6, textvariable=self.text_8, width=40).grid(
            row=1, column=0, pady=5, padx=5, columnspan=3)
        self.text_8.set('欧洲冠军杯足球赛')
        Label(frm_6, text="文章内容", width=8).grid(
            row=2, column=0, pady=5, padx=5, sticky='W')
        self.text_61 = Text(frm_6, width=25, height=15)
        self.text_61.grid(
            row=3, column=0, sticky='EW', pady=5, padx=5, columnspan=3)
        self.text_61.insert(
            'insert',
            '欧洲冠军联赛是欧洲足球协会联盟主办的年度足球比赛,代表欧洲俱乐部足球最高荣誉和水平,被认为是全世界最高素质、最具影响力以及最高水平的俱乐部赛事,亦是世界上奖金最高的足球赛事和体育赛事之一。')
        Button(frm_6, text='文章标签', command=self.document_label,
               width=10).grid(row=4, column=0, pady=5, padx=5, sticky='EW')
        Button(frm_6, text='文章分类', command=self.document_class,
               width=10).grid(row=4, column=2, pady=5, padx=5, sticky='EW')
        Label(frm_6, text="计算结果", width=8).grid(
            row=5, column=0, pady=5, padx=5, sticky='W')
        self.text_62 = Text(frm_6, width=25, height=10)
        self.text_62.grid(
            row=6, column=0, sticky='EW', pady=5, padx=5, columnspan=3)
# Script entry point: build the demo object and show its window.
if __name__ == '__main__':
    app = Text_analysis()
    app.interface()