import spacy
import networkx as nx
import re
import nltk
import nltk.data
# Sentiment analysis
from textblob import TextBlob
from nltk.corpus import sentiwordnet
from collections import defaultdict
# Load the small English spaCy pipeline once; reused by the rest of the script.
nlp = spacy.load('en_core_web_sm')
alltext = []
sentext = []
juzi = ''

# Read the novel and print TextBlob sentence-level sentiment.
# `with` guarantees the file handle is closed (the original leaked it).
with open("C:/Users/24224/Desktop/Harry Potter 1 .txt", "r", encoding='utf-8') as file:
    content = file.read()

# The original `''.join(i for i in content)` copied the string character by
# character — a no-op; keep the name `s2` for compatibility with what follows.
s2 = content
blob = TextBlob(s2)
# Split into sentences.
blob = blob.sentences
print(blob[1])
for sent in blob:
    # .sentiment is a (polarity, subjectivity) pair per sentence.
    print("内容是:", sent, sent.sentiment)
# dep_ == 'nsubj' marks the nominal subject
def splitSentence(paragraph):
    """Split *paragraph* into sentences with NLTK's pre-trained Punkt model."""
    punkt = nltk.data.load('tokenizers/punkt/english.pickle')
    return punkt.tokenize(paragraph)
# Read the book line by line and segment it into sentences with spaCy.
# BUG FIX: the original opened the file in binary mode and wrapped each line
# in str(), which yields repr strings like "b'...\\n'" — the '\n' blank-line
# check never matched, and every line kept a b' prefix plus escape sequences
# that then polluted the spaCy input. Reading as utf-8 text fixes both.
with open("C:/Users/24224/Desktop/课程内容集合/自然语言处理/Harry Potter 1 - Sorcerer's Stone.txt",
          "r", encoding='utf-8') as text:
    for lines in text:
        line = lines.strip()
        if not line:  # skip blank lines
            continue
        # Keep the one-element-list shape the consumer loop below expects.
        alltext.append([line])

for sens in alltext:
    # sens[0] is the raw line; let spaCy's sentencizer split it further.
    text_sentence = nlp(sens[0])
    for sentence in text_sentence.sents:
        sentext.append(sentence)
print("sentext", sentext)
# Extract (subject, verb, object) triples for sentences mentioning Harry.
# Hoist the regex out of the loop instead of re-scanning with findall each time.
harry_pattern = re.compile(r'\bHarry\b|\bPotter\b')
for sen in sentext:
    if not harry_pattern.search(str(sen)):
        continue
    doc = nlp(str(sen))
    # Fresh dict per sentence (replaces the trailing reset in the original).
    # NOTE(review): this keeps only the LAST token per dependency label, so
    # sentences with several subjects/objects lose all but one of them.
    word_pos_dict = defaultdict(str)
    for token in doc:
        word_pos_dict[token.dep_] = token.text
    # Only report complete subject-verb-object triples involving Harry.
    if word_pos_dict['nsubj'] and word_pos_dict['ROOT'] and word_pos_dict['dobj']:
        if word_pos_dict['nsubj'] == 'Harry' or word_pos_dict['dobj'] == "Harry":
            print(word_pos_dict['nsubj'], word_pos_dict['ROOT'], word_pos_dict['dobj'])
# pobj = prepositional object, dobj = direct object, det = determiner,
# nsubj = nominal subject, nn = noun compound modifier
doc = nlp("I come to bring Harry to his aunt and uncle.")
# Build an undirected graph over the dependency tree: one edge per
# head -> child arc, with lowercased surface forms as node labels.
edge = [(token.lower_, child.lower_)
        for token in doc
        for child in token.children]
graph = nx.Graph(edge)
print(graph)
# The shortest dependency path between two entities approximates
# the syntactic relation connecting them.
entity1 = 'I'.lower()
entity2 = 'Harry'.lower()
print(nx.shortest_path_length(graph, source=entity1, target=entity2))
print(nx.shortest_path(graph, source=entity1, target=entity2))
# Print every dependency arc of a demo sentence as: child(tag) <-- dep -- head(tag)
nlp = spacy.load('en_core_web_sm')
doc = nlp("spaCy uses the terms head and child to describe the words")
arc_template = '{0}({1}) <-- {2} -- {3}({4})'
for token in doc:
    print(arc_template.format(token.text, token.tag_, token.dep_,
                              token.head.text, token.head.tag_))
from spacy import displacy

# Visualize the dependency tree in the browser. displacy.serve starts a local
# web server and blocks until interrupted. The same 'en_core_web_sm' pipeline
# is already loaded above, so the original's third spacy.load() was redundant.
doc = nlp("spaCy uses the terms head and child to describe the words")
displacy.serve(doc, style='dep')
# Dependency tree
# (Scraped CSDN page footer: "latest recommended article published 2023-05-23 08:39:46")