Dependency trees

import spacy
import networkx as nx
import re
import nltk
import nltk.data
# sentiment analysis
from textblob import TextBlob
from nltk.corpus import sentiwordnet
from collections import defaultdict

nlp=spacy.load('en_core_web_sm')

alltext=[]
sentext=[]
# Read the novel and run TextBlob sentiment analysis sentence by sentence
with open("C:/Users/24224/Desktop/Harry Potter 1 .txt", "r", encoding='utf-8') as file:
    content = file.read()
blob = TextBlob(content)
# split into sentences
sentences = blob.sentences
print(sentences[1])

for s in sentences:
    print("Sentence:", s, s.sentiment)
# dep_ is nsubj
def splitSentence(paragraph):
    tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
    sentences = tokenizer.tokenize(paragraph)
    return sentences
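
A minimal usage sketch of splitSentence (it assumes the punkt data has already been fetched, e.g. with nltk.download('punkt'); the sample paragraph is only illustrative):

# nltk.download('punkt')  # needed once if the punkt model is not installed yet
paragraph = "Mr. Dursley was the director of a firm called Grunnings. He was a big, beefy man."
for s in splitSentence(paragraph):
    print(s)
# punkt should keep "Mr." inside the first sentence instead of splitting on the abbreviation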

with open("C:/Users/24224/Desktop/课程内容集合/自然语言处理/Harry Potter 1 - Sorcerer's Stone.txt","rb")as text:
    for lines in text:
        lines=str(lines)
        if lines=='\n':
            continue
        else:
            line =lines.strip().split('\n')
            #juzi+=str(line)
            alltext.append(line)
    #alltext=splitSentence(juzi)
    #print(alltext)
        #alltext.append(line)

    for sens in alltext:
        text_sentence=nlp(sens[0])
        for sentence in text_sentence.sents:
            sentext.append(sentence)
    print("sentext",sentext)
    word_pos_dict=defaultdict(str)
    for sen in sentext:
        H=re.findall(r'\bHarry\b|\bPotter\b',str(sen))
        if H:
            doc=nlp(str(sen))
        else:
            continue
        for token in doc:
            word_pos_dict[token.dep_]=token.text
        if word_pos_dict['nsubj']!='' and word_pos_dict['ROOT']!="" and word_pos_dict['dobj']!="":
            if word_pos_dict['nsubj']=='Harry' or word_pos_dict['dobj']=="Harry":
                print(word_pos_dict['nsubj'],word_pos_dict['ROOT'],word_pos_dict['dobj'])
        word_pos_dict=defaultdict(str)
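
Note that word_pos_dict keeps only the last token seen for each dependency label, so multi-clause sentences lose information. A sketch of an alternative that reads subjects and objects directly from the children of each sentence root (extract_svo is an illustrative name, not part of the original script):

def extract_svo(doc):
    """Collect (subject, verb, object) triples from the children of each sentence root."""
    triples = []
    for sent in doc.sents:
        root = sent.root
        subjects = [t.text for t in root.children if t.dep_ == 'nsubj']
        objects = [t.text for t in root.children if t.dep_ == 'dobj']
        for s in subjects:
            for o in objects:
                triples.append((s, root.text, o))
    return triples

print(extract_svo(nlp("Harry caught the Snitch.")))
# expected to print something like [('Harry', 'caught', 'Snitch')]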

# pobj: object of preposition, dobj: direct object, det: determiner, nsubj: nominal subject, nn: noun compound modifier
doc=nlp("I come to bring Harry to his aunt and uncle.")
#for token in doc:
    #print(("token.head.text,token.text,token.dep_",token.head.text,token.text,token.dep_))
#spacy.displacy.serve(doc, style='dep')
# build an undirected graph over the dependency edges of the sentence
edge = []
for token in doc:
    for child in token.children:
        edge.append((token.lower_, child.lower_))
graph = nx.Graph(edge)
print(graph)

entity1='I'.lower()
entity2='Harry'.lower()
print(nx.shortest_path_length(graph,source=entity1,target=entity2))
print(nx.shortest_path(graph,source=entity1,target=entity2))
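
The same construction can be wrapped into a small helper so the dependency distance between any pair of words in a sentence can be queried; dependency_distance is an illustrative name, not something defined above:

def dependency_distance(sentence, word1, word2):
    """Shortest-path length between two words in the undirected dependency graph of a sentence."""
    d = nlp(sentence)
    g = nx.Graph([(token.lower_, child.lower_) for token in d for child in token.children])
    return nx.shortest_path_length(g, source=word1.lower(), target=word2.lower())

print(dependency_distance("I come to bring Harry to his aunt and uncle.", "I", "Harry"))
# should match the shortest_path_length call above; the exact parse can vary with the model version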

nlp=spacy.load('en_core_web_sm')
doc=nlp( "spaCy uses the terms head and child to describe the words" )
for token in doc:
    print('{0}({1}) <-- {2} -- {3}({4})'.format(token.text, token.tag_, token.dep_, token.head.text, token.head.tag_))

from spacy import displacy
nlp = spacy.load('en_core_web_sm')
doc = nlp( "spaCy uses the terms head and child to describe the words" )
displacy.serve(doc, style='dep')
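
displacy.serve starts a local web server; if no server is wanted, displacy.render returns the markup directly and it can be written to a file instead (standard spaCy behaviour, not part of the original script; dep_tree.html is an arbitrary file name):

html = displacy.render(doc, style='dep')   # returns the rendered SVG/HTML markup as a string
with open("dep_tree.html", "w", encoding="utf-8") as f:
    f.write(html)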