Dependency trees

import spacy
import networkx as nx
import re
import nltk
import nltk.data
# sentiment analysis
from textblob import TextBlob
from nltk.corpus import sentiwordnet
from collections import defaultdict

nlp=spacy.load('en_core_web_sm')

alltext=[]
sentext=[]
# Read the novel and run TextBlob sentiment analysis sentence by sentence
with open("C:/Users/24224/Desktop/Harry Potter 1 .txt", "r", encoding='utf-8') as file:
    content = file.read()
blob = TextBlob(content)
# split into sentences
sentences = blob.sentences
print(sentences[1])

for s in sentences:
    print("Sentence:", s, s.sentiment)
# dep_ is nsubj
def splitSentence(paragraph):
    tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
    sentences = tokenizer.tokenize(paragraph)
    return sentences
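
A minimal usage sketch of splitSentence (it assumes the punkt data has already been fetched, e.g. with nltk.download('punkt'); the sample paragraph is only illustrative):

# nltk.download('punkt')  # needed once if the punkt model is not installed yet
paragraph = "Mr. Dursley was the director of a firm called Grunnings. He was a big, beefy man."
for s in splitSentence(paragraph):
    print(s)
# punkt should keep "Mr." inside the first sentence instead of splitting on the abbreviation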

with open("C:/Users/24224/Desktop/课程内容集合/自然语言处理/Harry Potter 1 - Sorcerer's Stone.txt","rb")as text:
    for lines in text:
        lines=str(lines)
        if lines=='\n':
            continue
        else:
            line =lines.strip().split('\n')
            #juzi+=str(line)
            alltext.append(line)
    #alltext=splitSentence(juzi)
    #print(alltext)
        #alltext.append(line)

    for sens in alltext:
        text_sentence=nlp(sens[0])
        for sentence in text_sentence.sents:
            sentext.append(sentence)
    print("sentext",sentext)
    word_pos_dict=defaultdict(str)
    for sen in sentext:
        H=re.findall(r'\bHarry\b|\bPotter\b',str(sen))
        if H:
            doc=nlp(str(sen))
        else:
            continue
        for token in doc:
            word_pos_dict[token.dep_]=token.text
        if word_pos_dict['nsubj']!='' and word_pos_dict['ROOT']!="" and word_pos_dict['dobj']!="":
            if word_pos_dict['nsubj']=='Harry' or word_pos_dict['dobj']=="Harry":
                print(word_pos_dict['nsubj'],word_pos_dict['ROOT'],word_pos_dict['dobj'])
        word_pos_dict=defaultdict(str)
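
Note that word_pos_dict keeps only the last token seen for each dependency label, so multi-clause sentences lose information. A sketch of an alternative that reads subjects and objects directly from the children of each sentence root (extract_svo is an illustrative name, not part of the original script):

def extract_svo(doc):
    """Collect (subject, verb, object) triples from the children of each sentence root."""
    triples = []
    for sent in doc.sents:
        root = sent.root
        subjects = [t.text for t in root.children if t.dep_ == 'nsubj']
        objects = [t.text for t in root.children if t.dep_ == 'dobj']
        for s in subjects:
            for o in objects:
                triples.append((s, root.text, o))
    return triples

print(extract_svo(nlp("Harry caught the Snitch.")))
# expected to print something like [('Harry', 'caught', 'Snitch')]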

# pobj: object of preposition, dobj: direct object, det: determiner, nsubj: nominal subject, nn: noun compound modifier
doc=nlp("I come to bring Harry to his aunt and uncle.")
#for token in doc:
    #print(("token.head.text,token.text,token.dep_",token.head.text,token.text,token.dep_))
#spacy.displacy.serve(doc, style='dep')
# build an undirected graph over the dependency edges of the sentence
edge = []
for token in doc:
    for child in token.children:
        edge.append((token.lower_, child.lower_))
graph = nx.Graph(edge)
print(graph)

entity1='I'.lower()
entity2='Harry'.lower()
print(nx.shortest_path_length(graph,source=entity1,target=entity2))
print(nx.shortest_path(graph,source=entity1,target=entity2))
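
The same construction can be wrapped into a small helper so the dependency distance between any pair of words in a sentence can be queried; dependency_distance is an illustrative name, not something defined above:

def dependency_distance(sentence, word1, word2):
    """Shortest-path length between two words in the undirected dependency graph of a sentence."""
    d = nlp(sentence)
    g = nx.Graph([(token.lower_, child.lower_) for token in d for child in token.children])
    return nx.shortest_path_length(g, source=word1.lower(), target=word2.lower())

print(dependency_distance("I come to bring Harry to his aunt and uncle.", "I", "Harry"))
# should match the shortest_path_length call above; the exact parse can vary with the model version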

nlp=spacy.load('en_core_web_sm')
doc=nlp( "spaCy uses the terms head and child to describe the words" )
for token in doc:
    print('{0}({1}) <-- {2} -- {3}({4})'.format(token.text, token.tag_, token.dep_, token.head.text, token.head.tag_))

from spacy import displacy
nlp = spacy.load('en_core_web_sm')
doc = nlp( "spaCy uses the terms head and child to describe the words" )
displacy.serve(doc, style='dep')
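
displacy.serve starts a local web server; if no server is wanted, displacy.render returns the markup directly and it can be written to a file instead (standard spaCy behaviour, not part of the original script; dep_tree.html is an arbitrary file name):

html = displacy.render(doc, style='dep')   # returns the rendered SVG/HTML markup as a string
with open("dep_tree.html", "w", encoding="utf-8") as f:
    f.write(html)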