Chapter 8, 8.3 Natural Language Processing - Basic Usage of the Library

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Download the NLTK data (opens the interactive downloader)
# import nltk
# nltk.download()
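# A minimal alternative sketch: download only the packages these examples
# need instead of everything via the interactive downloader (package names
# assume a recent NLTK release):
# import nltk
# nltk.download('punkt')                        # models for word_tokenize/sent_tokenize
# nltk.download('book')                         # the nltk.book corpora (text1..text9)
# nltk.download('averaged_perceptron_tagger')   # model behind pos_tag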

# Create a Text object from a list of tokens
# from nltk import word_tokenize
# from nltk import Text
# tokens = word_tokenize("here is some not very interesting text")
# text = Text(tokens)
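# Once wrapped in a Text object, the tokens support corpus-style queries;
# a short sketch using two real Text methods:
# text.count("some")        # how often the token "some" occurs
# text.concordance("text")  # print every occurrence of "text" in context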

# Count word frequencies
# from nltk import FreqDist
# from nltk.book import *
# # Ratio of distinct words to total words in the book (lexical diversity)
# # len(set(text6)) / len(text6)
#
# # The ten most frequent words
# fdist = FreqDist(text6)
# fdist.most_common(10)
# # Look up how often a specific word occurs
# fdist["Grail"]

# Build and query a bigram (2-gram) frequency model
# from nltk import bigrams
# from nltk.book import *
# bigramPairs = bigrams(text6)   # bigrams() takes only the token sequence
# bigramsDict = FreqDist(bigramPairs)
# bigramsDict[("Sir", "Robin")]

# Part-of-speech tagging with NLTK
# from nltk import word_tokenize
# from nltk import pos_tag
# text = word_tokenize("the dust was thick so he had to dust")
# pos_tag(text)
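# The sentence is chosen so the same word gets two different tags: the first
# "dust" should come back as a noun and the second as a verb, roughly
# [('the', 'DT'), ('dust', 'NN'), ..., ('to', 'TO'), ('dust', 'VB')]
# (exact tags depend on the tagger model installed).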


# Print the sentences in which "google" is used as a noun
from nltk import word_tokenize, sent_tokenize, pos_tag
sentences = sent_tokenize("Google is one of the best companies in the world. I constantly google myself to see what I am up to")
nouns = ['NN', 'NNS', 'NNP', 'NNPS']
for sentence in sentences:
    if "google" in sentence.lower():
        taggedWords = pos_tag(word_tokenize(sentence))
        for word in taggedWords:
            if word[0].lower() == 'google' and word[1] in nouns:
                print(sentence)
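# Expected behavior (untested sketch): only the first sentence is printed,
# since "Google" there is tagged as a proper noun (NNP), while the second
# "google" acts as a verb and fails the noun-tag check.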

 
