#!/usr/bin/env python
# _*_ coding:utf-8 _*_
# Download the NLTK data packages
# import nltk
# nltk.download()
#Create a Text object
# from nltk import word_tokenize
# from nltk import Text
# tokens=word_tokenize("here is some not very interesting text")
# text=Text(tokens)
#Count word frequencies
# from nltk import FreqDist
# from nltk.book import *
# #Ratio of distinct words to total words in the book
# #len(text6)/len(words)
#
# #Top ten most frequent words
# fdist=FreqDist(text6)
# fdist.most_common(10)
# #Look up the frequency of a specific word
# fdist["Grail"]
#Build and query a bigram (2-gram) model
# from nltk import bigrams
# from nltk.book import *
# bigrams=bigrams(text6,2)
# bigramsDict=FreqDist(bigrams)
# bigramsDict[("Sir","Robin")]
#Part-of-speech tagging with NLTK
# from nltk import word_tokenize
# from nltk import pos_tag
# text=word_tokenize("the dust was thick so he had to dust")
# pos_tag(text)
#Find sentences where "google" is used as a noun (vs. the verb "to google")
from nltk import word_tokenize, sent_tokenize, pos_tag

# Split the sample text into sentences. The original string had no space
# after "world.", so sent_tokenize could not split it into two sentences,
# defeating the per-sentence noun/verb distinction — fixed here.
sentences = sent_tokenize(
    "Google is one of the best companies in the world. "
    "I constantly google myself to see what i am up to"
)

# Penn Treebank noun tags: singular, plural, proper singular, proper plural.
nouns = {'NN', 'NNS', 'NNP', 'NNPS'}

for sentence in sentences:
    # Cheap pre-filter: skip sentences that never mention "google".
    if "google" not in sentence.lower():
        continue
    tagged_words = pos_tag(word_tokenize(sentence))
    for word, tag in tagged_words:
        if word.lower() == 'google' and tag in nouns:
            print(sentence)
            break  # print each matching sentence once, not once per token