今天继续做了一些小的尝试，算作技术铺垫。
from nltk.book import *
# Sanity marker: reaching this line means the star-import above succeeded.
print("*****import nltk.book OK")

# Distinct tokens of text7 that contain both a hyphen and the substring "index".
hyphenated_index = {tok for tok in text7 if 'index' in tok and '-' in tok}
print(sorted(hyphenated_index))
print('\n')

# Distinct title-cased tokens of text3 that are longer than 10 characters.
long_titlecase = {tok for tok in text3 if len(tok) > 10 and tok.istitle()}
print(sorted(long_titlecase))
print('\n')

# Distinct tokens of sent7 for which str.islower() is false.
not_lowercase = {tok for tok in sent7 if not tok.islower()}
print(sorted(not_lowercase))
print('\n')

# Distinct tokens of text2 that contain the letter sequence "cie" or "cei".
cie_or_cei = {tok for tok in text2 if 'cei' in tok or 'cie' in tok}
print(sorted(cie_or_cei))
print('\n')
# Print every token of sent1 that ends with the letter "l".
# The loop and conditional bodies are indented (the original had lost its
# indentation), and the single-argument call form of print behaves the same
# on Python 2 and Python 3.
for token in sent1:
    if token.endswith('l'):
        print(token)
# Label each token of sent1 by its casing and print one line per token.
# Tokens that are neither all-lowercase nor title-cased are reported as
# punctuation.
for token in sent1:
    if token.islower():
        label = 'a lowercase word'
    elif token.istitle():
        label = 'a titlecase word'
    else:
        label = 'punctuation'
    # Build the whole line first: a multi-argument print statement is
    # Python 2 only, while a single formatted string prints identically on
    # Python 2 and Python 3.
    print('{0} is {1}'.format(token, label))
# Collect the distinct words of text2 containing "cie" or "cei", sorted.
tricky = sorted(w for w in set(text2) if 'cie' in w or 'cei' in w)
# Emit every word on a single, space-separated line. The original relied on
# the trailing comma of the Python 2 print statement to stay on one line,
# which is a syntax error on Python 3; joining the words first produces the
# same line on both versions.
print(' '.join(tricky))
和机器人对话
import nltk
# Start NLTK's chatbot demo to chat with a bot.
# NOTE(review): presumably interactive (reads from the console) -- confirm
# before running this script unattended.
nltk.chat.chatbots()