《Python自然语言处理》(2014年7月第一版)课后习题练习
1.
>>> phrase=["Valentine's"]
>>> phrase=["lonely"]+phrase+["day"]
>>> phrase
['lonely', "Valentine's", 'day']
>>> phrase[1]
"Valentine's"
>>> phrase[1][1]
'a'
>>> phrase.index('day')
2
>>> sorted(phrase)
["Valentine's", 'day', 'lonely']
>>> phrase[1:2]
["Valentine's"]
>>> phrase*3
['lonely', "Valentine's", 'day', 'lonely', "Valentine's", 'day', 'lonely', "Valentine's", 'day']
2.
>>> from nltk.corpus import gutenberg
>>> gutenberg.fileids()
[u'austen-emma.txt', u'austen-persuasion.txt', u'austen-sense.txt', u'bible-kjv.txt', u'blake-poems.txt', u'bryant-stories.txt', u'burgess-busterbrown.txt', u'carroll-alice.txt', u'chesterton-ball.txt', u'chesterton-brown.txt', u'chesterton-thursday.txt', u'edgeworth-parents.txt', u'melville-moby_dick.txt', u'milton-paradise.txt', u'shakespeare-caesar.txt', u'shakespeare-hamlet.txt', u'shakespeare-macbeth.txt', u'whitman-leaves.txt']
>>> persuasion=gutenberg.words('austen-persuasion.txt')
>>> len(persuasion)
98171
>>> len(set(persuasion))  # word types = number of distinct tokens (case-sensitive; punctuation tokens are counted too)
6132
3.
>>> from nltk.corpus import brown
>>> brown.categories()
[u'adventure', u'belles_lettres', u'editorial', u'fiction', u'government', u'hobbies', u'humor', u'learned', u'lore', u'mystery', u'news', u'religion', u'reviews', u'romance', u'science_fiction']
>>> brown.words(categories='lore')
[u'In', u'American', u'romance', u',', u'almost', ...]
>>> brown.words(categories='mystery')
[u'There', u'were', u'thirty-eight', u'patients', ...]
>>> from nltk.corpus import webtext
>>> webtext.fileids()
[u'firefox.txt', u'grail.txt', u'overheard.txt', u'pirates.txt', u'singles.txt', u'wine.txt']
>>> webtext.words('firefox.txt')
[u'Cookie', u'Manager', u':', u'"', u'Don', u"'", u't', ...]
>>> webtext.words('grail.txt