from nltk.book import *
>>> type(text1)
http://nltk.googlecode.com/svn/trunk/doc/api/nltk.text.Text-class.html
text1.concordance("monstrous")
text1.similar("monstrous")
sorted(set(text3))
>>> f = FreqDist(text1)
>>> f
http://nltk.googlecode.com/svn/trunk/doc/api/nltk.probability.FreqDist-class.html
>>> v = f.keys()
>>> type(v)
>>> V = set(text1)
>>> len(V) == len(v)
True
>>> long_words = [w for w in V if len(w) > 15]
>>> sorted(long_words)
Function    Meaning
s.startswith(t)
test if s starts with t
s.endswith(t)
test if s ends with t
t in s
test if t is contained inside s
s.islower()
test if all cased characters in s are lowercase
s.isupper()
test if all cased characters in s are uppercase
s.isalpha()
test if all characters in s are alphabetic
s.isalnum()
test if all characters in s are alphanumeric
s.isdigit()
test if all characters in s are digits
s.istitle()
test if s is titlecased (all words in s have initial capitals)
bigrams(text1)
>>> [w.upper() for w in s]  # uppercase all elements
>>> len(set([word.lower() for word in text1]))