NLTK
nltk.tokenize.word_tokenize(s)
http://www.nltk.org/_modules/nltk/tokenize.html
NumPy
np.zeros((D, N))
np.random.shuffle(data)
pandas
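data = pd.read_csv('spambase.data').as_matrix()  # NOTE: as_matrix() was deprecated in pandas 0.23 and removed in 1.0; on newer pandas use .to_numpy() (or .values)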
sklearn
from sklearn.naive_bayes import MultinomialNB
model = MultinomialNB()
model.fit(Xtrain, Ytrain)
print "Classification rate for NB:", model.score(Xtest, Ytest)
from sklearn.ensemble import AdaBoostClassifier
model = AdaBoostClassifier()
model.fit(Xtrain, Ytrain)
print "Classification rate for AdaBoost:", model.score(Xtest, Ytest)
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
model.fit(Xtrain, Ytrain)
print "Classification rate for LogisticRegression:", model.score(Xtest, Ytest)