"""Train and evaluate a multinomial naive Bayes text classifier.

Loads the corpora under ``data/train`` and ``data/test``, converts the
documents to TF-IDF features, fits a ``MultinomialNB`` model on the training
split, and prints the training and test accuracy.
"""
from sklearn.datasets import load_files
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB

# Load both splits from disk.
news_train = load_files('data/train')
news_test = load_files('data/test')

# The vectorizer decodes the raw documents as latin-1.
vect = TfidfVectorizer(encoding='latin-1')

# Learn the vocabulary and IDF weights from the training split only.
X_train = vect.fit_transform(news_train.data)
# Reuse the fitted vocabulary for the test split. Calling fit_transform here
# would build a new, unrelated feature space whose columns do not line up
# with the features the classifier was trained on.
X_test = vect.transform(news_test.data)
print(X_train)

y_train = news_train.target
y_test = news_test.target

# A small alpha means only light additive smoothing is applied.
clf = MultinomialNB(alpha=0.0001)
clf.fit(X_train, y_train)

train_score = clf.score(X_train, y_train)
print(train_score)

test_score = clf.score(X_test, y_test)
print(test_score)