"""Train a multinomial naive Bayes text classifier on TF-IDF features.

The script runs three timed stages at import time: load the training
corpus from disk, vectorize it with TF-IDF, then fit a MultinomialNB model
and print its score on the training data.
"""
from time import time

from sklearn.datasets import load_files
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB

# Directory passed to load_files; machine-specific, adjust for your setup.
TRAIN_DIR = 'C:/Users/Jamson/PycharmProjects/my_sklearn/dataset/379/train'

# --- Stage 1: load the corpus -------------------------------------------
print("loading train dataset...")
t = time()
new_train = load_files(TRAIN_DIR)
print("summary: {0} documents in {1} categories".format(
    len(new_train.data), len(new_train.target_names)))
print("done in {0} seconds".format(time() - t))

# --- Stage 2: turn documents into TF-IDF vectors --------------------------
print("vectorizing train dataset ...")
t = time()
# NOTE(review): load_files is called without an encoding, so the documents
# are presumably raw bytes that the vectorizer decodes as latin-1 -- confirm
# against the dataset.
vectorizer = TfidfVectorizer(encoding='latin-1')
# The document list is passed directly; wrapping it in a generator
# expression added nothing.
X_train = vectorizer.fit_transform(new_train.data)
print("done in {0} seconds.".format(time() - t))

# --- Stage 3: fit the classifier -----------------------------------------
print("train models ...")
# NOTE(review): this timer is started but never read in the visible part of
# the script; the original snippet appears to be cut off after the score.
t = time()
y_train = new_train.target
clf = MultinomialNB(alpha=0.0001)
clf.fit(X_train, y_train)
# Scored on the same data the model was fit on, so this measures fit to the
# training set rather than performance on unseen documents.
train_score = clf.score(X_train, y_train)
print("train score: {0}".format(train_score))
sklearn----naive Bayes
最新推荐文章于 2024-04-30 17:15:32 发布