个人备忘用,后期会持续追加。如果有大佬愿意在评论区补充,感激不尽[\抱拳]
测试集、训练集拆分
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing. The `X_train_tfidf` features are
# converted to a float64 CSR sparse matrix first; `np.float64` replaces the
# `np.float` alias, which was deprecated in NumPy 1.20 and removed in 1.24
# (it was only an alias for the builtin float, i.e. a 64-bit float).
x_train, x_test, y_train, y_test = train_test_split(
    csr_matrix(X_train_tfidf, dtype=np.float64),
    train_target,
    test_size=0.2,
)
朴素贝叶斯
from sklearn import metrics
from sklearn.naive_bayes import MultinomialNB

# Naive Bayes: build the classifier and train it on the training split
# (fit() returns the estimator itself, so the two steps are chained).
clf = MultinomialNB().fit(x_train, y_train)

# Per-class report and overall accuracy on the training split.
predicted_train = clf.predict(x_train)
print(metrics.classification_report(y_train, predicted_train))
print(f"accurary\t{np.mean(predicted_train == y_train)!s}")

# Same evaluation on the held-out test split.
predicted_test = clf.predict(x_test)
print(metrics.classification_report(y_test, predicted_test))
print(f"accurary\t{np.mean(predicted_test == y_test)!s}")
随机森林分类器
# Random forest
# The estimator is imported here so this section runs on its own, like the
# other classifier sections (the original snippet never imported it).
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics

# 50 trees, trained with 16 parallel jobs; random_state is fixed so repeated
# runs build the same forest.
clf = RandomForestClassifier(n_jobs=16, n_estimators=50, random_state=123)
clf.fit(x_train, y_train)

# Per-class report and overall accuracy on the training split.
predicted_train = clf.predict(x_train)
print(metrics.classification_report(y_train, predicted_train))
print("accurary\t" + str(np.mean(predicted_train == y_train)))

# Same evaluation on the held-out test split.
predicted_test = clf.predict(x_test)
print(metrics.classification_report(y_test, predicted_test))
print("accurary\t" + str(np.mean(predicted_test == y_test)))
逻辑回归
# Logistic regression
from sklearn import metrics
from sklearn.linear_model import LogisticRegression

# Train with 16 parallel jobs requested; fit() returns the estimator itself.
clf = LogisticRegression(n_jobs=16).fit(x_train, y_train)

# Training split: classification report followed by the accuracy line,
# each terminated by a newline exactly as two separate print calls would.
predicted_train = clf.predict(x_train)
print(
    metrics.classification_report(y_train, predicted_train),
    f"accurary\t{np.mean(predicted_train == y_train)!s}",
    sep="\n",
)

# Held-out test split: same two lines of output.
predicted_test = clf.predict(x_test)
print(
    metrics.classification_report(y_test, predicted_test),
    f"accurary\t{np.mean(predicted_test == y_test)!s}",
    sep="\n",
)
决策树分类器
# Decision tree classifier
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier

# Default hyper-parameters; fit() returns the estimator itself.
clf = DecisionTreeClassifier().fit(x_train, y_train)

# Training split: per-class report, then the tab-separated accuracy line.
predicted_train = clf.predict(x_train)
print(metrics.classification_report(y_train, predicted_train))
print("\t".join(("accurary", str(np.mean(predicted_train == y_train)))))

# Held-out test split: same report and accuracy line.
predicted_test = clf.predict(x_test)
print(metrics.classification_report(y_test, predicted_test))
print("\t".join(("accurary", str(np.mean(predicted_test == y_test)))))
# 评估结果
"""
precision recall f1-score support
(精确率) (召回率) (F1 值)
0.0 1.00 1.00 1.00 458400
1.0 1.00 1.00 1.00 111600
2.0 1.00 1.00 1.00 641720
3.0 1.00 1.00 1.00 76502
4.0 1.00 1.00 1.00 363963
accuracy 1.00 1652185
macro avg 1.00 1.00 1.00 1652185
(宏平均:各类别指标的算术平均值)
weighted avg 1.00 1.00 1.00 1652185
(加权平均值)
"""