import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve
import matplotlib.pyplot as plt
def performance(y_true, predict, color="g", ann=True):
    """Plot a ROC curve on a new figure and optionally annotate its metrics.

    Args:
        y_true: Ground-truth binary labels.
        predict: Object indexable as ``predict[:, 1]``; column 1 is used as
            the positive-class score (presumably ``predict_proba`` output --
            confirm against the caller).
        color: Matplotlib color used for the ROC line.
        ann: When true, write the accuracy and AUC values onto the plot.
    """
    scores = predict[:, 1]

    # Accuracy is measured at a fixed 0.5 decision threshold on the score.
    acc = accuracy_score(y_true, scores > 0.5)
    auc = roc_auc_score(y_true, scores)
    fpr, tpr, _ = roc_curve(y_true, scores)

    plt.figure()
    plt.plot(fpr, tpr, color=color)
    if ann:
        # Positions are in data coordinates; both ROC axes span [0, 1].
        plt.annotate("Acc: %0.2f" % acc, (0.2, 0.7))
        plt.annotate("AUC: %0.2f" % auc, (0.2, 0.6))
# Load the labeled reviews; the .tsv file is tab-separated.
df = pd.read_csv("labeledTrainData.tsv", delimiter="\t")
print(df.head(50))  # inspect how the data is laid out

# Sequential 70/30 split: the leading rows train, the trailing rows test.
split = 0.7
n_train = int(split * len(df))
d_train = df[:n_train]
d_test = df[n_train:]
print(len(df))
print(len(d_train))
print(len(d_test))

# Word-count features: the vocabulary is learned from the training reviews
# only, and the test reviews are then mapped onto that same vocabulary.
vectorizer = CountVectorizer()
features = vectorizer.fit_transform(d_train.review)
test_features = vectorizer.transform(d_test.review)
Kaggle 电影评论情感分析：朴素贝叶斯分类
最新推荐文章于 2021-12-30 00:15:15 发布