# sklearn/naive_bayes/训练/分类

# -*- coding: utf-8 -*-
"""
Created on Mon Apr 23 10:39:20 2018


@author: NAU
"""


# -*- coding: utf-8 -*-
"""
Created on Sun Apr 22 19:29:14 2018


@author: NAU
"""


#导入包
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB


# Load the training file: one sample per line, formatted as "<text>,<label>".
# NOTE(review): the file is read with the platform default encoding, exactly
# as before -- confirm that this matches how train.txt was saved.
with open('E:\\train.txt', 'r') as f:
    # The context manager closes the file even if read() raises.
    train = f.read()

data = []           # (text, label) pairs
train_content = []  # texts only, fed to the vectorizer
train_target = []   # labels only, fed to the classifier
lines = train.strip().split('\n')
for record_no, line in enumerate(lines, start=1):
    one = line.strip().split(',')
    if one == ['']:
        # Blank line in the middle of the file: nothing to learn from it.
        continue
    if len(one) < 2:
        # Fail with a readable message instead of a bare IndexError.
        raise ValueError(
            "malformed training record #%d (expected 'text,label'): %r"
            % (record_no, line))
    # Only the first two comma-separated fields are used.
    data.append((one[0], one[1]))
    train_content.append(one[0])
    train_target.append(one[1])


# Turn the raw training texts into token counts, then re-weight them by TF-IDF.
vectorizer = CountVectorizer()
transformer = TfidfTransformer()
tfidf = transformer.fit_transform(vectorizer.fit_transform(train_content))

# Vocabulary terms in the column order of the count / TF-IDF matrices.
# get_feature_names() was removed in scikit-learn 1.2; prefer the replacement
# and fall back to the old name only on releases that predate it.
if hasattr(vectorizer, 'get_feature_names_out'):
    word = list(vectorizer.get_feature_names_out())
else:
    word = vectorizer.get_feature_names()
# Dense copy of the sparse TF-IDF matrix (one row per training document).
weight = tfidf.toarray()

# Fit the multinomial naive Bayes classifier on the TF-IDF features.
clf = MultinomialNB().fit(tfidf, train_target)


# Accuracy check: re-predict the training samples and compare with their labels.
# Use transform() rather than fit_transform(): evaluation must reuse the IDF
# weights learned during training instead of re-fitting the transformer.
new_tfidf = transformer.transform(vectorizer.transform(train_content))
predicted = clf.predict(new_tfidf)
print(predicted)

# Count the predictions that match the known label.
trueNum = sum(
    1 for guess, truth in zip(predicted, train_target) if guess == truth
)
# Fraction of training samples classified correctly.
a = trueNum / len(train_content)
print("精确度:"+str(a))


# Predict labels for new documents: test.txt holds one document per line.
# NOTE(review): read with the platform default encoding, as before -- confirm
# that this matches how test.txt was saved.
with open('E:\\test.txt', 'r') as ft:
    # The context manager closes the file even if read() raises.
    test_text = ft.read()
docs = test_text.strip().split('\n')

# Only transform() here. Calling fit_transform() would re-learn the IDF
# weights from the test documents, so the features handed to the classifier
# would no longer match the ones it was trained on.
new_tfidf = transformer.transform(vectorizer.transform(docs))
predicted = clf.predict(new_tfidf)
print(predicted)

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值