分类算法(三)—— LR NB SVM KNN 调用示例

# -*- encoding=utf-8 -*-

from sklearn import svm
from sklearn import  neighbors, linear_model
from sklearn import metrics
from sklearn.feature_extraction.text import  TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
import jieba
import pyltp
from classification_practice.practice_one.search_count_auto_words import AutoWordsCounter
from statistic.default_string_search import DefaultStringSearch as ACSearcher
import pandas as pd
from sklearn import metrics
import jieba
from sklearn.ensemble import GradientBoostingRegressor,GradientBoostingClassifier
from sklearn.externals import joblib
# segmentor = pyltp.Segmentor()
# segmentor.load("\\LTPModel\\cws.model")


def read_files(filename):
    """Read a tab-separated, UTF-8 training file and return (sentences, labels).

    Each data line is expected to look like "<id>\t<label>\t<text>":
    column 1 is an integer class label and column 2 is the raw text.
    The text is segmented with jieba and re-joined with single spaces so
    it can be fed directly to TfidfVectorizer.

    Args:
        filename: path to the tab-separated text file.

    Returns:
        (x_train, y_train): parallel lists of space-joined segmented
        sentences and their integer labels.
    """
    x_train = []
    y_train = []

    with open(filename, 'r', encoding="utf-8") as lines:
        for line in lines:
            # strip() already removes the trailing newline, so the old
            # replace("\n", "") call was redundant.
            item = line.strip().split("\t")
            # Guard against blank or malformed lines instead of raising
            # IndexError on item[2].
            if len(item) < 3:
                continue
            sentence = " ".join(jieba.cut(item[2]))
            x_train.append(sentence)
            y_train.append(int(item[1]))
    return x_train, y_train

def read_files1(filename):
    """Read an Excel sentiment file and return (sentences, labels).

    Expects columns "sentiment" (one of 负面/中性/正面) and "sentence".
    Sentiment strings are mapped to -1/0/+1.  Rows whose sentiment is not
    one of the three known values would previously produce NaN labels,
    which break the classifiers downstream — they are now skipped.

    Args:
        filename: path to the .xlsx/.xls file.

    Returns:
        (x_train, y_train): parallel lists of space-joined jieba-segmented
        sentences and their integer labels in {-1, 0, 1}.
    """
    x_train = []
    y_train = []
    df = pd.read_excel(filename).drop_duplicates()
    labels = df["sentiment"].map({"负面": -1, "中性": 0, "正面": 1})
    for sentiment, sentence in zip(labels, df['sentence']):
        # Unknown sentiment values map to NaN — skip rather than train on them.
        if pd.isna(sentiment):
            continue
        cut_sentence = " ".join(jieba.cut(sentence))
        x_train.append(cut_sentence)
        y_train.append(int(sentiment))
    return x_train, y_train


# ---------------------------------------------------------------------------
# Script: fit a TF-IDF representation on the training corpus, then train and
# evaluate NB, KNN, LR and several SVM variants on the same train/test split.
#
# Fixes over the original flow:
#   * the training file was read from disk twice (once to fit the
#     vectorizer, once for the labels) — now read once;
#   * fit + transform on the same corpus collapsed into fit_transform;
#   * knn and logistic were each instantiated twice with the first
#     instance silently discarded — now created once, where used.
# ---------------------------------------------------------------------------
x_raw, y_train = read_files("train1.5.1.txt")

vect = TfidfVectorizer(min_df=2, max_df=0.8)
# fit_transform fits the vocabulary/IDF and vectorizes in one pass.
X_train = vect.fit_transform(x_raw)

x_test_raw, y_test = read_files("test1.2.0.txt")
X_test = vect.transform(x_test_raw)

###################################################
nb = MultinomialNB()
nb.fit(X_train, y_train)
X_predict = nb.predict(X_test)

print ("NaiveBayes Result:")
print(metrics.classification_report(y_test, X_predict))
###################################################
knn = neighbors.KNeighborsClassifier()
knn.fit(X_train, y_train)

t = knn.predict(X_test)
print('KNN score: %f' % knn.score(X_test, y_test))
print(metrics.classification_report(y_test, t))
#################################################
print("LR ")
logistic = linear_model.LogisticRegression(solver='newton-cg')
logistic.fit(X_train, y_train)
X_predict = logistic.predict(X_test)
print(metrics.classification_report(y_test, X_predict))
#################################################
print("SVM-linear ")
C = 1.0  # SVM regularization parameter
X_predict = svm.SVC(kernel='linear', C=C).fit(X_train, y_train).predict(X_test)
print(metrics.classification_report(y_test, X_predict))
#################################################
print("SVM-Rbf ")
X_predict = svm.SVC(kernel='rbf', gamma=0.7, C=C).fit(X_train, y_train).predict(X_test)
print(metrics.classification_report(y_test, X_predict))
#################################################
print("SVM-poly ")
X_predict = svm.SVC(kernel='poly', degree=3, C=C).fit(X_train, y_train).predict(X_test)
print(metrics.classification_report(y_test, X_predict))
#################################################
print("SVM-svc ")
X_predict = svm.LinearSVC(C=C).fit(X_train, y_train).predict(X_test)
print(metrics.classification_report(y_test, X_predict))
#################################################

print("finish! ")

相关介绍后续补充……

十分钟上手sklearn:特征提取,常用模型,交叉验证 - 知乎

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

微知girl

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值