利用 KNN、决策树、高斯朴素贝叶斯对行为进行预测

import numpy as np
import pandas as pd

from sklearn.preprocessing import Imputer  #预处理函数 处理缺失值
from sklearn.model_selection import train_test_split  #自动分成训练集和测试集
from sklearn.metrics import classification_report  #预测结果评估函数

from sklearn.neighbors import KNeighborsClassifier #K近邻分类器
from sklearn.tree import DecisionTreeClassifier   #决策树分类器
from sklearn.naive_bayes import GaussianNB   #高斯朴素贝叶斯分类器


def load_dataset(feature_path, label_path, n_features=41):
    """Load and merge feature files and label files into two arrays.

    Each feature file is read as header-less, comma-separated text in which
    ``?`` marks a missing value. Every missing value is replaced by the mean
    of its column, computed separately for each file. A column with no
    observed value at all in a file has no mean and therefore stays NaN.
    Each label file is read as header-less text.

    Args:
        feature_path: Iterable of paths to the feature files.
        label_path: Iterable of paths to the label files.
        n_features: Number of columns every feature file must have.

    Returns:
        A ``(feature, label)`` tuple. ``feature`` is a 2-D float array with
        ``n_features`` columns holding the rows of all feature files in the
        order given. ``label`` is the 1-D flattening of all label files in
        the order given (numeric labels come back as floats).

    Raises:
        ValueError: If a feature file does not have ``n_features`` columns
            or a label file does not have exactly one column (raised by
            ``np.concatenate`` on the shape mismatch).
    """
    # Seed both lists with an empty array of the expected width: the result
    # keeps a well-defined shape when no files are given, and a file with the
    # wrong number of columns is rejected at concatenation time.
    feature_parts = [np.empty((0, n_features))]
    label_parts = [np.empty((0, 1))]

    for path in feature_path:
        # Comma-separated, no header row, '?' parsed as NaN.
        df = pd.read_table(path, delimiter=',', na_values='?', header=None)
        # Mean imputation per column, fitted on this file only.
        filled = df.fillna(df.mean())
        feature_parts.append(np.asarray(filled, dtype=float))

    for path in label_path:
        # Label files carry no header row.
        df = pd.read_table(path, header=None)
        label_parts.append(np.asarray(df))

    # Concatenate once instead of re-copying the growing array per file.
    feature = np.concatenate(feature_parts)
    # Flatten the (n_samples, 1) label column to one dimension.
    label = np.ravel(np.concatenate(label_parts))

    return feature, label

if __name__ == '__main__':
    # Script entry point: train three classifiers on recordings A-D and
    # report how well each one predicts the labels of recording E.

    # Local import: only this entry point needs it.
    from sklearn.utils import shuffle

    feature_paths = ['A.feature', 'B.feature', 'C.feature', 'D.feature', 'E.feature']
    label_paths = ['A.label', 'B.label', 'C.label', 'D.label', 'E.label']

    # The first four files form the training set.
    x_train, y_train = load_dataset(feature_paths[:4], label_paths[:4])
    # The remaining file forms the test set.
    x_test, y_test = load_dataset(feature_paths[4:], label_paths[4:])

    # Shuffle the training samples, keeping features and labels aligned.
    # train_test_split(..., test_size=0.0) was previously used for this, but
    # recent scikit-learn versions raise ValueError for a float test_size of 0.
    x_train, y_train = shuffle(x_train, y_train)

    # (display name, estimator) pairs, in the order they are trained/reported.
    classifiers = [
        ('knn', KNeighborsClassifier()),
        ('DT', DecisionTreeClassifier()),
        ('Bayes', GaussianNB()),
    ]

    # Train every model and predict first; the reports are printed together
    # afterwards so they are not interleaved with the progress messages.
    predictions = []
    for name, clf in classifiers:
        print('Start training ' + name)
        clf.fit(x_train, y_train)
        print('Training done')
        predictions.append((name, clf.predict(x_test)))
        print('Prediction done')

    # classification_report(true labels, predicted labels) prints precision,
    # recall, f1-score and support for each class.
    for name, answer in predictions:
        print('\n\nThe classification report for ' + name + ':')
        print(classification_report(y_test, answer))
  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值