python零基础做机器学习实验(决策树/朴素贝叶斯/神经网络/支持向量机/随机森林/判别分析)

45 篇文章 1 订阅
27 篇文章 1 订阅
import numpy as np
import pandas as pd
from sklearn import tree
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score,cross_validate
from time import time

# 1. Cross-validation experiment: evaluate one classifier on the
#    integer-encoded agaricus-lepiota (mushroom) dataset with k-fold CV.

def load_dataset(csv_path='datasets/agaricus-lepiota_dropEmpty_dig.csv'):
    """Load the experiment dataset from a headerless CSV.

    Column 0 holds the class label; every remaining column is an
    (integer-encoded) categorical feature.

    Returns:
        (X, y): feature DataFrame and label Series.
    """
    data = pd.read_csv(csv_path, header=None)
    y = data[0]
    X = data[data.columns[1:]]
    return X, y


def evaluate(clf, X, y, cv=10):
    """Run k-fold cross-validation, report timing and accuracy.

    Prints the elapsed wall-clock time, the per-fold scores and the
    mean score (matching the original script's output), and returns
    the per-fold score array for further use.
    """
    start = time()
    scores = cross_val_score(clf, X, y, cv=cv)
    print(time() - start)
    print(scores)
    print(scores.mean())
    return scores


if __name__ == '__main__':
    X, Y = load_dataset()

    # Candidate models for this experiment — uncomment exactly one.
    # (The `presort` argument from the original notes was removed in
    #  modern scikit-learn and has been dropped here.)
    # ID3-style decision tree:
    # clf = tree.DecisionTreeClassifier(criterion='entropy', max_features=10, min_samples_leaf=3, random_state=1, splitter='random')
    # CART decision tree:
    # clf = tree.DecisionTreeClassifier(criterion='gini', max_features=10, min_samples_leaf=3, random_state=1, splitter='random')
    # Gaussian naive Bayes:
    # clf = GaussianNB()
    # K-nearest neighbours:
    # clf = KNeighborsClassifier(3)
    # Random forest:
    # clf = RandomForestClassifier(max_depth=8, random_state=1)
    # Support vector machine:
    # clf = SVC(kernel='rbf', C=1.0)
    # Multi-layer perceptron:
    # clf = MLPClassifier(hidden_layer_sizes=20, activation='relu', max_iter=200, random_state=1)
    # AdaBoost (currently active):
    clf = AdaBoostClassifier(n_estimators=10)

    evaluate(clf, X, Y)

#2.划分训练集和测试集
# from sklearn.model_selection import train_test_split
# data=pd.read_csv('datasets/agaricus-lepiota_dropEmpty_dig.csv',header=None)
# Y=data[0]
# X=data[data.columns[1:]]
# x_train,x_test,y_train,y_test=train_test_split(X,Y,test_size=0.25,random_state=1)


#3.训练集和测试集分离
# train_data=pd.read_excel('HCV_train.xls',header=None)
# train_feature=train_data[train_data.columns[:train_data.shape[1]-1]].values
# train_label=list(train_data[train_data.columns[train_data.shape[1]-1]])
# clf = tree.DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None, max_features=7, max_leaf_nodes=None, min_samples_leaf=3, min_samples_split=2, min_weight_fraction_leaf=0.0, presort=False, random_state=1, splitter='random')
# # clf = GaussianNB()
# # clf=KNeighborsClassifier(2)
# # clf=GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True)
#
# clf = clf.fit(train_feature,train_label)
#
# test_data=pd.read_excel('HCV_test.xls',header=None)
# test_feature=test_data[test_data.columns[:test_data.shape[1]-1]].values
# test_label=list(test_data[test_data.columns[test_data.shape[1]-1]])
# test_pred=clf.predict(test_feature)
# print(np.mean(test_pred==test_label))
# print(confusion_matrix(test_label, test_pred))
# print(classification_report(test_label, test_pred))
  • 0
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Regent Wan

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值