分类模型综合练习

利用sklearn自己构建一组含有1000个样本点,6个特征,3个类别的分类数据集(随机种子取666),并利用学过的分类器模型(例如传统的分类模型:决策树、朴素贝叶斯、K近邻、BP神经网络、逻辑回归、支持向量机等,以及集成学习模型:Voting、Bagging、随机森林、AdaBoost、GBDT)对数据进行训练,充分利用交叉验证及网格搜索调优,尽可能地提高模型的分类效果。模型分类效果请通过混淆矩阵、ROC曲线、学习曲线、验证曲线等方式进行展示。


'''
sklearn.datasets.make_classification(n_samples=100, n_features=20, n_informative=2, n_redundant=2,  
                    n_repeated=0, n_classes=2, n_clusters_per_class=2, weights=None,  
                    flip_y=0.01, class_sep=1.0, hypercube=True,shift=0.0, scale=1.0,   
                    shuffle=True, random_state=None) 
通常用于分类算法。 
n_features :特征总数 = n_informative + n_redundant + n_repeated + 其余随机生成的无用(噪声)特征个数 
n_informative:有信息(informative)特征的个数 
n_redundant:冗余信息,informative特征的随机线性组合 
n_repeated :重复信息,随机提取n_informative和n_redundant 特征 
n_classes:分类类别 
n_clusters_per_class :某一个类别是由几个cluster构成的
'''

from sklearn import datasets
import matplotlib.pyplot as plt 
 
data,target = datasets.make_classification(n_samples=1000,n_features=6,n_classes=3,random_state=666,n_clusters_per_class=1)
print(data.shape)
print(target.shape)
 
plt.scatter(data[:,0],data[:,1],c=target,cmap=plt.cm.spring,edgecolor='k')

在这里插入图片描述

x=data
y=target

数据拆分

from sklearn.model_selection import train_test_split

x_train,x_test,y_train,y_test=train_test_split(data,target,test_size=0.2)

学习曲线

from sklearn.model_selection import learning_curve# 导入学习曲线
# 设置训练集大小
size=np.linspace(0.1,1,10)
def xuexiquxian(model,size):
    x,y=data,target
    train_sizes,train_scores,test_scores=learning_curve(model,x,y,train_sizes=size,cv=10)
    print(train_sizes)
# 绘制学习曲线
    plt.scatter(train_sizes,np.mean(train_scores,axis=1))
    plt.scatter(train_sizes,np.mean(test_scores,axis=1))
    plt.legend(['train_scores','test_scores'])
    plt.show()

交叉验证

from sklearn.model_selection import cross_val_score
import numpy as np
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import r2_score
def jiaochayanzheng(model):
    x,y=data,target
    indices=np.arange(y.shape[0])
    np.random.shuffle(indices)
    x,y=x[indices],y[indices]
    
   
    scores=cross_val_score(model,x,y,cv=10)
    print('将拆分与评价合并执行')
    print(scores)
    print(scores.mean(),scores.std())

   
    print('同时使用多个评价指标')
    scoring=['r2','explained_variance']
    scores=cross_validate(model,x,y,cv=10,scoring=scoring,return_train_score=False)
    print(scores)
    print(scores['test_r2'].mean())
    
    print('使用交互验证后的模型进行预测')
    pred=cross_val_predict(model,x,y,cv=10)
   
    print(r2_score(target,pred))

评价指标

def pinjia(model):
    model.fit(x_train,y_train)
    y_pred=model.predict(x_test)
    print('f1_score')
    from sklearn.metrics import f1_score
    print(f1_score(y_test,y_pred,average='micro'))
    print(f1_score(y_test,y_pred,average='macro'))
    print(f1_score(y_test,y_pred,average='weighted'))
    print('precision_score')
    from sklearn.metrics import precision_score
    print(precision_score(y_test,y_pred,average='micro'))
    print(precision_score(y_test,y_pred,average='macro'))
    print(precision_score(y_test,y_pred,average='weighted'))
    print('分类汇总报告')
    from sklearn.metrics import classification_report
    #分类汇总报告
    print(classification_report(y_test,y_pred,digits=3,#小数点后保留的位数
    labels=[0,1,2],#类别排序
    target_names=['第0类','第1类','第2类'],#类别名称
    output_dict=False)#结果是否以字典形式输出
    )
    print('混淆矩阵')
    #混淆举证
    from sklearn.metrics import confusion_matrix
    cm=confusion_matrix(y_test,y_pred)
    print(cm)

    #热力图展示混淆矩阵
    %matplotlib inline
    import matplotlib.pyplot as plt
    import seaborn as sns

    sns.heatmap(cm,cmap=sns.color_palette("Blues"),annot=True)

决策树

from sklearn.tree import DecisionTreeClassifier

clf1=DecisionTreeClassifier()
xuexiquxian(clf1,size)

# 数据拆分
from sklearn.model_selection import train_test_split

x_train,x_test,y_train,y_test=train_test_split(data,target,test_size=0.2)


from sklearn.tree import DecisionTreeClassifier

clf1=DecisionTreeClassifier()
clf1.fit(x_train,y_train)
y_pred_clf1=clf1.predict(x_test)
print(clf1.score(x_train,y_train),clf1.score(x_test,y_test))

clf1_1=DecisionTreeClassifier( max_depth=25,min_samples_split=500,max_leaf_nodes=20)
clf1_1.fit(x_train,y_train)
y_pred_clf1_1=clf1_1.predict(x_test)
print(clf1_1.score(x_train,y_train),clf1_1.score(x_test,y_test))

clf1_2=DecisionTreeClassifier( max_depth=9,min_samples_split=10,max_leaf_nodes=20)
clf1_2.fit(x_train,y_train)
y_pred_clf1_2=clf1_2.predict(x_test)
print(clf1_2.score(x_train,y_train),clf1_2.score(x_test,y_test))

pinjia(clf1)

jiaochayanzheng(clf1)
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
knc=KNeighborsClassifier()
xuexiquxian(knc,size)

from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(data,target,test_size=0.45)

knc.fit(x_train,y_train)
y_pred_knc=knc.predict(x_test)
print(knc.score(x_train,y_train))
print(knc.score(x_test,y_test))
pinjia(knc)
jiaochayanzheng(knc)

在这里插入图片描述
在这里插入图片描述

from sklearn.neural_network import MLPClassifier
mlp=MLPClassifier()
xuexiquxian(mlp,size)

from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(data,target,test_size=0.45)

mlp.fit(x_train,y_train)
y_pred_knc=mlp.predict(x_test)
print(mlp.score(x_train,y_train))
print(mlp.score(x_test,y_test))
pinjia(mlp)
jiaochayanzheng(mlp)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值