Python Machine Learning in Practice (2): Regression Prediction with Support Vector Regression and K-Nearest Neighbors Regression

Code and Comments

Input:

# Support vector machine regression
# Note: load_boston only exists in scikit-learn < 1.2; see the note after this block.
from sklearn.datasets import load_boston
boston = load_boston()
print(boston.DESCR)  # .DESCR holds the dataset description

from sklearn.model_selection import train_test_split
import numpy as np

X = boston.data
y = boston.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=33, test_size=0.25)

# The features span very different ranges (min, max and median differ widely),
# so standardize both the features and the target.
from sklearn.preprocessing import StandardScaler
ss_X = StandardScaler()
ss_y = StandardScaler()
X_train = ss_X.fit_transform(X_train)
X_test = ss_X.transform(X_test)
# StandardScaler expects 2-D input, so reshape the 1-D targets first.
y_train = ss_y.fit_transform(y_train.reshape(-1, 1))
y_test = ss_y.transform(y_test.reshape(-1, 1))

# SVR with a linear kernel
from sklearn.svm import SVR
linear_svr = SVR(kernel='linear')
linear_svr.fit(X_train, y_train.ravel())  # estimators expect a 1-D target
linear_svr_y_predict = linear_svr.predict(X_test)

# SVR with a polynomial kernel
poly_svr = SVR(kernel='poly')
poly_svr.fit(X_train, y_train.ravel())
poly_svr_y_predict = poly_svr.predict(X_test)

# SVR with a radial basis function (RBF) kernel
rbf_svr = SVR(kernel='rbf')
rbf_svr.fit(X_train, y_train.ravel())
rbf_svr_y_predict = rbf_svr.predict(X_test)

from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

# Invert the target scaling before computing MSE/MAE so the errors are in the
# original units (thousands of dollars); R-squared is scale-invariant.
print("linear_svr:", linear_svr.score(X_test, y_test))
print("R-squared:", r2_score(y_test, linear_svr_y_predict))
print("MSE:", mean_squared_error(ss_y.inverse_transform(y_test),
                                 ss_y.inverse_transform(linear_svr_y_predict.reshape(-1, 1))))
print("MAE:", mean_absolute_error(ss_y.inverse_transform(y_test),
                                  ss_y.inverse_transform(linear_svr_y_predict.reshape(-1, 1))))

print("poly_svr:", poly_svr.score(X_test, y_test))
print("R-squared:", r2_score(y_test, poly_svr_y_predict))
print("MSE:", mean_squared_error(ss_y.inverse_transform(y_test),
                                 ss_y.inverse_transform(poly_svr_y_predict.reshape(-1, 1))))
print("MAE:", mean_absolute_error(ss_y.inverse_transform(y_test),
                                  ss_y.inverse_transform(poly_svr_y_predict.reshape(-1, 1))))

print("rbf_svr:", rbf_svr.score(X_test, y_test))
print("R-squared:", r2_score(y_test, rbf_svr_y_predict))
print("MSE:", mean_squared_error(ss_y.inverse_transform(y_test),
                                 ss_y.inverse_transform(rbf_svr_y_predict.reshape(-1, 1))))
print("MAE:", mean_absolute_error(ss_y.inverse_transform(y_test),
                                  ss_y.inverse_transform(rbf_svr_y_predict.reshape(-1, 1))))

print("(________________________________________________________________________)")

# K-nearest neighbors regression
from sklearn.neighbors import KNeighborsRegressor

# Arithmetic mean of the K nearest neighbors' targets
uni_knr = KNeighborsRegressor(weights='uniform')
uni_knr.fit(X_train, y_train.ravel())
uni_knr_y_predict = uni_knr.predict(X_test)

# Distance-weighted average of the K nearest neighbors' targets
dis_knr = KNeighborsRegressor(weights='distance')
dis_knr.fit(X_train, y_train.ravel())
dis_knr_y_predict = dis_knr.predict(X_test)

print("uni_knr:", uni_knr.score(X_test, y_test))
print("R-squared:", r2_score(y_test, uni_knr_y_predict))
print("MSE:", mean_squared_error(ss_y.inverse_transform(y_test),
                                 ss_y.inverse_transform(uni_knr_y_predict.reshape(-1, 1))))
print("MAE:", mean_absolute_error(ss_y.inverse_transform(y_test),
                                  ss_y.inverse_transform(uni_knr_y_predict.reshape(-1, 1))))

print("dis_knr:", dis_knr.score(X_test, y_test))
print("R-squared:", r2_score(y_test, dis_knr_y_predict))
print("MSE:", mean_squared_error(ss_y.inverse_transform(y_test),
                                 ss_y.inverse_transform(dis_knr_y_predict.reshape(-1, 1))))
print("MAE:", mean_absolute_error(ss_y.inverse_transform(y_test),
                                  ss_y.inverse_transform(dis_knr_y_predict.reshape(-1, 1))))
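Note: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2 (over ethical concerns with the B attribute), so the code above only runs on older versions. Below is a minimal sketch of the same standardize-then-SVR pipeline on fetch_california_housing, a regression dataset modern scikit-learn still ships; it is much larger, and its scores are not comparable to the Boston output below.

# Sketch: same preprocessing + SVR pipeline on a dataset available in
# scikit-learn >= 1.2. California housing has 20640 samples, so SVR training
# is noticeably slower; separate variable names avoid clobbering the Boston split.
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

housing = fetch_california_housing()
Xc_train, Xc_test, yc_train, yc_test = train_test_split(
    housing.data, housing.target, random_state=33, test_size=0.25)

sc_X, sc_y = StandardScaler(), StandardScaler()
Xc_train = sc_X.fit_transform(Xc_train)
Xc_test = sc_X.transform(Xc_test)
yc_train = sc_y.fit_transform(yc_train.reshape(-1, 1)).ravel()
yc_test = sc_y.transform(yc_test.reshape(-1, 1)).ravel()

rbf = SVR(kernel='rbf').fit(Xc_train, yc_train)
print("rbf_svr (california):", rbf.score(Xc_test, yc_test))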

Output:




.. _boston_dataset:

Boston house prices dataset
---------------------------

**Data Set Characteristics:**  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pupil-teacher ratio by town
        - B        1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
        - LSTAT    % lower status of the population
        - MEDV     Median value of owner-occupied homes in $1000's

    :Missing Attribute Values: None

    :Creator: Harrison, D. and Rubinfeld, D.L.

This is a copy of UCI ML housing dataset.
https://archive.ics.uci.edu/ml/machine-learning-databases/housing/


This dataset was taken from the StatLib library which is maintained at Carnegie Mellon University.

The Boston house-price data of Harrison, D. and Rubinfeld, D.L. 'Hedonic
prices and the demand for clean air', J. Environ. Economics & Management,
vol.5, 81-102, 1978.   Used in Belsley, Kuh & Welsch, 'Regression diagnostics
...', Wiley, 1980.   N.B. Various transformations are used in the table on
pages 244-261 of the latter.

The Boston house-price data has been used in many machine learning papers that address regression
problems.   
     
.. topic:: References

   - Belsley, Kuh & Welsch, 'Regression diagnostics: Identifying Influential Data and Sources of Collinearity', Wiley, 1980. 244-261.
   - Quinlan,R. (1993). Combining Instance-Based and Model-Based Learning. In Proceedings on the Tenth International Conference of Machine Learning, 236-243, University of Massachusetts, Amherst. Morgan Kaufmann.

linear_svr: 0.650659546421538
R-squared: 0.650659546421538
MSE: 27.088311013556027
MAE: 3.4328013877599624
poly_svr: 0.40365065102550846
R-squared: 0.40365065102550846
MSE: 46.24170053103929
MAE: 3.73840737104651
rbf_svr: 0.7559887416340944
R-squared: 0.7559887416340944
MSE: 18.92094886153873
MAE: 2.6067819999501114
(________________________________________________________________________)
uni_knr: 0.6907212176346006
R-squared: 0.6907212176346006
MSE: 23.981877165354337
MAE: 2.9650393700787396
dis_knr: 0.7201094821421603
R-squared: 0.7201094821421603
MSE: 21.703073090490353
MAE: 2.801125502210876

Key Points

1. from sklearn.svm import SVR

SVR differs from SVM classification in that its training samples no longer fall into separate classes. The optimal hyperplane it seeks is not, as in SVM, the one that separates two or more classes as widely as possible, but the one that minimizes the total deviation of all sample points from the hyperplane.
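Concretely, SVR fits an epsilon-insensitive tube around the regression function: points inside the tube contribute no loss, and only points on or outside it become support vectors. A minimal sketch on made-up toy data (the noisy sine curve is purely illustrative) showing how widening epsilon shrinks the support-vector set:

# Sketch: larger epsilon -> a wider insensitive tube -> more points fall
# inside it -> fewer support vectors. Toy data is illustrative only.
import numpy as np
from sklearn.svm import SVR

rng = np.random.RandomState(0)
X_toy = np.sort(rng.uniform(-3, 3, 100)).reshape(-1, 1)
y_toy = np.sin(X_toy).ravel() + 0.1 * rng.randn(100)

for eps in (0.01, 0.1, 0.5):
    svr = SVR(kernel='rbf', epsilon=eps).fit(X_toy, y_toy)
    print(f"epsilon={eps}: {len(svr.support_)} support vectors")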
kernel: type: string, optional (default='rbf')

The kernel type used by the algorithm: one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' (default 'rbf').

degree: type: int, optional (default=3)

Degree of the polynomial kernel, 3 by default; only used when kernel='poly' and ignored by all other kernels.

gamma: type: float, optional

Kernel coefficient, used when kernel is 'rbf', 'poly' or 'sigmoid' and ignored by other kernels. gamma must be greater than 0. As gamma increases, the model fits the training set better but performs increasingly worse on the test set: model complexity rises, generalization (the ability to predict unseen data) weakens, and overfitting appears.

A more detailed discussion (in Chinese): https://www.jianshu.com/p/399ddcac2178
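A quick way to see this overfitting behavior is to sweep gamma and compare training and test R-squared. This sketch reuses the standardized X_train/X_test/y_train/y_test from the code above; the exact numbers depend on the split:

# Sketch: as gamma grows the RBF kernel becomes more local, so training
# R^2 climbs while test R^2 collapses -- classic overfitting.
from sklearn.svm import SVR

for gamma in (0.01, 0.1, 1, 10, 100):
    svr = SVR(kernel='rbf', gamma=gamma).fit(X_train, y_train.ravel())
    print(f"gamma={gamma}: train R^2={svr.score(X_train, y_train):.3f}, "
          f"test R^2={svr.score(X_test, y_test):.3f}")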

2. from sklearn.neighbors import KNeighborsRegressor

The K-nearest neighbors (regression) model is likewise a non-parametric model: it simply uses the target values of the K nearest training samples to decide the regression value of a test sample, i.e. it predicts by sample similarity.
There are two ways to derive the test sample's regression value from its K neighbors:
(1) take the ordinary arithmetic mean of the K neighbors' target values;
(2) take a weighted average of the K neighbors' target values, weighting by distance.
The scheme is selected through the weights parameter, as the sketch below shows.
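To make the two weighting schemes concrete, this sketch recomputes a single prediction by hand from the neighbor distances, reusing the standardized split from the code above. The 1/distance weights mirror what weights='distance' does, assuming no neighbor sits at distance zero:

# Sketch: reproduce one KNN prediction by hand to show both weightings.
import numpy as np
from sklearn.neighbors import KNeighborsRegressor

knr = KNeighborsRegressor(n_neighbors=5, weights='distance')
knr.fit(X_train, y_train.ravel())

dist, idx = knr.kneighbors(X_test[:1])       # 5 nearest training points
neighbor_y = y_train.ravel()[idx[0]]

uniform_pred = neighbor_y.mean()             # arithmetic mean ('uniform')
w = 1.0 / dist[0]                            # inverse-distance weights
distance_pred = np.sum(w * neighbor_y) / np.sum(w)

print("uniform :", uniform_pred)
print("distance:", distance_pred)
print("sklearn :", knr.predict(X_test[:1])[0])   # matches distance_pred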
