Regularization: keep all of the features, but shrink the magnitude of the parameters theta.
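In general form (a summary consistent with the per-method formulas below), the regularized objective adds a penalty term \Omega(\theta), weighted by \alpha, to the training loss:

J(\theta) = \mathrm{MSE}(\theta) + \alpha \, \Omega(\theta)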
L1 regularization: Lasso regression
Formula: J(\theta) = \mathrm{MSE}(\theta) + \alpha \sum_{i=1}^{n} |\theta_i|
import numpy as np
from sklearn.linear_model import Lasso
from sklearn.linear_model import SGDRegressor

# Synthetic data: y = 4 + 3x + Gaussian noise
X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)

# Lasso regression (L1 penalty); alpha controls the penalty strength
lasso_reg = Lasso(alpha=0.15)
lasso_reg.fit(X, y)
print(lasso_reg.predict([[1.5]]))
print(lasso_reg.coef_)

# The same model via SGD with an L1 penalty
# (max_iter replaces the n_iter parameter removed from scikit-learn)
sgd_reg = SGDRegressor(penalty='l1', max_iter=1000)
sgd_reg.fit(X, y.ravel())
print(sgd_reg.predict([[1.5]]))
print(sgd_reg.coef_)
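A hedged side note (my own sketch, not from the original): the L1 penalty tends to drive uninformative coefficients exactly to zero, which is why Lasso doubles as feature selection. A minimal illustration on synthetic data where only the first of five features matters:

import numpy as np
from sklearn.linear_model import Lasso

rng = np.random.RandomState(0)
X = rng.rand(100, 5)                     # 5 features; only the first is informative
y = 3 * X[:, 0] + 0.1 * rng.randn(100)   # the target ignores features 1-4
lasso = Lasso(alpha=0.1)
lasso.fit(X, y)
print(lasso.coef_)  # the 4 uninformative coefficients come out (near) zero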
L2 regularization: Ridge regression
Formula: J(\theta) = \mathrm{MSE}(\theta) + \alpha \sum_{i=1}^{n} \theta_i^2
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.linear_model import SGDRegressor

X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)

# alpha is the penalty strength; solver picks how the problem is solved:
# 'auto' chooses based on the data, 'svd' computes the analytic
# (closed-form) solution, 'sag' uses Stochastic Average Gradient descent
ridge_reg = Ridge(alpha=1, solver='auto')
# Fit the model
ridge_reg.fit(X, y)
# Predict
print(ridge_reg.predict([[1.5], [2], [2.5]]))
# Print the intercept
print(ridge_reg.intercept_)
# Print the coefficients
print(ridge_reg.coef_)
"""
Ridge regression and SGDRegressor with penalty='l2' are equivalent
"""
sgd_reg = SGDRegressor(penalty='l2')
sgd_reg.fit(X, y.ravel())
print(sgd_reg.predict([[1.5], [2], [2.5]]))
# Print the intercept
print("W0=", sgd_reg.intercept_)
# Print the coefficients
print("W1=", sgd_reg.coef_)
Regression based on L1 + L2: Elastic Net regression
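Formula (the objective minimized by scikit-learn's ElasticNet, where \rho is the l1_ratio parameter):

\frac{1}{2n} \|y - Xw\|_2^2 + \alpha \rho \|w\|_1 + \frac{\alpha (1 - \rho)}{2} \|w\|_2^2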
from sklearn import datasets, linear_model
from sklearn.model_selection import train_test_split
import numpy as np

def load_data():
    # Load the diabetes dataset and make a 75/25 train/test split
    diabetes = datasets.load_diabetes()
    return train_test_split(diabetes.data, diabetes.target, test_size=0.25, random_state=0)

def test_ElasticNet(*data):
    X_train, X_test, y_train, y_test = data
    # Elastic Net combines the L1 and L2 penalties
    regr = linear_model.ElasticNet()
    regr.fit(X_train, y_train)
    print('Coefficients:%s, intercept:%.2f' % (regr.coef_, regr.intercept_))
    print('Residual sum of squares:%.2f' % np.mean((regr.predict(X_test) - y_test) ** 2))
    print('Score:%.2f' % regr.score(X_test, y_test))

X_train, X_test, y_train, y_test = load_data()
test_ElasticNet(X_train, X_test, y_train, y_test)
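A hedged follow-up (my sketch; the l1_ratio grid is an illustrative choice): in practice both alpha and the L1/L2 mix are tuned, and scikit-learn's ElasticNetCV cross-validates them in one pass:

from sklearn.linear_model import ElasticNetCV

regr_cv = ElasticNetCV(l1_ratio=[0.1, 0.5, 0.9], cv=5)
regr_cv.fit(X_train, y_train)
print('best alpha: %.4f, best l1_ratio: %.1f' % (regr_cv.alpha_, regr_cv.l1_ratio_))
print('Score:%.2f' % regr_cv.score(X_test, y_test))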