# 机器学习总结(一)：线性回归、岭回归、Lasso回归（转载待整理）

https://blog.csdn.net/hzw19920329/article/details/77200475

1. 线性回归的一般形式
2. 线性回归中可能遇到的问题
3. 过拟合问题及其解决方法
4. 线性回归代码实现
5. 岭回归与Lasso回归
6. 岭回归以及Lasso回归代码实现
7. 线性回归的一般形式

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model, discriminant_analysis, cross_validation

return cross_validation.train_test_split(diabetes.data, diabetes.target, test_size=0.25, random_state=0)

def test_LinearRegression(*data):
X_train, X_test, y_train, y_test = data
#通过sklearn的linear_model创建线性回归对象
linearRegression = linear_model.LinearRegression()
#进行训练
linearRegression.fit(X_train, y_train)
#通过LinearRegression的coef_属性获得权重向量,intercept_获得b的值
print("权重向量:%s, b的值为:%.2f" % (linearRegression.coef_, linearRegression.intercept_))
#计算出损失函数的值
print("损失函数的值: %.2f" % np.mean((linearRegression.predict(X_test) - y_test) ** 2))
#计算预测性能得分
print("预测性能得分: %.2f" % linearRegression.score(X_test, y_test))

if __name__ == '__main__':
#获得数据集
X_train, X_test, y_train, y_test = load_data()
#进行训练并且输出预测结果
test_LinearRegression(X_train, X_test, y_train, y_test)

261.47657106   -8.83343952  135.93715156  703.22658427   28.34844354], b的值为:153.07

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model, discriminant_analysis, cross_validation

return cross_validation.train_test_split(diabetes.data, diabetes.target, test_size=0.25, random_state=0)

def test_ridge(*data):
X_train, X_test, y_train, y_test = data
ridgeRegression = linear_model.Ridge()
ridgeRegression.fit(X_train, y_train)
print("权重向量:%s, b的值为:%.2f" % (ridgeRegression.coef_, ridgeRegression.intercept_))
print("损失函数的值:%.2f" % np.mean((ridgeRegression.predict(X_test) - y_test) ** 2))
print("预测性能得分: %.2f" % ridgeRegression.score(X_test, y_test))

#测试不同的α值对预测性能的影响
def test_ridge_alpha(*data):
X_train, X_test, y_train, y_test = data
alphas = [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50, 100, 200, 500, 1000]
scores = []
for i, alpha in enumerate(alphas):
ridgeRegression = linear_model.Ridge(alpha=alpha)
ridgeRegression.fit(X_train, y_train)
scores.append(ridgeRegression.score(X_test, y_test))
return alphas, scores

def show_plot(alphas, scores):
figure = plt.figure()
ax.plot(alphas, scores)
ax.set_xlabel(r"$\alpha$")
ax.set_ylabel(r"score")
ax.set_xscale("log")
ax.set_title("Ridge")
plt.show()

if __name__ == '__main__':
#使用默认的alpha
#获得数据集
#X_train, X_test, y_train, y_test = load_data()
#进行训练并且预测结果
#test_ridge(X_train, X_test, y_train, y_test)

#使用自己设置的alpha
X_train, X_test, y_train, y_test = load_data()
alphas, scores = test_ridge_alpha(X_train, X_test, y_train, y_test)
show_plot(alphas, scores)

Lasso回归代码示例

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model, discriminant_analysis, cross_validation

return cross_validation.train_test_split(diabetes.data, diabetes.target, test_size=0.25, random_state=0)

def test_lasso(*data):
X_train, X_test, y_train, y_test = data
lassoRegression = linear_model.Lasso()
lassoRegression.fit(X_train, y_train)
print("权重向量:%s, b的值为:%.2f" % (lassoRegression.coef_, lassoRegression.intercept_))
print("损失函数的值:%.2f" % np.mean((lassoRegression.predict(X_test) - y_test) ** 2))
print("预测性能得分: %.2f" % lassoRegression.score(X_test, y_test))

#测试不同的α值对预测性能的影响
def test_lasso_alpha(*data):
X_train, X_test, y_train, y_test = data
alphas = [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50, 100, 200, 500, 1000]
scores = []
for i, alpha in enumerate(alphas):
lassoRegression = linear_model.Lasso(alpha=alpha)
lassoRegression.fit(X_train, y_train)
scores.append(lassoRegression.score(X_test, y_test))
return alphas, scores

def show_plot(alphas, scores):
figure = plt.figure()
ax.plot(alphas, scores)
ax.set_xlabel(r"$\alpha$")
ax.set_ylabel(r"score")
ax.set_xscale("log")
ax.set_title("Ridge")
plt.show()

if __name__=='__main__':
X_train, X_test, y_train, y_test = load_data()
# 使用默认的alpha
#test_lasso(X_train, X_test, y_train, y_test)
# 使用自己设置的alpha
alphas, scores = test_lasso_alpha(X_train, X_test, y_train, y_test)
show_plot(alphas, scores)

python大战机器学习
Andrew Ng机器学习公开课
http://www.jianshu.com/p/35e67c9e4cbf
http://freemind.pluskid.org/machine-learning/sparsity-and-some-basics-of-l1-regularization/#ed61992b37932e208ae114be75e42a3e6dc34cb3

03-10 7780

04-21 1430
08-15 9万+
04-02 4700
07-10 62
11-11 2万+
10-30 2722
08-26 633