# 机器学习之线性回归（Linear Regression）

1.假设输入的X和Y是线性关系，预测的y与X通过线性方程建立机器学习模型

2.输入的Y和X之间满足方程 $Y = X\theta + e$，其中 $\theta$ 是待求的参数向量，e 是误差项（噪音项）。假设 e 独立同分布（IID，independent and identically distributed），服从均值为 0、方差为某一定值 $\sigma^2$ 的正态分布（也叫高斯分布）。e 服从正态分布这一假设的依据是中心极限定理。

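2.1方程式表示：$y = X\theta + e$，其中 $X$ 为 $m \times d$ 的输入矩阵（$m$ 个样本、$d$ 个特征），$\theta$ 为 $d$ 维参数向量，$y$ 为 $m$ 维输出向量，$e$ 为误差项。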

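2.2 损失函数（Loss function）：$J(\theta) = \frac{1}{2}(X\theta - y)^T(X\theta - y) = \frac{1}{2}\sum_{i=1}^{m}\left(x^{(i)}\theta - y^{(i)}\right)^2$，其中 $\theta$ 为参数向量，$x^{(i)}$、$y^{(i)}$ 为第 $i$ 个样本的输入和输出。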

3.1 当矩阵 $X^TX$ 可逆（满秩）时，通过normal equation（正规方程）可以直接求解：$\theta = (X^TX)^{-1}X^Ty$

3.2 当矩阵 $X^TX$ 不可逆（非满秩）时，通过梯度下降求解：重复更新 $\theta := \theta - \alpha X^T(X\theta - y)$ 直至收敛，其中 $\alpha$ 为学习率。

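4.1当矩阵满秩时，引入正则项（L2 正则项 $\frac{\lambda}{2}\|\theta\|^2$）后的解变为：$\theta = (X^TX + \lambda I)^{-1}X^Ty$，其中 $\lambda > 0$ 为正则化系数。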

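4.2当矩阵不满秩时，引入正则项（L2 正则项 $\frac{\lambda}{2}\|\theta\|^2$）后的梯度下降更新变为：$\theta := \theta - \alpha\left[X^T(X\theta - y) + \lambda\theta\right]$，其中 $\alpha$ 为学习率，$\lambda > 0$ 为正则化系数。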

import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RidgeCV

try:
    # scikit-learn >= 0.18 provides train_test_split here; the old
    # sklearn.cross_validation module was removed in 0.20.
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# Synthetic demo data: n noisy samples scattered around the line y = 4x - 3.
n = 100
# linspace spreads the n base points evenly over [1, 100]. np.arange(1, 100, n)
# would treat n as the step and yield the single value 1, so every x would
# collapse to 1 + noise.
x = np.linspace(1, 100, n) + np.random.randn(n)
y = 4 * x - 3 + np.random.randn(n)

# Scatter plot of the raw samples.
plt.figure()
plt.plot(x, y, 'r*', label='X')
plt.ylabel(" Y")
plt.xlabel(" X")
plt.legend(loc="best")
plt.tight_layout()
plt.show()
# Disabled alternative input: the string literal below holds code that loads
# 2015 weather data from a machine-specific path and plots it. It is kept inert
# so the script runs on the synthetic x/y defined above.
# The literal must be closed right after this section; left open, it swallows
# the rest of the script and the file fails to parse.
'''
data = ['C:\\Users\\123\\Desktop\\weather\\2015.txt',]
w = np. loadtxt ( data [0] , skiprows =1)
y = w[:,7]/10
x = w[:,10]
plt . figure ()
plt . plot(x,y,"b*",label="Atmospheric pressure")
plt . ylabel (" Temperatures"  )
plt . xlabel ("Atmospheric pressure "  )
plt . title (' Temperatures trent chart of Shanghai in year 2015 ')
plt . tight_layout()
plt . legend(loc="best")
plt . show()
'''

# Fit ordinary least squares and cross-validated ridge regression on a
# train/test split, then print each model's parameters and held-out error.
x = x.reshape(-1, 1)  # turn the 1-D sample vector into a single-feature column
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1)

lr = LinearRegression()
lr.fit(x_train, y_train)
y_lr = lr.predict(x_test)

# RidgeCV chooses its regularization strength from 100 log-spaced candidates
# between 1e-3 and 1e2.
cv = RidgeCV(alphas=np.logspace(-3, 2, 100))
cv.fit(x_train, y_train)
y_cv = cv.predict(x_test)

# Single-argument print(...) calls parse and print the same way under both
# Python 2 and Python 3; the bare print statement is a syntax error on 3.
# NOTE: for scikit-learn regressors score() is the R^2 coefficient of
# determination, which the messages below call "accuracy".
print(lr.coef_)
print(lr.intercept_)
print("mes of Linear Regresion squares is {}".format(np.mean((y_lr - y_test) ** 2)))
print("accuracy of Linear regression is {}".format(lr.score(x_test, y_test)))
print(cv.coef_)
print(cv.intercept_)
print("mes of Linear Regresion+Ridge squares is {}".format(np.mean((y_cv - y_test) ** 2)))
# Labelled as the Ridge model so the two score lines can be told apart.
print("accuracy of Linear regression+Ridge is {}".format(cv.score(x_test, y_test)))

# Overlay the held-out targets and both models' predictions, drawn against the
# position of each sample in the test set.
x1 = np.arange(len(x_test))
for _values, _fmt, _label in (
    (y_test, "y*-", "Test"),
    (y_lr, "ro-", "Predict"),
    (y_cv, "b^-", "Predict+Ridge"),
):
    plt.plot(x1, _values, _fmt, label=_label)

# NOTE(review): the x-axis holds the test-sample index (x1), although the label
# reads "Atmospheric pressure" - confirm which is intended.
plt.title(' Predict chart ')
plt.xlabel(" Atmospheric pressure")
plt.ylabel(" Temperatures")
plt.legend(loc="best")
plt.tight_layout()
plt.show()