Machine Learning 1: Linear Regression

Chapter 1: Linear Regression

Linear regression models the target as a linear function of the input variables.

1.1 The linear regression model

$$
h_{w,b}(x) = \sum_{i=1}^{n} w_i x_i + b
$$

$$
w = (b, w_1, w_2, \dots, w_n)^T = (w_0, w_1, w_2, \dots, w_n)^T
$$

$$
x = (1, x_1, x_2, \dots, x_n)^T
$$

$$
h_{w}(x) = w^Tx
$$
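As a quick illustration (the numbers here are made up, not from the original), absorbing the bias into the weight vector and prepending a constant 1 to x lets the model be evaluated as a single dot product:

import numpy as np

w = np.array([1.0, 2.0, -0.5])   # (w0, w1, w2) with w0 = b = 1.0
x = np.array([1.0, 3.0, 4.0])    # sample (x1, x2) = (3, 4) augmented with a leading 1

print(w @ x)   # h_w(x) = 1.0 + 2.0*3.0 - 0.5*4.0 = 5.0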

1.2 Ordinary least squares

Ordinary least squares (OLS) solves for the parameters w using MSE as the loss function. MSE is the mean squared error, i.e. the mean of the squared differences between the predicted and actual values.
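For concreteness, a tiny sketch (made-up numbers) of computing the MSE between a batch of predictions and targets:

import numpy as np

y_pred = np.array([2.5, 0.0, 2.1])   # hypothetical predictions
y_true = np.array([3.0, -0.5, 2.0])  # hypothetical targets

mse = np.mean((y_pred - y_true) ** 2)
print(mse)   # (0.25 + 0.25 + 0.01) / 3 = 0.17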
$$
X=\begin{pmatrix} 1 & x_{11} & \dots & x_{1n} \\ 1 & x_{21} & \dots & x_{2n} \\ \vdots & \vdots & \ddots & \vdots \\ 1 & x_{m1} & \dots & x_{mn} \end{pmatrix} = \begin{pmatrix} x_1^T \\ x_2^T \\ \vdots \\ x_m^T \end{pmatrix}
$$

$$
w=\begin{pmatrix} w_0 \\ w_1 \\ \vdots \\ w_n \end{pmatrix}
$$

$$
J(w) = \frac{1}{2m}\sum_{i=1}^{m}\bigl(h_w(x_i)-y_i\bigr)^2 = \frac{1}{2m}\sum_{i=1}^{m}\bigl(w^Tx_i-y_i\bigr)^2
$$

$$
\nabla J(w) = \frac{1}{2m}\sum_{i=1}^{m}\frac{\partial (w^Tx_i - y_i)^2}{\partial w}
$$

$$
= \frac{1}{2m}\sum_{i=1}^{m}2(w^Tx_i - y_i)\frac{\partial(w^Tx_i - y_i)}{\partial w}
$$

$$
= \frac{1}{2m}\sum_{i=1}^{m}2(w^Tx_i - y_i)\,x_i
$$

$$
= \frac{1}{m}X^T(Xw - y)
$$

Setting the gradient to zero gives the closed-form (normal equation) solution:

$$
\hat w = (X^TX)^{-1}X^Ty
$$
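As a quick numerical check, a minimal sketch (synthetic data, names chosen only for illustration) of solving the normal equation directly with NumPy:

import numpy as np

# synthetic data: y = 3x + 1 plus a little noise
rng = np.random.default_rng(0)
x = np.linspace(0, 10, 100)
y = 3 * x + 1 + rng.normal(scale=0.01, size=100)

# design matrix with a leading column of ones for the bias
X = np.column_stack([np.ones_like(x), x])

# w_hat = (X^T X)^{-1} X^T y
w_hat = np.linalg.inv(X.T @ X) @ (X.T @ y)
print(w_hat)   # approximately [1, 3]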


1.3 Gradient descent

Batch gradient descent uses all of the samples in every update:

$$
w := w - \eta \frac{1}{m}X^T(Xw-y)
$$

Stochastic gradient descent (SGD) updates the parameters using the error of a single sample at a time. It uses little memory and scales to very large datasets. When the loss surface is irregular it is more likely to jump out of local minima and get close to the global minimum, but the gradient values oscillate sharply. The dataset should be reshuffled at the start of every epoch.

$$
w := w - \eta\,(w^Tx_i - y_i)\,x_i
$$

Mini-batch gradient descent updates the parameters using a small batch of samples at a time:

$$
w := w - \eta \frac{1}{N}\sum_{i=k}^{k+N}(w^Tx_i - y_i)\,x_i
$$
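A minimal sketch of mini-batch gradient descent (the function and parameter names here are illustrative, not from the original code):

import numpy as np

def minibatch_gd(X, y, eta=0.01, n_epochs=500, batch_size=16):
    '''Mini-batch gradient descent for linear regression.
    X is assumed to already contain a leading column of ones; y has shape (m, 1).'''
    m, n = X.shape
    w = np.zeros((n, 1))
    for epoch in range(n_epochs):
        idx = np.random.permutation(m)            # reshuffle at the start of every epoch
        for start in range(0, m, batch_size):
            batch = idx[start:start + batch_size]
            Xb, yb = X[batch], y[batch]
            grad = Xb.T @ (Xb @ w - yb) / len(batch)
            w -= eta * grad
    return w

# usage on the same kind of synthetic data used in the implementations below
x = np.linspace(0, 10, 100).reshape(-1, 1)
y = 3 * x + 1
X = np.hstack([np.ones_like(x), x])
print(minibatch_gd(X, y).ravel())   # approaches [1, 3]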

1.4 Implementation

(1) Computing the parameters with the normal equation (matrix operations)

import numpy as np

class OLSLinearRegression:

    def _preprocess_data_X(self, X):
        # 1. Prepend a column of ones so the bias is absorbed into w
        m, n = X.shape
        X_ = np.ones([m, n+1])
        X_[:, 1:] = X
        return X_

    def _ols(self, X, y):
        # 2. Closed-form solution: w = (X.T @ X)^{-1} (X.T @ y)
        return np.linalg.inv(X.T @ X) @ (X.T @ y)

    def train(self, X_train, y_train):
        # 3. Fit: compute w from the training data
        X_train = self._preprocess_data_X(X_train)
        self.w = self._ols(X_train, y_train)

    def predict(self, X):
        # 4. Predict
        X = self._preprocess_data_X(X)
        return np.matmul(X, self.w)


if __name__ == '__main__':
    # synthetic training data: y = 3x + 1 plus small noise
    X_train = np.linspace(0, 10, 100).reshape(-1, 1)                        # [100, 1]
    y_train = X_train*3 + 1 + (np.random.randn(100)/100).reshape(-1, 1)     # [100, 1]
    ols_reg = OLSLinearRegression()
    ols_reg.train(X_train, y_train)
    y_pred = ols_reg.predict(X_train)
    print(ols_reg.w.shape)
    print(ols_reg.w)
    print(y_pred.shape)
    
'''
(2, 1)
[[1.00133115]
 [2.99992476]]
(100, 1)
'''
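A practical side note (not from the original post): explicitly inverting X^T X can be numerically unstable when features are nearly collinear; np.linalg.lstsq gives the same least-squares fit more robustly. A minimal sketch:

import numpy as np

X_train = np.linspace(0, 10, 100).reshape(-1, 1)
y_train = X_train * 3 + 1

X_ = np.hstack([np.ones_like(X_train), X_train])   # add the bias column

# least-squares solve, avoids forming and inverting X^T X explicitly
w, *_ = np.linalg.lstsq(X_, y_train, rcond=None)
print(w.ravel())   # approximately [1, 3]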

(2) Computing the parameters with gradient descent

''' Linear_regression_gd
1. initialize parameters
2. loss, gradient, gradient descent, data preprocessing
3. train, predict
4. application
'''
import numpy as np
class linear_regression_gd():
    def __init__(self, iters=1000, eta=1e-2, tol=None):
        self.iters = iters  # number of training iterations
        self.eta = eta      # learning rate
        self.tol = tol      # early-stopping threshold on the loss decrease
        self.w = None       # model parameters

    def _preprocess_data_X(self, X):
        '''1. Prepend the constant (bias) column'''
        m, n = X.shape
        X_ = np.ones([m, n+1])
        X_[:, 1:] = X
        return X_

    def _predict(self, X, w):
        return np.matmul(X, w)

    def predict(self, X, w):
        '''2. Predict'''
        X = self._preprocess_data_X(X)
        return self._predict(X, w)

    def _loss(self, y_pre, y):
        '''3. Mean squared error loss'''
        return np.mean((y_pre - y) ** 2)

    def _gradient(self, X, y, y_pre):
        return np.matmul(X.T, (y_pre - y)) / y.size   # X: [m, n+1], (y_pre - y): [m, 1] -> grad: [n+1, 1]

    def _gradient_descent(self, w, X, y):
        '''4. Gradient descent loop'''
        if self.tol is not None:
            loss_old = np.inf
        # 4.1 Iterate
        for i in range(self.iters):
            y_pre = self._predict(X, w)
            loss = self._loss(y_pre, y)
            # print('iter %d, loss %f' % (i, loss))
            # 4.2 Early stopping when the loss decrease falls below tol
            if self.tol is not None:
                if loss_old - loss < self.tol:
                    break
                loss_old = loss
            # 4.3 Update the parameters
            grad = self._gradient(X, y, y_pre)
            w -= self.eta * grad
            if i%100 == 0:
                print(i,w)

    def train(self, X_train, y_train):
        '''5. Train'''
        # 5.1 Preprocess the data
        X_train = self._preprocess_data_X(X_train)
        # 5.2 Initialize the parameters as a column vector
        self.w = np.random.random(X_train.shape[-1]).reshape(-1, 1)
        # 5.3 Run gradient descent
        self._gradient_descent(self.w, X_train, y_train)


if __name__ == '__main__':
    X_train = np.linspace(0,10,100).reshape(-1,1)
    y_train = X_train*3 + 1 + np.random.randn(1)/100  # [100, 1]
    reg_gd = linear_regression_gd(iters=3400,eta=1e-2,tol=1e-10)
    reg_gd.train(X_train,y_train)
    w = reg_gd.w
    y_pre = reg_gd.predict(X_train,w)
    print(y_pre.shape)


'''
0 [[0.20966034]
 [1.6335141 ]]
100 [[0.55296788]
 [3.06662242]]
200 [[0.65046267]
 [3.05196308]]
300 [[0.7265051 ]
 [3.04052933]]
400 [[0.78581544]
 [3.03161141]]
500 [[0.83207537]
 [3.02465576]]
600 [[0.86815645]
 [3.01923061]]
700 [[0.89629839]
 [3.01499918]]
800 [[0.91824808]
 [3.01169882]]
900 [[0.93536805]
 [3.00912466]]
1000 [[0.94872101]
 [3.0071169 ]]
1100 [[0.95913584]
 [3.00555093]]
1200 [[0.96725903]
 [3.00432952]]
1300 [[0.97359483]
 [3.00337687]]
1400 [[0.97853652]
 [3.00263384]]
1500 [[0.98239086]
 [3.0020543 ]]
1600 [[0.9853971 ]
 [3.00160228]]
1700 [[0.98774187]
 [3.00124972]]
1800 [[0.9895707 ]
 [3.00097474]]
1900 [[0.99099712]
 [3.00076026]]
2000 [[0.99210968]
 [3.00059297]]
2100 [[0.99297744]
 [3.0004625 ]]
2200 [[0.99365425]
 [3.00036073]]
2300 [[0.99418215]
 [3.00028136]]
2400 [[0.99459388]
 [3.00021945]]
2500 [[0.99491503]
 [3.00017116]]
2600 [[0.9951655]
 [3.0001335]]
2700 [[0.99536087]
 [3.00010413]]
2800 [[0.99551324]
 [3.00008121]]
2900 [[0.99563209]
 [3.00006334]]
3000 [[0.99572479]
 [3.00004941]]
(100, 1)

Process finished with exit code 0
'''
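A practical note (not from the original post): gradient descent converges faster when the feature columns are on similar scales, so it is common to standardize X before training. A minimal sketch reusing the linear_regression_gd class defined above:

import numpy as np

X_train = np.linspace(0, 10, 100).reshape(-1, 1)
y_train = X_train * 3 + 1

# standardize the feature column (zero mean, unit variance)
mu, sigma = X_train.mean(axis=0), X_train.std(axis=0)
X_scaled = (X_train - mu) / sigma

reg_gd = linear_regression_gd(iters=500, eta=1e-1)
reg_gd.train(X_scaled, y_train)
# the learned w lives in the scaled space, so predictions must use the same scaling
y_pre = reg_gd.predict(X_scaled, reg_gd.w)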

(3) Stochastic gradient descent

import numpy as np

class Linear_Regression_sgd:

    def __init__(self, n_iter=1000, eta=0.01):
        self.n_iter = n_iter  # number of training epochs
        self.eta = eta        # initial learning rate
        self.w = None         # model parameters w (initialized from the number of features at training time)

    def _preprocess_data_X(self, X):
        # prepend the constant (bias) column
        m, n = X.shape
        X_ = np.ones([m, n+1])
        X_[:, 1:] = X
        return X_

    def _gradient(self, xi, yi, w):
        return -xi * (yi - np.dot(xi, w))  # gradient of a single sample's squared error

    def _stochastic_gradient_descent(self, X, y, eta, n_iter):
        m, _ = X.shape

        eta0 = eta
        step_i = 0

        for j in range(n_iter):
            idx = np.random.permutation(m)  # reshuffle each epoch; keeps X[i] and y[i] paired
            for i in idx:
                grad = self._gradient(X[i], y[i], self.w)  # gradient from a single sample
                step_i += 1
                eta = eta0 / np.power(step_i, 0.25)        # decaying learning rate
                self.w -= eta * grad                       # update the parameters w
            if j % 10 == 0:
                print(self.w)

    def train(self, X, y):
        X = self._preprocess_data_X(X)
        if self.w is None:
            _, n = X.shape
            self.w = np.random.random(n)
        self._stochastic_gradient_descent(X, y, self.eta, self.n_iter)

    def predict(self, X):
        X = self._preprocess_data_X(X)
        return np.dot(X, self.w)

if __name__ == '__main__':
    X_train = np.linspace(0, 10, 100).reshape(-1, 1)  # [100,1]
    y_train = X_train*0.8 + 0.5 + np.random.randn(1)/100  # [100,1]
    reg_sgd = Linear_Regression_sgd(n_iter=100, eta = 1e-5)
    reg_sgd.train(X_train, y_train)
    w = reg_sgd.w
    y_pre = reg_sgd.predict(X_train)


'''
[0.87034832 0.56149583]
[0.87206667 0.56075662]
[0.87340481 0.55991468]
[0.87459365 0.559757  ]
[0.87569099 0.55915361]
[0.87672773 0.55848424]
[0.87771676 0.55795229]
[0.87866814 0.55752828]
[0.87958761 0.55706313]
[0.88048    0.55706118]

Process finished with exit code 0
'''
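A note on the design choice above: the learning rate decays as eta0 / step^0.25, so early per-sample updates take larger steps while later updates shrink. Without some decay schedule, the single-sample gradients keep the parameters oscillating around the minimum instead of settling; with too aggressive a decay, training effectively stalls before reaching it.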
