Chapter 1  Linear Regression
Linear regression models a linear relationship between variables: the target is predicted as a weighted sum of the input features.
1.1 The Linear Regression Model
$$h_{w,b}(x) = \sum_{i=1}^n w_ix_i+b$$

Absorbing the bias $b$ into the weight vector as $w_0$, with a constant feature $x_0 = 1$:

$$w = (b,w_1,w_2,\dots,w_n)^T = (w_0,w_1,w_2,\dots,w_n)^T$$

$$x = (1,x_1,x_2,\dots,x_n)^T$$

$$h_{w}(x) = w^Tx$$
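A small numeric illustration (the values below are made up, not from the text): after absorbing the bias, a single dot product evaluates the model.

import numpy as np

w = np.array([1.0, 3.0])   # w0 = b = 1, w1 = 3
x = np.array([1.0, 2.0])   # constant feature 1 prepended to x1 = 2
print(w @ x)               # h_w(x) = 1*1 + 3*2 = 7.0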
1.2 Ordinary Least Squares
Ordinary least squares (OLS) solves for the parameters w by minimizing the MSE loss. MSE (mean squared error) is the mean of the squared differences between the predicted and actual values. Stacking the m training samples (each with the constant feature prepended) gives the design matrix X, and the parameters form the vector W:
$$X=\begin{pmatrix} 1 & x_{11} & \dots &x_{1n} \\ 1 & x_{21} & \dots &x_{2n} \\ \vdots & \vdots & \ddots & \vdots \\ 1 & x_{m1} & \dots &x_{mn} \\ \end{pmatrix} = \begin{pmatrix} x_1^T \\x_2^T \\ \vdots \\ x_m^T\end{pmatrix}$$
$$W=\begin{pmatrix} w_0 \\ w_1 \\ \vdots \\w_n\\ \end{pmatrix}$$
The MSE loss over the m training samples is

$$J(w) = \frac{1}{2m}\sum_{i=1}^{m}\bigl(h_w(x_i)-y_i\bigr)^2 = \frac{1}{2m}\sum_{i=1}^{m}\bigl(w^Tx_i-y_i\bigr)^2$$
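In vectorized form the loss can be computed directly from the design matrix; a minimal sketch (the function name is illustrative, not from the text):

import numpy as np

def mse_loss(X, w, y):
    # J(w) = (1/2m) * sum((Xw - y)^2); the factor 1/2 only simplifies the gradient
    residual = X @ w - y
    return (residual ** 2).sum() / (2 * len(y))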
Taking the gradient with respect to w:

$$\nabla J(w) = \frac{1}{2m}\sum_{i=1}^{m}\frac{\partial (w^Tx_i-y_i)^2}{\partial w}$$

$$= \frac{1}{2m}\sum_{i=1}^{m}2(w^Tx_i-y_i)\frac{\partial(w^Tx_i-y_i)}{\partial w}$$

$$= \frac{1}{m}\sum_{i=1}^{m}(w^Tx_i-y_i)x_i$$

$$= \frac{1}{m}X^T(Xw-y)$$

Setting the gradient to zero gives the closed-form (normal equation) solution:

$$\hat w= (X^TX)^{-1}X^Ty$$

Alternatively, the parameters can be found iteratively with gradient descent (Section 1.3):

$$w:= w-\eta \frac{1}{m}X^T(Xw-y)$$
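A quick numerical check of the normal equation (a minimal sketch with made-up synthetic data; np.linalg.lstsq is used only as a more stable reference solver and is not part of the text above):

import numpy as np

rng = np.random.default_rng(0)
X_raw = rng.uniform(0, 10, size=(100, 1))
y = 3 * X_raw[:, 0] + 1 + rng.normal(scale=0.01, size=100)

X = np.hstack([np.ones((100, 1)), X_raw])         # design matrix with the constant feature
w_hat = np.linalg.inv(X.T @ X) @ (X.T @ y)        # normal equation
w_ref, *_ = np.linalg.lstsq(X, y, rcond=None)     # numerically more stable reference
print(w_hat, w_ref)                               # both close to [1, 3]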
1.3 Gradient Descent
Batch gradient descent: every training sample participates in each parameter update.
$$w:= w-\eta \frac{1}{m}X^T(Xw-y)$$
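A minimal sketch of this batch update rule on made-up noiseless data (the learning rate and iteration count are arbitrary choices, not from the text):

import numpy as np

rng = np.random.default_rng(0)
X = np.hstack([np.ones((100, 1)), rng.uniform(0, 10, (100, 1))])
y = 3 * X[:, 1] + 1

w, eta, m = np.zeros(2), 0.01, len(y)
for _ in range(5000):
    w -= eta * X.T @ (X @ w - y) / m   # w := w - eta * (1/m) X^T (Xw - y)
print(w)                               # converges toward [1, 3]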
Stochastic gradient descent: each update uses the error of a single sample. It needs little memory and scales to massive datasets. When the loss function is very irregular, it is more likely to jump out of local minima and get closer to the global minimum, but the parameter updates oscillate strongly. The dataset should be reshuffled at every epoch.
$$w:= w-\eta (w^Tx_i-y_i)x_i$$

Mini-batch gradient descent: each update uses a small batch of N samples.

$$w:= w-\eta \frac{1}{N}\sum_{i=k}^{k+N}(w^Tx_i-y_i)x_i$$
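The implementations in Section 1.4 cover the batch and stochastic variants; for completeness, here is a minimal mini-batch sketch (batch size, learning rate, and epoch count are arbitrary choices, not from the text):

import numpy as np

rng = np.random.default_rng(0)
X = np.hstack([np.ones((100, 1)), rng.uniform(0, 10, (100, 1))])
y = 3 * X[:, 1] + 1
w, eta, batch = np.zeros(2), 0.01, 20

for epoch in range(1000):
    idx = rng.permutation(len(y))                   # reshuffle at the start of every epoch
    for k in range(0, len(y), batch):
        b = idx[k:k + batch]
        grad = X[b].T @ (X[b] @ w - y[b]) / len(b)  # gradient over one mini-batch
        w -= eta * grad
print(w)                                            # converges toward [1, 3]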
1.4 Implementation
(1) Computing the parameters with the closed-form matrix solution
import numpy as np

class OLSLinearRegression:
    def _preprocess_data_X(self, X):
        # 1 Prepend a column of ones (the constant feature) to X
        m, n = X.shape
        X_ = np.ones([m, n + 1])
        X_[:, 1:] = X
        return X_

    def _ols(self, X, y):
        # 2 Closed-form solution: w = (X.T @ X)^{-1} (X.T @ y)
        return np.linalg.inv(X.T @ X) @ (X.T @ y)

    def train(self, X_train, y_train):
        # 3 Fit: compute w from the training data
        X_train = self._preprocess_data_X(X_train)
        self.w = self._ols(X_train, y_train)

    def predict(self, X):
        # 4 Predict: X @ w
        X = self._preprocess_data_X(X)
        return np.matmul(X, self.w)
if __name__ == '__main__':
    # Synthetic training data: y = 3x + 1 plus small noise
    X_train = np.linspace(0, 10, 100).reshape(-1, 1)                           # [100, 1]
    y_train = X_train * 3 + 1 + (np.random.randn(100) / 100).reshape(-1, 1)    # [100, 1]
    ols_reg = OLSLinearRegression()
    ols_reg.train(X_train, y_train)
    print(ols_reg.w.shape)
    print(ols_reg.w)
    y_pred = ols_reg.predict(X_train)
    print(y_pred.shape)
'''
(2, 1)
[[1.00133115]
[2.99992476]]
(100, 1)
'''
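As a sanity check (not part of the original code, and assuming scikit-learn is installed), the same fit can be reproduced with sklearn.linear_model.LinearRegression:

import numpy as np
from sklearn.linear_model import LinearRegression

X_train = np.linspace(0, 10, 100).reshape(-1, 1)
y_train = X_train * 3 + 1

sk_reg = LinearRegression()             # fits the intercept by default
sk_reg.fit(X_train, y_train)
print(sk_reg.intercept_, sk_reg.coef_)  # about [1.] and [[3.]], matching ols_reg.w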
(2) Computing the parameters with gradient descent
''' Linear_regression_gd
1. initial parameters
2. loss, gradient, gradient_descent, process_data
3. train, predict
4. application
'''
import numpy as np

class linear_regression_gd():
    def __init__(self, iters=1000, eta=1e-2, tol=None):
        self.iters = iters  # number of training iterations
        self.eta = eta      # learning rate for gradient descent
        self.tol = tol      # early-stopping threshold on the loss decrease
        self.w = None       # parameters

    def _preprocess_data_X(self, X):
        '''1 Prepend the constant feature'''
        m, n = X.shape
        X_ = np.ones([m, n + 1])
        X_[:, 1:] = X
        return X_

    def _predict(self, X, w):
        return np.matmul(X, w)

    def predict(self, X, w):
        '''2 Predict'''
        X = self._preprocess_data_X(X)
        return self._predict(X, w)

    def _loss(self, y_pre, y):
        '''3 MSE loss'''
        return np.mean((y_pre - y) ** 2)

    def _gradient(self, X, y, y_pre):
        return np.matmul(X.T, (y_pre - y)) / y.size  # [100,2].T @ [100,1] -> [2,1]

    def _gradient_descent(self, w, X, y):
        '''4 Gradient descent'''
        if self.tol is not None:
            loss_old = np.inf
        # 4.1 Iterate
        for i in range(self.iters):
            y_pre = self._predict(X, w)
            loss = self._loss(y_pre, y)
            # print('iter %d loss: %f' % (i, loss))
            # 4.2 Early stopping
            if self.tol is not None:
                if loss_old - loss < self.tol:
                    break
                loss_old = loss
            # 4.3 Update the parameters
            grad = self._gradient(X, y, y_pre)
            w -= self.eta * grad
            if i % 100 == 0:
                print(i, w)

    def train(self, X_train, y_train):
        '''5 Train'''
        # 5.1 Preprocess the data
        X_train = self._preprocess_data_X(X_train)
        # 5.2 Initialize the parameters (one weight per column of the design matrix)
        self.w = np.random.random(X_train.shape[-1]).reshape(-1, 1)
        # 5.3 Run gradient descent
        self._gradient_descent(self.w, X_train, y_train)
if __name__ == '__main__':
    X_train = np.linspace(0, 10, 100).reshape(-1, 1)           # [100, 1]
    y_train = X_train * 3 + 1 + np.random.randn(1) / 100       # [100, 1]
    reg_gd = linear_regression_gd(iters=3400, eta=1e-2, tol=1e-10)
    reg_gd.train(X_train, y_train)
    w = reg_gd.w
    y_pre = reg_gd.predict(X_train, w)
    print(y_pre.shape)
'''
0 [[0.20966034]
[1.6335141 ]]
100 [[0.55296788]
[3.06662242]]
200 [[0.65046267]
[3.05196308]]
300 [[0.7265051 ]
[3.04052933]]
400 [[0.78581544]
[3.03161141]]
500 [[0.83207537]
[3.02465576]]
600 [[0.86815645]
[3.01923061]]
700 [[0.89629839]
[3.01499918]]
800 [[0.91824808]
[3.01169882]]
900 [[0.93536805]
[3.00912466]]
1000 [[0.94872101]
[3.0071169 ]]
1100 [[0.95913584]
[3.00555093]]
1200 [[0.96725903]
[3.00432952]]
1300 [[0.97359483]
[3.00337687]]
1400 [[0.97853652]
[3.00263384]]
1500 [[0.98239086]
[3.0020543 ]]
1600 [[0.9853971 ]
[3.00160228]]
1700 [[0.98774187]
[3.00124972]]
1800 [[0.9895707 ]
[3.00097474]]
1900 [[0.99099712]
[3.00076026]]
2000 [[0.99210968]
[3.00059297]]
2100 [[0.99297744]
[3.0004625 ]]
2200 [[0.99365425]
[3.00036073]]
2300 [[0.99418215]
[3.00028136]]
2400 [[0.99459388]
[3.00021945]]
2500 [[0.99491503]
[3.00017116]]
2600 [[0.9951655]
[3.0001335]]
2700 [[0.99536087]
[3.00010413]]
2800 [[0.99551324]
[3.00008121]]
2900 [[0.99563209]
[3.00006334]]
3000 [[0.99572479]
[3.00004941]]
(100, 1)
Process finished with exit code 0
'''
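To relate the two implementations (a hypothetical check, reusing the OLSLinearRegression class from part (1) and the X_train, y_train defined above), the gradient-descent weights should approach the closed-form ones:

ols_check = OLSLinearRegression()
ols_check.train(X_train, y_train)
print(ols_check.w)   # closed-form weights; reg_gd.w converges toward these values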
(3) Stochastic gradient descent
import numpy as np

class Linear_Regression_sgd:
    def __init__(self, n_iter=1000, eta=0.01):
        self.n_iter = n_iter  # number of training epochs
        self.eta = eta        # initial learning rate
        self.w = None         # model parameters (initialized from the feature count at training time)

    def _preprocess_data_X(self, X):
        # Prepend the constant feature
        m, n = X.shape
        X_ = np.ones([m, n + 1])
        X_[:, 1:] = X
        return X_

    def _gradient(self, xi, yi, w):
        return -xi * (yi - np.dot(xi, w))  # gradient of the single-sample squared error

    def _stochastic_gradient_descent(self, X, y, eta, n_iter):
        m, _ = X.shape
        eta0 = eta
        step_i = 0
        for j in range(n_iter):
            idx = np.random.permutation(m)                     # reshuffle X and y together each epoch
            for i in idx:
                grad = self._gradient(X[i], y[i], self.w)      # gradient from a single sample
                step_i += 1
                eta = eta0 / np.power(step_i, 0.25)            # decaying learning rate
                self.w += eta * -grad                          # update the parameters w
            if j % 10 == 0:
                print(self.w)

    def train(self, X, y):
        X = self._preprocess_data_X(X)
        if self.w is None:
            _, n = X.shape
            self.w = np.random.random(n)
        self._stochastic_gradient_descent(X, y, self.eta, self.n_iter)

    def predict(self, X):
        X = self._preprocess_data_X(X)
        return np.dot(X, self.w)
if __name__ == '__main__':
    X_train = np.linspace(0, 10, 100).reshape(-1, 1)            # [100, 1]
    y_train = X_train * 0.8 + 0.5 + np.random.randn(1) / 100    # [100, 1]
    reg_sgd = Linear_Regression_sgd(n_iter=100, eta=1e-5)
    reg_sgd.train(X_train, y_train)
    w = reg_sgd.w
    y_pre = reg_sgd.predict(X_train)
'''
[0.87034832 0.56149583]
[0.87206667 0.56075662]
[0.87340481 0.55991468]
[0.87459365 0.559757 ]
[0.87569099 0.55915361]
[0.87672773 0.55848424]
[0.87771676 0.55795229]
[0.87866814 0.55752828]
[0.87958761 0.55706313]
[0.88048 0.55706118]
Process finished with exit code 0
'''
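For reference (not from the original text, and assuming scikit-learn is installed), sklearn.linear_model.SGDRegressor implements the same idea with an inverse-scaling learning-rate schedule similar to the eta0 / step**0.25 decay used above:

import numpy as np
from sklearn.linear_model import SGDRegressor

X_train = np.linspace(0, 10, 100).reshape(-1, 1)
y_train = (X_train * 0.8 + 0.5).ravel()

sgd = SGDRegressor(learning_rate='invscaling', eta0=0.01, power_t=0.25, max_iter=1000)
sgd.fit(X_train, y_train)
print(sgd.intercept_, sgd.coef_)   # roughly 0.5 and 0.8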