线性回归梯度下降

最新推荐文章于 2024-05-06 11:15:59 发布

我爱charming你

最新推荐文章于 2024-05-06 11:15:59 发布

阅读量152

点赞数

文章标签：线性回归 算法 回归

本文链接：https://blog.csdn.net/weixin_42601270/article/details/131754146

版权

一、 linear伪代码实现

import numpy as np
"""
训练模型：数据集为常量(固定),参数(theta)为自变量。--->训练参数(theta)
预测模式：参数(theta)为常量(固定),数据集为自变量--->换数据集预测

Y=X*theta--->X为常量，theta为自变量。

此种形式是以数据集固定的形式----》去求参数theta

X-->sh:8*2 为8个样本，两维特征(其中有一维为常数量1)

theta-->2*1 为一组2个参数，只训练一组参数。(第一个theta0，对应常数量x0=1)   -----》维数跟着特征维数走

"""

class Linear:
    """Ordinary least-squares linear regression fitted in closed form.

    After ``train`` the model predicts ``X @ theta + theta0``.

    Attributes:
        use_b: Whether an intercept term is fitted.
        theta: Feature weights, shape ``(n_features, n_targets)``; ``None``
            until ``train`` has been called.
        theta0: Intercept. The scalar ``0`` when ``use_b`` is false or before
            training, otherwise an array of shape ``(n_targets,)``.
    """

    def __init__(self, use_b=True):
        """Create an untrained model.

        Args:
            use_b: If true, ``train`` prepends a constant column of ones so
                that an intercept is learned alongside the feature weights.
        """
        self.use_b = use_b
        self.theta = None
        self.theta0 = 0

    def train(self, X, Y):
        """Fit the parameters that minimise the squared error on (X, Y).

        Args:
            X: Array-like of shape ``(n_samples, n_features)``. A 1-D input is
                treated as ``n_samples`` values of a single feature.
            Y: Array-like of targets with ``n_samples`` rows.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        Y = np.asarray(Y, dtype=float).reshape(X.shape[0], -1)

        if self.use_b:
            # Constant feature x0 = 1; its weight becomes the intercept.
            X = np.column_stack((np.ones((X.shape[0], 1)), X))

        # Least-squares solution of X @ theta = Y.  This equals the normal
        # equation (X^T X)^-1 X^T Y when X has full column rank, without
        # forming an explicit inverse.
        theta, *_ = np.linalg.lstsq(X, Y, rcond=None)

        # Split the intercept from the per-feature weights.
        if self.use_b:
            self.theta0 = theta[0]
            self.theta = theta[1:]
        else:
            self.theta0 = 0
            self.theta = theta

    def predict(self, X):
        """Return predictions for the samples in X.

        Args:
            X: Array-like of shape ``(n_samples, n_features)`` with the same
                number of features used for training. A 1-D input is reshaped
                to rows of ``n_features`` values.

        Returns:
            ndarray of shape ``(n_samples, n_targets)``.

        Raises:
            RuntimeError: If the model has not been trained yet.
        """
        if self.theta is None:
            raise RuntimeError("train() must be called before predict()")
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X.reshape(-1, self.theta.shape[0])
        # Samples are rows, so the weights multiply from the right.
        predict_y = X @ self.theta + self.theta0
        return predict_y

    def score(self, X, Y):
        """Placeholder for a model-quality metric; does nothing and returns None."""
        pass

    def save(self):
        """Placeholder for persisting the model; does nothing and returns None."""
        pass

    def load(self, model_path):
        """Placeholder for restoring a model; does nothing and returns None."""
        pass




if __name__ == '__main__':
    # Training set: eight samples with one feature each, stored as column vectors (8 x 1).
    X1 = np.array([10, 15, 20, 30, 50, 60, 60, 70])[:, np.newaxis]
    Y = np.array([0.8, 1.0, 1.8, 2.0, 3.2, 3.0, 3.1, 3.5])[:, np.newaxis]

    # Fit a model with an intercept term.
    linear = Linear(use_b=True)
    linear.train(X1, Y)

    # One sample per row; the feature count must match the training data.
    x_test = [[55]]
    y_test_hat = linear.predict(x_test)
    print("预测结果:", y_test_hat)

    # Show the learned weights first, then the intercept.
    for learned in (linear.theta, linear.theta0):
        print(learned)

二、基于表达式解析的线性回归代码实现

import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score
import sys


## Select a font with Chinese glyphs so the plot labels are not garbled,
## and keep the minus sign renderable with that font.
mpl.rcParams['font.sans-serif'] = [u'simHei']
mpl.rcParams['axes.unicode_minus'] = False
# 1. Build the data: eight samples with one feature, as column vectors.
X1 = np.array([10, 15, 20, 30, 50, 60, 60, 70]).reshape((-1, 1))
Y = np.array([0.8, 1.0, 1.8, 2.0, 3.2, 3.0, 3.1, 3.5]).reshape((-1, 1))


# Prepend the constant column that corresponds to the intercept term.
# (np.hstack((np.ones_like(X1), X1)) builds the same matrix.)
X = np.column_stack((np.ones_like(X1), X1))
# To fit without an intercept, use the raw features instead:
# X = X1

# 2. Convert to np.matrix so that `*` is matrix multiplication and `.I` is the inverse.
#    np.asmatrix is used because the np.mat alias was removed in NumPy 2.0.
X = np.asmatrix(X)
Y = np.asmatrix(Y)

# 3. Solve for theta with the normal equation: theta = (X^T X)^-1 X^T Y.
theta = (X.T * X).I * X.T * Y
print(theta)

# 4. Predictions on the training samples.
predict_y = X * theta
print(predict_y)
# Report MSE and R^2 (`.A` turns a matrix back into a plain ndarray).
print(Y.shape)
print(predict_y.shape)
mse = mean_squared_error(y_true=Y.A,y_pred=predict_y.A)
print("MSE",mse)
r2 = r2_score(y_true=Y.A,y_pred=predict_y.A)
print("r^2",r2)

# 5. Plot the true values against the fitted values.
plt.plot(X1, Y, 'bo', label=u'真实值')
plt.plot(X1, predict_y, 'r--o', label=u'预测值')
plt.legend(loc='lower right')
plt.show()



# -- encoding:utf-8 --
"""
Create on 19/3/2
"""

import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score
## Select a font with Chinese glyphs so the plot labels are not garbled,
## and keep the minus sign renderable with that font.
mpl.rcParams['font.sans-serif'] = [u'simHei']
mpl.rcParams['axes.unicode_minus'] = False


# Whether an intercept column is appended to the design matrix.
flag = True
# 1. Build the data: 8 samples with 2 features each.
X1 = np.array([
    [10, 1],
    [15, 1],
    [20, 1],
    [30, 1],
    [50, 2],
    [60, 1],
    [60, 2],
    [70, 2]])

# The same 8 x 2 data built from a flat list; not used below.
X2 = np.array([10, 1,15, 1,20, 1,30, 1,50, 2,60, 1,60, 2,70, 2]).reshape((-1, 2))
# Eight targets as an 8 x 1 column.
Y = np.array([0.8, 1.0, 1.8, 2.0, 3.2, 3.0, 3.1, 3.5]).reshape((-1, 1))

if flag:
    # Append the constant column for the intercept as the LAST column,
    # so theta[2] is the intercept. np.hstack would build the same matrix.
    X = np.column_stack((X1, np.ones(shape=(X1.shape[0], 1))))
else:
    # No intercept term.
    X = X1


# 2. Convert to np.matrix so that `*` is matrix multiplication and `.I` is the inverse.
#    np.asmatrix is used because the np.mat alias was removed in NumPy 2.0.
X = np.asmatrix(X)
Y = np.asmatrix(Y)


# 3. Solve for theta with the normal equation: theta = (X^T X)^-1 X^T Y.
#    theta has one row per column of X and one column per column of Y.
theta = (X.T * X).I * X.T * Y
print(theta)

# 4. Predictions on the training samples.
predict_y = X * theta
print(Y.shape)
print(predict_y.shape)

# Predict for unseen samples with the fitted parameters; the row layout must
# match the columns of X (a trailing 1.0 when the intercept is enabled).
if flag:
    x = np.asmatrix(np.array([[55.0, 2.0,1.0],[55.0, 2.0,1.0]]))
else:
    x = np.asmatrix(np.array([[55.0, 2.0]]))
pred_y = x * theta
print("当面积为55平并且房间数目为2的时候，预测价格为:{}".format(pred_y))

from mpl_toolkits.mplot3d import Axes3D

# Training samples: first and second feature columns.
x1 = X[:, 0]
x2 = X[:, 1]
fig = plt.figure(facecolor='w')
ax = Axes3D(fig,auto_add_to_figure=False)
fig.add_axes(ax)
# Scatter the training points in 3-D.
ax.scatter(x1, x2, Y, s=40, c='r', depthshade=False)



# Grid over which the fitted plane is drawn: x1 in 0..99, x2 in 0..3.
x1 = np.arange(0, 100)
x2 = np.arange(0, 4)
# After meshgrid both arrays have shape (4, 100): one row per x2 value,
# 400 grid points in total.
x1, x2 = np.meshgrid(x1, x2)


def predict(x1, x2, theta, base=False):
    """Evaluate the linear model at the point (x1, x2).

    Args:
        x1: Value of the first feature.
        x2: Value of the second feature.
        theta: Indexable parameters; ``theta[0]`` and ``theta[1]`` weight the
            two features and ``theta[2]`` is the intercept (read only when
            ``base`` is true).
        base: Whether the intercept ``theta[2]`` is added.

    Returns:
        ``x1 * theta[0] + x2 * theta[1]``, plus ``theta[2]`` when ``base`` is true.
    """
    # The weighted feature sum is common to both variants.
    y_ = x1 * theta[0] + x2 * theta[1]
    if base:
        y_ = y_ + theta[2]
    return y_
# Lift every grid point to its predicted height; the model is linear, so the
# heights form a plane.
z = np.array([predict(a, b, theta, base=flag) for a, b in zip(x1.flatten(), x2.flatten())])
# Bring the heights back to the grid layout (4 x 100).
z = z.reshape(x1.shape)
print(z.shape)
# Draw the fitted plane with the jet colour map.
ax.plot_surface(x1, x2, z, rstride=1, cstride=1, cmap=plt.cm.jet)
ax.set_title(u'房屋租赁价格预测')
#
plt.show()

三、bgd、sgd、mbgd

3.1小批量梯度下降

import numpy as np
import matplotlib.pyplot as plt

# Design matrix with a leading constant column, so the model can still produce
# a non-zero output when every input feature is zero.
X = np.array([[1, 25], [1, 28], [1, 31], [1, 35], [1, 38], [1, 40]])
y = np.array([[106], [145], [167], [208], [233], [258]])

# One column of theta is one set of parameters.
theta = np.zeros((2, 1))
# Use np.matrix so that `*` means matrix multiplication. np.asmatrix replaces
# the np.mat alias, which was removed in NumPy 2.0.
X = np.asmatrix(X)
y = np.asmatrix(y)
def cost(theta):
    """Return the halved mean squared error of ``theta`` on the module-level data.

    Reads the module-level ``X`` and ``y`` and evaluates
    ``sum((X.theta - y)^2) / (2 * m)`` where ``m`` is ``y.size``.
    """
    residual = X.dot(theta) - y
    scale = 1.0 / (2 * y.size)
    return scale * np.square(residual).sum()


# def gradientDescent(X, y, theta, alpha=0.01, iters=15):  ##批量梯度下降，训练了1500epoch  其实就是bsd
#     m = y.size
#     # costs=[]
#     for i in range(iters):
#         y_hat = X.dot(theta)
#         yy = X.T.dot(y_hat - y)  ##得到的是所有梯度的列表-》形状维 n(上一个输入特征n)*1(假设输出层)
#         theta -= alpha * (1.0 / m) * (X.T.dot(y_hat - y))  ##theta=theta-1/m*lr*(导数)   这里的梯度是一次算出所以
#     """
#     iter(i)=0:
#         thera=[[ 1.86166667], [63.90166667]]
#
#     iter(i)=14:
#         thera=[[1.91449752e+14], [6.45180391e+15]]
#     iter(i)=1499:
#         thera=[[nan], [nan]]
#     """
#
#
# def gradientDescent_bgd(X, y, theta, alpha=0.01, iters=15):  ##批量梯度下降，训练了1500epoch
#     m = y.size
#     theta = np.zeros((2, 1))
#     # costs=[]
#     for i in range(iters):
#         sum_gradient = np.zeros(shape=theta.shape, dtype=float)
#         for index in range(len(X)):
#             y_pred = X[index:index + 1].dot(theta)
#             ##全部样本梯度的累加
#             sum_gradient += X[index:index + 1].T * (y_pred - y[index])  ####一个样本算全部的梯度值,此种方式得到的是一个（n*1)的梯度列表
#         theta -= alpha * (
#                     1.0 / m) * sum_gradient  ##theta=theta-1/m*lr*(导数)   这里的梯度是一次算出所以 iter=0:thera[[ 1.86166667], [63.90166667]]
#     """
#       iter(i)=0:
#           thera=[[ 1.86166667], [63.90166667]]
#
#       iter(i)=14:
#           [[1.91449752e+14], [6.45180391e+15]]
#
#       iter(i)=1499:
#           thera=[[nan], [nan]]
#       """


def gradientDescent_mbgd(X, y, theta, alpha=0.01, iters=15, batch_size=3):
    """Fit linear-regression parameters with mini-batch gradient descent.

    The samples are visited in order in consecutive batches of ``batch_size``
    rows. After each batch the parameters move against the batch-averaged
    gradient ``X_b^T (X_b theta - y_b) / len(batch)``. The last batch of an
    epoch may be shorter and is averaged over its own length.

    Args:
        X: Array-like of shape ``(n_samples, n_features)``.
        y: Array-like of targets with ``n_samples`` rows.
        theta: Accepted for signature compatibility. The optimisation always
            starts from zeros and the caller's object is not modified.
        alpha: Learning rate.
        iters: Number of passes (epochs) over the data.
        batch_size: Number of samples per parameter update.

    Returns:
        The fitted parameters, shape ``(n_features, n_targets)``. They are
        also printed, as before.
    """
    # Plain ndarrays, so the arithmetic is the same for ndarray and np.matrix inputs.
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).reshape(X.shape[0], -1)
    theta = np.zeros((X.shape[1], y.shape[1]))

    for _ in range(iters):
        # Parameter updates per epoch: ceil(n_samples / batch_size).
        for start in range(0, len(X), batch_size):
            batch_X = X[start:start + batch_size]
            batch_y = y[start:start + batch_size]
            # Gradient summed over every sample of the batch, each counted once.
            residual = batch_X @ theta - batch_y
            gradient = batch_X.T @ residual
            theta -= alpha * (1.0 / len(batch_X)) * gradient
    print(theta)
    return theta




# Script entry point: run the mini-batch variant on the module-level sample data.
if __name__ == '__main__':
    # The two calls below refer to variants that exist only as commented-out
    # code in this script.
    # gradientDescent(X, y, theta)
    # gradientDescent_bgd(X, y, theta)
    gradientDescent_mbgd(X, y, theta)

3.2随机梯度下降

import numpy as np
import matplotlib.pyplot as plt

# Design matrix: a leading constant column of ones followed by the single
# feature, so the model can still produce a non-zero output for zero inputs.
X = np.column_stack((np.ones(6, dtype=int), [25, 28, 31, 35, 38, 40]))
# Targets as a 6 x 1 column.
y = np.array([106, 145, 167, 208, 233, 258]).reshape(-1, 1)

# One column of theta is one set of parameters.
theta = np.zeros(shape=(2, 1))


def cost(theta):
    """Return the halved mean squared error of ``theta`` on the module-level data.

    Reads the module-level ``X`` and ``y`` and evaluates
    ``sum((X.theta - y)^2) / (2 * m)`` where ``m`` is ``y.size``.
    """
    errors = X.dot(theta) - y
    half_mean_factor = 1.0 / (2 * y.size)
    return half_mean_factor * np.square(errors).sum()


# def gradientDescent(X, y, theta, alpha=0.01, iters=15):  ##批量梯度下降，训练了1500epoch  其实就是bsd
#     m = y.size
#     # costs=[]
#     for i in range(iters):
#         y_hat = X.dot(theta)
#         yy = X.T.dot(y_hat - y)  ##得到的是所有梯度的列表-》形状维 n(上一个输入特征n)*1(假设输出层)
#         theta -= alpha * (1.0 / m) * (X.T.dot(y_hat - y))  ##theta=theta-1/m*lr*(导数)   这里的梯度是一次算出所以
#     """
#     iter(i)=0:
#         thera=[[ 1.86166667], [63.90166667]]
#
#     iter(i)=14:
#         thera=[[1.91449752e+14], [6.45180391e+15]]
#     iter(i)=1499:
#         thera=[[nan], [nan]]
#     """
#
#
# def gradientDescent_bgd(X, y, theta, alpha=0.01, iters=15):  ##批量梯度下降，训练了1500epoch
#     m = y.size
#     theta = np.zeros((2, 1))
#     # costs=[]
#     for i in range(iters):
#         sum_gradient = np.zeros(shape=theta.shape, dtype=float)
#         for index in range(len(X)):
#             y_pred = X[index:index + 1].dot(theta)
#             ##全部样本梯度的累加
#             sum_gradient += X[index:index + 1].T * (y_pred - y[index])  ####一个样本算全部的梯度值,此种方式得到的是一个（n*1)的梯度列表
#         theta -= alpha * (
#                     1.0 / m) * sum_gradient  ##theta=theta-1/m*lr*(导数)   这里的梯度是一次算出所以 iter=0:thera[[ 1.86166667], [63.90166667]]
#     """
#       iter(i)=0:
#           thera=[[ 1.86166667], [63.90166667]]
#
#       iter(i)=14:
#           [[1.91449752e+14], [6.45180391e+15]]
#
#       iter(i)=1499:
#           thera=[[nan], [nan]]
#       """


def gradientDescent_sgd(X, y, theta, alpha=0.01, iters=15):
    """Fit linear-regression parameters with per-sample (stochastic) gradient descent.

    The samples are visited in their given order. After each single sample the
    parameters move against that sample's gradient ``x^T (x theta - y)``, so
    one epoch performs ``n_samples`` updates and the whole run performs
    ``n_samples * iters``.

    Args:
        X: Array-like of shape ``(n_samples, n_features)``.
        y: Array-like of targets with ``n_samples`` rows.
        theta: Accepted for signature compatibility. The optimisation always
            starts from zeros and the caller's object is not modified.
        alpha: Learning rate.
        iters: Number of passes (epochs) over the data.

    Returns:
        The fitted parameters, shape ``(n_features, n_targets)``. They are
        also printed, as before.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).reshape(X.shape[0], -1)
    # Size the parameters from the data instead of assuming two features.
    theta = np.zeros((X.shape[1], y.shape[1]))

    for _ in range(iters):
        for sample, target in zip(X, y):
            sample = sample.reshape(1, -1)
            # d(x theta)/d(theta) = x^T, scaled by this sample's prediction error.
            residual = sample @ theta - target
            theta -= alpha * (sample.T @ residual)
    print(theta)
    return theta




# Script entry point: run the per-sample (stochastic) variant on the module-level sample data.
if __name__ == '__main__':
    # The two calls below refer to variants that exist only as commented-out
    # code in this script.
    # gradientDescent(X, y, theta)
    # gradientDescent_bgd(X, y, theta)
    gradientDescent_sgd(X, y, theta)

3.3 批量梯度下降（BGD）

import numpy as np
import matplotlib.pyplot as plt

# Design matrix: a leading constant column of ones followed by the single
# feature, so the model can still produce a non-zero output for zero inputs.
X = np.column_stack((np.ones(6, dtype=int), [25, 28, 31, 35, 38, 40]))
# Targets as a 6 x 1 column.
y = np.array([106, 145, 167, 208, 233, 258]).reshape(-1, 1)

# One column of theta is one set of parameters.
theta = np.zeros(shape=(2, 1))

def cost(theta):
    """Return the halved mean squared error of ``theta`` on the module-level data.

    Reads the module-level ``X`` and ``y`` and evaluates
    ``sum((X.theta - y)^2) / (2 * m)`` where ``m`` is ``y.size``.
    """
    squared_errors = np.square(X.dot(theta) - y)
    n_targets = y.size
    return 1.0 / (2 * n_targets) * squared_errors.sum()
def gradientDescent(X, y, theta, alpha=0.01, iters=15):
    """Run vectorised full-batch gradient descent for linear regression.

    Each iteration computes the gradient over all samples at once,
    ``X^T (X theta - y)``, which has shape ``(n_features, 1)``, and applies
    ``theta -= alpha / m * gradient``.

    Author's recorded run on this script's sample data with the defaults: the
    iterates diverge. theta is about [[1.86], [63.9]] after the first
    iteration, about [[1.9e14], [6.5e15]] after 15, and nan after 1500.

    Args:
        X: Array-like of shape ``(n_samples, n_features)``.
        y: Array-like of targets with ``n_samples`` rows.
        theta: Starting parameters, shape ``(n_features, 1)``. A float ndarray
            is updated IN PLACE, so the caller's array holds the result.
        alpha: Learning rate.
        iters: Number of full-batch updates.

    Returns:
        The updated parameters (the same object as ``theta`` when it is an ndarray).
    """
    X = np.asarray(X)
    y = np.asarray(y).reshape(X.shape[0], -1)
    # asarray returns an existing ndarray unchanged, so in-place updates
    # still reach the caller's array.
    theta = np.asarray(theta)
    m = X.shape[0]
    for _ in range(iters):
        gradient = X.T.dot(X.dot(theta) - y)
        theta -= alpha * (1.0 / m) * gradient
    return theta


def gradientDescent_bgd(X, y, theta, alpha=0.01, iters=15):
    """Run full-batch gradient descent, accumulating the gradient sample by sample.

    Each iteration sums the per-sample gradients ``x^T (x theta - y)`` over
    all samples and then applies one update
    ``theta -= alpha / m * sum_gradient``.

    Author's recorded run on this script's sample data with the defaults: the
    iterates diverge. theta is about [[1.86], [63.9]] after the first
    iteration, about [[1.9e14], [6.5e15]] after 15, and nan after 1500.

    Args:
        X: Array-like of shape ``(n_samples, n_features)``.
        y: Array-like of targets with ``n_samples`` rows.
        theta: Accepted for signature compatibility. The optimisation always
            starts from zeros and the caller's object is not modified.
        alpha: Learning rate.
        iters: Number of full-batch updates.

    Returns:
        The fitted parameters, shape ``(n_features, n_targets)``.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).reshape(X.shape[0], -1)
    m = X.shape[0]
    # Size the parameters from the data instead of assuming two features.
    theta = np.zeros((X.shape[1], y.shape[1]))

    for _ in range(iters):
        sum_gradient = np.zeros_like(theta)
        for sample, target in zip(X, y):
            sample = sample.reshape(1, -1)
            # Gradient contribution of a single sample, shape (n_features, n_targets).
            sum_gradient += sample.T @ (sample @ theta - target)
        theta -= alpha * (1.0 / m) * sum_gradient
    return theta

# Script entry point: run both full-batch variants on the module-level sample data.
if __name__ == '__main__':
    # Vectorised variant; it updates the module-level `theta` array in place.
    gradientDescent(X,y,theta)
    # Per-sample accumulation variant; it starts from its own zero vector,
    # so the values left in `theta` by the call above do not affect it.
    gradientDescent_bgd(X,y,theta)