Python实现线性回归

最新推荐文章于 2021-09-22 14:57:18 发布

「已注销」

最新推荐文章于 2021-09-22 14:57:18 发布

阅读量184

点赞数

分类专栏：机器学习 文章标签：python 机器学习

本文链接：https://blog.csdn.net/fatfairyyy/article/details/115308285

版权

机器学习专栏收录该内容

43 篇文章 0 订阅

订阅专栏

这篇博客介绍了如何用Python实现线性回归模型，包括使用梯度下降和正规方程两种方法。首先，通过随机生成的数据集展示了数据分布，然后利用scikit-learn库划分训练集和测试集。接着，定义了一个`LinearRegression`类，实现了梯度下降的训练过程，展示训练过程中代价函数随迭代次数的降低。最后，用正规方程训练模型，并评估了模型在训练集和测试集上的误差。

摘要由CSDN通过智能技术生成

import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
np.random.seed(123)

# Synthetic dataset.
# np.random.rand draws from the uniform distribution on [0, 1); asking for
# shape (500, 1) yields a 500-row column vector, scaled here to [0, 2).
X = np.random.rand(500, 1) * 2
# Targets follow y = 5 + 3x plus standard-normal noise, one per sample.
noise = np.random.randn(500, 1)
y = 5 + 3 * X + noise

# Scatter plot of the raw data.
fig = plt.figure(figsize=(8, 6))
plt.scatter(X, y)
plt.title("Dataset")
plt.xlabel("First feature")
plt.ylabel("Second feature")
plt.show()

# Randomly split the data: the training part fits the model, the test part
# checks the fitted model (a manual slice would work as well).
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Report the shape of each split.
for shape_report in (
    f'Shape X_train:{X_train.shape}',
    f'Shape y_train:{y_train.shape}',
    f'Shape X_test:{X_test.shape}',
    f'Shape y_test:{y_test.shape}',
):
    print(shape_report)

# Linear regression model (a regressor, not a classifier)
class LinearRegression:
    """Linear regression ``y ≈ X·w + b`` fitted by least squares.

    Two training routines are provided: batch gradient descent and a
    closed-form least-squares solve (the normal equation).  Both store the
    fitted parameters on the instance as ``self.weights`` and ``self.bias``,
    which ``predict`` reads.
    """

    def __init__(self):
        pass

    def train_gradient_descent(self, X, y, learning_rate=0.01, n_iters=100):
        """Fit the model with batch gradient descent on the mean squared error.

        Args:
            X: training features, 2-D with shape (n_samples, n_features).
            y: training targets, shape (n_samples, 1).  A 1-D array of
                length n_samples is also accepted and treated as a column.
            learning_rate: step size of each parameter update.
            n_iters: number of gradient steps to take.

        Returns:
            A tuple ``(weights, bias, costs)`` where ``weights`` has shape
            (n_features, 1), ``bias`` is a scalar and ``costs`` is the list
            of cost values, one per iteration.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if y.ndim == 1:
            # A 1-D target would broadcast (n, 1) - (n,) into an (n, n)
            # matrix and silently corrupt the gradients, so make it a column.
            y = y.reshape(-1, 1)

        # Step 0: initialise the parameters.
        n_samples, n_features = X.shape
        self.weights = np.zeros(shape=(n_features, 1))
        self.bias = 0
        costs = []

        for i in range(n_iters):
            # Step 1: linear combination of the features and the weights.
            y_predict = np.dot(X, self.weights) + self.bias
            residuals = y_predict - y

            # Step 2: mean squared error on the training data.
            cost = (1 / n_samples) * np.sum(residuals ** 2)
            costs.append(cost)

            # Report progress every 100 iterations (the cost should decrease).
            if i % 100 == 0:
                print(f"Cost at iteration {i}:{cost}")

            # Step 3: gradients of the cost w.r.t. the weights and the bias.
            dJ_dw = (2 / n_samples) * np.dot(X.T, residuals)
            dJ_db = (2 / n_samples) * np.sum(residuals)

            # Step 4: move the parameters against the gradient.
            self.weights = self.weights - learning_rate * dJ_dw
            self.bias = self.bias - learning_rate * dJ_db
        return self.weights, self.bias, costs

    # Closed-form solution via the normal equation (no gradient descent)
    def train_normal_equation(self, X, y):
        """Fit the weights in closed form by solving the least-squares problem.

        The bias is fixed at 0, so a caller that wants an intercept must
        include a column of ones in ``X``; its weight is then the intercept.

        The solution equals ``inv(X.T @ X) @ X.T @ y`` when ``X.T @ X`` is
        invertible.  ``np.linalg.lstsq`` is used instead of forming the
        inverse explicitly so that collinear features give the minimum-norm
        solution rather than an error or an unstable result.

        Args:
            X: training features, 2-D with shape (n_samples, n_features).
            y: training targets with n_samples rows.

        Returns:
            A tuple ``(weights, bias)``; ``bias`` is always 0.
        """
        self.weights = np.linalg.lstsq(np.asarray(X), np.asarray(y), rcond=None)[0]
        self.bias = 0
        return self.weights, self.bias

    def predict(self, X):
        """Return ``X·weights + bias`` using the parameters set by training."""
        return np.dot(X, self.weights) + self.bias

# Train with gradient descent
regressor = LinearRegression()  # the class defined in this file
w_trained, b_trained, costs = regressor.train_gradient_descent(
    X_train, y_train, learning_rate=0.005, n_iters=600
)

# Plot how the cost falls as the number of iterations grows.
fig = plt.figure(figsize=(8, 6))
# `costs` holds one value per iteration, so the x-axis is derived from its
# length rather than repeating the iteration count passed above.
# (np.arange with a single argument counts from 0 up to, but excluding, it.)
plt.plot(np.arange(len(costs)), costs)
plt.title("Development of cost during training")
plt.xlabel("Steps of iterations")
plt.ylabel("Cost")
plt.show()

# Train with the normal equation.
# train_normal_equation leaves the bias at 0, so a column of ones is prepended
# to the features; the weight learned for that column acts as the intercept.
# The column shapes are taken from the actual split instead of being
# hard-coded, so they stay correct if the dataset or split size changes.
n_samples = (X_train.shape[0], 1)
n_samples_test = (X_test.shape[0], 1)
X_b_train = np.c_[np.ones(n_samples), X_train]
X_b_test = np.c_[np.ones(n_samples_test), X_test]
reg_normal = LinearRegression()
# NOTE: train_normal_equation returns a (weights, bias) tuple, so w_trained
# holds the whole tuple here, not just the weight vector.
w_trained = reg_normal.train_normal_equation(X_b_train, y_train)

# Evaluate the gradient-descent model
n_samples, _ = X_train.shape
n_samples_tests, _ = X_test.shape

y_p_train = regressor.predict(X_train)
y_p_test = regressor.predict(X_test)

# Mean squared error on each split.
error_train = (1 / n_samples) * np.sum((y_p_train - y_train) ** 2)
error_test = (1 / n_samples_tests) * np.sum((y_p_test - y_test) ** 2)

# Both errors are rounded to 4 decimals so they can be compared directly.
print(f"Error on training set:{np.round(error_train, 4)}")
print(f"Error on test set:{np.round(error_test, 4)}")

# Visualise the predictions: training samples against the model's
# predictions for the test inputs.
fig = plt.figure(figsize=(8, 6))
plt.scatter(X_train, y_train)
plt.scatter(X_test, y_p_test)
plt.xlabel("First feature")
plt.ylabel("Second feature")
plt.show()

「已注销」

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Python实现线性回归

import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
np.random.seed(123)
# 以下是随机的数据集
X = 2*np.random.rand(500,1) #通过本函数可以返回一个或一组服从“0~1”均匀分布的随机样本值。随机样本取值范围是[0,1)，不包括1。
# print(X) 👆通过添加参数(500,1)使X成为了一个
复制链接

扫一扫

专栏目录