Machine Learning (1): Implementing Gradient Descent for Univariate Linear Regression

1. The preprocessing file: prepare_for_training.py

"""Prepares the dataset for training"""

import numpy as np
from normalize import normalize
from generate_sinusoids import generate_sinusoids
from generate_polynomials import generate_polynomials

def prepare_for_training(data, polynomial_degree=0, sinusoid_degree=0, normalize_data=True):

    # Total number of training examples
    num_examples = data.shape[0]

    data_processed = np.copy(data)

    # Normalization
    features_mean = 0
    features_deviation = 0
    data_normalized = data_processed
    if normalize_data:
        (
            data_normalized,
            features_mean,
            features_deviation
        ) = normalize(data_processed)

        data_processed = data_normalized

    # Sinusoidal feature transform
    if sinusoid_degree > 0:
        sinusoids = generate_sinusoids(data_normalized, sinusoid_degree)
        data_processed = np.concatenate((data_processed, sinusoids), axis=1)

    # Polynomial feature transform
    if polynomial_degree > 0:
        polynomials = generate_polynomials(data_normalized, polynomial_degree, normalize_data)
        data_processed = np.concatenate((data_processed, polynomials), axis=1)

    # Prepend a column of ones (the bias term)
    data_processed = np.hstack((np.ones((num_examples, 1)), data_processed))

    return data_processed, features_mean, features_deviation

Note: the sinusoidal and polynomial feature transforms are not used in this example.
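The helper modules normalize.py, generate_sinusoids.py, and generate_polynomials.py are not shown in the post. For completeness, here is a minimal sketch of a normalize compatible with the call above (per-column zero mean, unit variance); this is an assumed implementation, not the author's exact code:

"""normalize.py -- per-column zero-mean, unit-variance scaling (assumed implementation)"""

import numpy as np

def normalize(features):
    # Work on a float copy so the caller's array is left untouched
    features_normalized = np.copy(features).astype(float)

    # Per-column statistics
    features_mean = np.mean(features, axis=0)
    features_deviation = np.std(features, axis=0)

    # Center the data (skip centering when there is only one row)
    if features.shape[0] > 1:
        features_normalized -= features_mean

    # Guard against division by zero for constant columns
    features_deviation[features_deviation == 0] = 1
    features_normalized /= features_deviation

    return features_normalized, features_mean, features_deviation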

2. linear_regression.py: implementing the core algorithm

import numpy as np
from prepare_for_training import prepare_for_training  # import the preprocessing helper

class Linear_regression:
    def __init__(self, data, labels, polynomial_degree=0, sinusoid_degree=0, normalize_data=True):
        """
        1. Preprocess the data
        2. Get the number of features
        3. Initialize the parameter matrix
        """
        (data_processed,
         features_mean,
         features_deviation) = prepare_for_training(data,
                                                    polynomial_degree, sinusoid_degree, normalize_data)
        self.data = data_processed
        self.labels = labels
        self.polynomial_degree = polynomial_degree
        self.sinusoid_degree = sinusoid_degree
        self.normalize_data = normalize_data

        num_features = self.data.shape[1]  # number of features (including the bias column)
        self.theta = np.zeros((num_features, 1))  # initialize parameters as a (num_features, 1) column vector

    # Training entry point: run gradient descent
    def train(self, alpha, num_iterations=500):
        cost_history = self.gradient_descent(alpha, num_iterations)
        return self.theta, cost_history

    # Iteration loop: perform num_iterations steps of gradient descent
    def gradient_descent(self, alpha, num_iterations):
        cost_history = []  # record the cost at every iteration
        for _ in range(num_iterations):
            self.gradient_step(alpha)
            cost_history.append(self.cost_function(self.data, self.labels))
        return cost_history

    # One gradient descent update step (vectorized matrix form)
    def gradient_step(self, alpha):
        predictions = Linear_regression.hypothesis(self.data, self.theta)  # predicted values
        delta = predictions - self.labels  # residuals
        num_examples = self.data.shape[0]  # number of samples
        self.theta = self.theta - alpha * (1 / num_examples) * np.dot(self.data.T, delta)  # update parameters

    # Compute the cost (half of the mean squared error)
    def cost_function(self, data, labels):
        num_examples = data.shape[0]
        delta = Linear_regression.hypothesis(data, self.theta) - labels
        cost = (1 / 2) * np.dot(delta.T, delta) / num_examples  # least squares
        return cost[0][0]

    @staticmethod
    def hypothesis(data, theta):
        predictions = np.dot(data, theta)
        return predictions

    # Cost on a (new) dataset, preprocessed the same way as the training data
    def get_cost(self, data, labels):
        data_processed = prepare_for_training(data, self.polynomial_degree, self.sinusoid_degree, self.normalize_data)[0]
        return self.cost_function(data_processed, labels)

    # Predict regression values with the trained parameter model
    def get_predict(self, data):
        data_processed = prepare_for_training(data, self.polynomial_degree, self.sinusoid_degree, self.normalize_data)[0]
        predictions = Linear_regression.hypothesis(data_processed, self.theta)  # predicted values
        return predictions
        return predictions
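For reference, the class implements batch gradient descent on the half mean-squared-error cost. With $m$ samples, design matrix $X$, labels $y$, and parameters $\theta$:

$$J(\theta) = \frac{1}{2m}\,(X\theta - y)^\top (X\theta - y), \qquad \theta \leftarrow \theta - \frac{\alpha}{m}\,X^\top (X\theta - y)$$

The expression np.dot(self.data.T, delta) in gradient_step computes exactly the $X^\top (X\theta - y)$ term, so each call performs one update of this rule.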

3. Running the learning (fitting the data): univariate_linear_regression.py

"""
#用户名 : 17954
#日期 : 2022/10/28 19:43
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from linear_regression import Linear_regression

pd.set_option('display.unicode.east_asian_width', True)
data = pd.read_csv('./data/world-happiness-report-2017.csv')
# Randomly sample 80% of the rows as the training set
train_data = data.sample(frac=0.8)
# The remaining 20% form the test set
test_data = data.drop(train_data.index)

input_param_name = 'Economy..GDP.per.Capita.'
output_param_name = 'Happiness.Score'

x_train = train_data[[input_param_name]].values
y_train = train_data[[output_param_name]].values
x_test = test_data[[input_param_name]].values
y_test = test_data[[output_param_name]].values

fig = plt.figure(figsize=(10, 6), dpi=100)
ax1 = fig.add_subplot(2, 2, 1)
ax1.scatter(x_train, y_train, label='train_data')
ax1.scatter(x_test, y_test, label='test_data')
ax1.set_xlabel(input_param_name)
ax1.set_ylabel(output_param_name)
ax1.legend()

# Train the model
num_iterations = 500
learning_rate = 0.01
linear_regression = Linear_regression(x_train, y_train)
(theta, cost_history) = linear_regression.train(learning_rate, num_iterations)

print('----------------------------------------')
print('Initial cost:', cost_history[0])
print('Final cost:', cost_history[-1])
ax2 = fig.add_subplot(2, 2, 2)
ax2.plot(range(num_iterations), cost_history)
ax2.set_xlabel('iter')
ax2.set_ylabel('cost')
ax2.set_title('cost_plot')

# Generate predictions across the input range
prediction_nums = 100
x_prediction = np.linspace(x_train.min(), x_train.max(), prediction_nums).reshape(prediction_nums, 1)
y_prediction = linear_regression.get_predict(x_prediction)
# Plot the fitted regression line
ax3 = fig.add_subplot(2, 2, 3)
ax3.scatter(x_train, y_train, label='train_data')
ax3.plot(x_prediction, y_prediction, label='linear', color='r')
ax3.legend()
plt.show()

Results

(Figure: scatter of the training/test data, the cost curve over iterations, and the fitted regression line.)

Note: all data passed into the computations must be in matrix (2-D array) form.
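This is why the script selects columns with double brackets: train_data[[input_param_name]].values yields a 2-D (m, 1) array, whereas single brackets would yield a 1-D (m,) vector that breaks the matrix arithmetic in Linear_regression. A quick illustration with hypothetical toy data:

import pandas as pd

df = pd.DataFrame({'x': [1.0, 2.0, 3.0]})  # toy data, for illustration only

print(df['x'].values.shape)    # (3,)   -- 1-D, not suitable for the np.dot-based training code
print(df[['x']].values.shape)  # (3, 1) -- 2-D column vector, what Linear_regression expects

# A 1-D array can also be promoted explicitly:
col = df['x'].values.reshape(-1, 1)
print(col.shape)               # (3, 1)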
