I. Univariate Linear Regression
(1) Gradient Descent
Hypothesis function
$$h_{\theta}(x)=\theta_{0}+\theta_{1}x$$
# Hypothesis
def H(theta0, X0):
    return np.dot(X0, theta0)
Cost function
$$J(\theta_{0},\theta_{1})=\frac{1}{2m}\sum_{i=1}^{m}\left(h_{\theta}(x^{(i)})-y^{(i)}\right)^{2}$$
# costFunction
def J(theta0, X0, y0):
    return np.sum(np.square(H(theta0, X0) - y0)) / (2*len(y0))  # correct: returns a scalar
    # return np.dot((H(theta0, X0) - y0).T, H(theta0, X0) - y0) / (2*len(y0))  # wrong: returns a matrix
    # np.sum(A) sums every element of the matrix (NumPy array) and returns the total
    # np.square(A) squares every element of the matrix (NumPy array) and returns an array
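As a quick sanity check, here is a sketch on made-up toy data (assuming the H and J defined above are in scope) whose cost can be verified by hand:

import numpy as np

# Two samples with a bias column; theta = [0, 1] gives predictions [1, 2].
# With y = [0, 0], J = (1^2 + 2^2) / (2*2) = 1.25.
X_toy = np.array([[1.0, 1.0], [1.0, 2.0]])
y_toy = np.array([0.0, 0.0])
theta_toy = np.array([0.0, 1.0])
print(J(theta_toy, X_toy, y_toy))  # 1.25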
Gradient descent
Written in matrix/vector form, where an extra column of ones is prepended to $x$:

$$X=\begin{bmatrix} 1 & x^{(1)} \\ 1 & x^{(2)} \\ \vdots & \vdots \\ 1 & x^{(m)} \end{bmatrix},\qquad \overrightarrow{\theta}=\begin{bmatrix} \theta_0 \\ \theta_1 \end{bmatrix}$$
The hypothesis function can then be written as

$$h_\theta(x)=X\overrightarrow{\theta}=\begin{bmatrix} \theta_0+\theta_1 x^{(1)} \\ \theta_0+\theta_1 x^{(2)} \\ \vdots \\ \theta_0+\theta_1 x^{(m)} \end{bmatrix}$$
so that

$$h_\theta(x)-\overrightarrow{y}=X\overrightarrow{\theta}-\overrightarrow{y}=\begin{bmatrix} \theta_0+\theta_1 x^{(1)}-y^{(1)} \\ \theta_0+\theta_1 x^{(2)}-y^{(2)} \\ \vdots \\ \theta_0+\theta_1 x^{(m)}-y^{(m)} \end{bmatrix}$$
Taking the transpose of $X$:

$$X^T=\begin{bmatrix} 1 & 1 & \cdots & 1 \\ x^{(1)} & x^{(2)} & \cdots & x^{(m)} \end{bmatrix}$$
Hence

$$X^T\left[h_\theta(x)-\overrightarrow{y}\right]=\begin{bmatrix} \sum_{i=1}^{m}\left(\theta_0+\theta_1 x^{(i)}-y^{(i)}\right) \\ \sum_{i=1}^{m}\left(\theta_0+\theta_1 x^{(i)}-y^{(i)}\right)x^{(i)} \end{bmatrix}$$
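To double-check this identity numerically, here is a small sketch (toy values chosen purely for illustration) that compares the matrix product against the two sums written out explicitly:

import numpy as np

x = np.array([1.0, 2.0, 3.0])
y = np.array([2.0, 5.0, 7.0])
X = np.column_stack([np.ones_like(x), x])    # prepend the ones column
theta = np.array([0.5, 2.0])

err = X.dot(theta) - y                       # h_theta(x) - y
vectorized = X.T.dot(err)                    # X^T [h_theta(x) - y]
elementwise = np.array([np.sum(err), np.sum(err * x)])
print(np.allclose(vectorized, elementwise))  # True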
Therefore, inside the gradient-descent for loop, all components of $\theta$ can be updated simultaneously with a single matrix operation:

$$\overrightarrow{\theta}=\overrightarrow{\theta}-\frac{\alpha}{m}X^T\left[h_\theta(x)-\overrightarrow{y}\right]$$
that is,

$$\begin{bmatrix} \theta_0 \\ \theta_1 \end{bmatrix}=\begin{bmatrix} \theta_0 \\ \theta_1 \end{bmatrix}-\alpha\begin{bmatrix} \frac{1}{m}\sum_{i=1}^{m}\left(\theta_0+\theta_1 x^{(i)}-y^{(i)}\right) \\ \frac{1}{m}\sum_{i=1}^{m}\left(\theta_0+\theta_1 x^{(i)}-y^{(i)}\right)x^{(i)} \end{bmatrix}$$
def gradientDescent(X0, y0, theta0, alpha0, num_iters):
    J_history0 = np.zeros(num_iters)
    for i in range(num_iters):
        theta0 = theta0 - (alpha0 / len(y0)) * np.dot(X0.T, H(theta0, X0) - y0)
        # see the matrix derivation above
        J_history0[i] = J(theta0, X0, y0)
        print(J_history0[i])
    return theta0, J_history0
Full code
"""2021.1.24单变量线性回归"""
"""Gradient Descent法"""
import numpy as np
import matplotlib.pyplot as plt
# Loading the Data
dataname = 'ex1data1.txt'
data = np.loadtxt(dataname, delimiter=',', usecols=(0, 1), unpack=True)
# delimiter=','表示用逗号分隔
# usecols(0,1)代表读取第1列,第2列
# unpack是指会把每一列当成一个向量输出而不是合并在一起
X = data[0, :][:, np.newaxis] # X比y多一维,否则无法在X中插入一列1,暂时没找到合适的解决方法
X = np.insert(X, 0, 1, axis=1) # 在X列向量的基础上插入第1列,元素均为1
y = data[1, :]
m = np.size(y) # 样本数量
# Gradient Descent
iteration = 1500
alpha = 0.01
theta = np.zeros(2)
theta, J_history = gradientDescent(X, y, theta, alpha, iteration)
# Plot the convergence graph
plt.figure(figsize=(10, 6))
iter_array = np.arange(1, iteration + 1)
# 构造一个从1到1501的向量,作为横坐标
plt.plot(iter_array, J_history, 'b-')
plt.xlabel('Number of iterations')
plt.ylabel('Cost J')
# 原始数据点
plt.figure(figsize=(10, 6))
plt.grid(True)
plt.ylabel('Profit in $10,000s')
plt.xlabel('Population of City in 10,000s')
plt.plot(X[:, 1], y, 'r.', markersize=10)
# 拟合后的曲线
xx = np.arange(5, 23)
yy = theta[0] + theta[1] * xx
plt.plot(xx, yy, 'b-')
plt.show()
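Once trained, theta can be used directly for prediction. A short sketch (the population value here is hypothetical):

# Predict the profit for a city of 70,000 people (populations are in 10,000s)
population = 7.0
profit = theta[0] + theta[1] * population  # equivalent to H(theta, np.array([1, population]))
print(profit * 10000)  # predicted profit in dollars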
II. Multivariate Linear Regression
(1) Gradient Descent
Hypothesis function
$$h_\theta(x)=\theta_0+\theta_1x_1+\theta_2x_2+\dots+\theta_nx_n=\theta^{T}x$$
# Hypothesis
def H(theta0, X0):
    return np.dot(X0, theta0)
Cost function
$$J(\theta_0,\theta_1,\dots,\theta_n)=\frac{1}{2m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)})-y^{(i)}\right)^2$$
# Cost function
def J(theta0, X0, y0):
    return np.sum(np.square(H(theta0, X0) - y0)) / (2*len(y0))
Mean normalization
# Mean normalization (Feature Scaling would also work; pick one)
# There are two kinds of mean normalization: https://www.cnblogs.com/wt714/p/12545129.html
def featureNormalize(X0):
    X_norm = X0.copy()  # copy so the caller's array is not modified in place
    mu = X0.mean(axis=0)
    sigma = np.std(X0, axis=0)
    for i in range(X0.shape[1]):
        X_norm[:, i] = (X0[:, i] - mu[i]) / sigma[i]
        # standardize column by column
        # X.shape[1] is the number of columns, X.shape[0] the number of rows
        # X.mean(axis=0) gives the mean of each column as a row vector
        # np.std(X, axis=0) gives the standard deviation of each column as a row vector
    return X_norm
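To normalize new samples later (e.g. at prediction time), mu and sigma have to be kept. Here is a minimal variant that also returns them (featureNormalizeWithParams is my own hypothetical helper, not part of the original exercise code):

def featureNormalizeWithParams(X0):
    # same normalization as above, but mu and sigma are returned for reuse
    mu = X0.mean(axis=0)
    sigma = np.std(X0, axis=0)
    X_norm = (X0 - mu) / sigma  # broadcasting normalizes every column at once
    return X_norm, mu, sigma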
Gradient descent multi
def gradientDescentMulti(X0, y0, theta0, alpha0, num_iters):
    J_history0 = np.zeros(num_iters)
    for i in range(num_iters):
        theta0 = theta0 - (alpha0 / len(y0)) * np.dot(X0.T, H(theta0, X0) - y0)
        J_history0[i] = J(theta0, X0, y0)
        print(J_history0[i])
    return theta0, J_history0
Full code
"""2021.1.24多变量线性回归"""
"""Gradient Descent法"""
import numpy as np
import matplotlib.pyplot as plt
# Loading the Data
dataname = 'ex1data2.txt'
data = np.loadtxt(dataname, delimiter=',', usecols=(0, 1, 2), unpack=True)
X = data[0:2, :]
X = X.T # 此时,得到的X是2*m向量(列表),要转置为m*2向量
X = np.insert(X, 0, 1, axis=1) # 目前没找到合适的方法,X不升维就没法插入列
X = featureNormalize(X)
y = data[2, :]
m = np.size(y)
# Gradient Descent
iteration = 400
alpha = 0.01
theta = np.zeros(3)
theta, J_history = gradientDescentMulti(X, y, theta, alpha, iteration)
# Plot the convergence graph
plt.figure(figsize=(10, 6))
iter_array = np.arange(1, iteration + 1) # 构造一个从1到401的向量,作为横坐标
plt.plot(iter_array, J_history, 'b-')
plt.xlabel('Number of iterations')
plt.ylabel('Cost J')
plt.show()
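With the learned theta, a new sample must be normalized with the same mu and sigma used in training before predicting. A sketch (the 1650 sq-ft, 3-bedroom house is a hypothetical example):

raw = data[0:2, :].T                             # the original, un-normalized features
mu, sigma = raw.mean(axis=0), np.std(raw, axis=0)
x_new = (np.array([1650.0, 3.0]) - mu) / sigma   # normalize the new sample the same way
x_new = np.insert(x_new, 0, 1)                   # prepend the bias term
print(np.dot(x_new, theta))                      # predicted price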
(2) Normal Equation
Solve for θ directly.
From

$$X\overrightarrow{\theta}=\overrightarrow{y}$$
we get

$$\overrightarrow{\theta}=(X^{T}X)^{-1}X^{T}\overrightarrow{y}$$
This is the matrix form of the least-squares solution.
def standRegression(X0, y0):
    XTX = np.dot(X0.T, X0)  # X^T X
    if np.linalg.det(XTX) == 0.0:
        print("This matrix is singular, cannot do inverse")
        return
    else:
        theta0 = np.dot(np.dot(np.linalg.inv(XTX), X0.T), y0)
        return theta0
# np.linalg.inv(XTX) computes the inverse matrix
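Explicitly inverting X^T X is numerically fragile and fails outright when the matrix is singular. A common alternative (a sketch, not from the original post; standRegressionPinv is a hypothetical name) uses the Moore-Penrose pseudo-inverse, which is defined even in the singular case:

def standRegressionPinv(X0, y0):
    # least-squares theta via the pseudo-inverse; handles singular X^T X
    return np.dot(np.linalg.pinv(X0), y0)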
Full code
"""2021.1.24单变量线性回归"""
"""Normal Equation法"""
import numpy as np
# Loading the Data
dataname = 'ex1data2.txt'
data = np.loadtxt(dataname, delimiter=',', usecols=(0, 1, 2), unpack=True)
X = data[0:2, :]
X = X.T # 此时,得到的X是2*m向量(列表),要转置为m*2向量
X = np.insert(X, 0, 1, axis=1) # 在X列向量的基础上插入第1列,元素均为1
y = data[2, :][:, np.newaxis] # 此时,得到的y是m*1向量(列表),它已经是列向量了
m = np.size(y)
theta = standRegression(X, y)
The function definitions are not repeated inside the full-code listings.
Please credit the source URL when reposting. Thank you.
https://blog.csdn.net/Wolf_AgOH/article/details/113092131