[Machine Learning] Linear Regression

1. Univariate Linear Regression

(1) Gradient Descent Method

Hypothesis function

$h_\theta(x)=\theta_0+\theta_1 x$

# Hypothesis
def H(theta0, X0):
    return np.dot(X0, theta0)
Cost function

$J(\theta_0,\theta_1)=\frac{1}{2m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)})-y^{(i)}\right)^2$

# costFunction
def J(theta0, X0, y0):
    return np.sum(np.square(H(theta0, X0) - y0)) / (2*len(y0))  # correct: returns a scalar
# return np.dot((H(theta, X) - y).T, H(theta, X) - y) / (2*len(y0))  # wrong here: returns a matrix instead of a scalar
# np.sum(A)    sums all elements of the array and returns the total
# np.square(A) squares each element of the array and returns an array
Gradient descent

Written in matrix/vector form, with an extra column of ones added to $x$:

$$X=\begin{bmatrix} 1 & x^{(1)} \\ 1 & x^{(2)} \\ \vdots & \vdots \\ 1 & x^{(m)} \end{bmatrix},\qquad \vec{\theta}=\begin{bmatrix} \theta_0 \\ \theta_1 \end{bmatrix}$$

The hypothesis function can then be written as

$$h_\theta(x)=X\vec{\theta}=\begin{bmatrix} \theta_0+\theta_1 x^{(1)} \\ \theta_0+\theta_1 x^{(2)} \\ \vdots \\ \theta_0+\theta_1 x^{(m)} \end{bmatrix}$$

$$h_\theta(x)-\vec{y}=X\vec{\theta}-\vec{y}=\begin{bmatrix} \theta_0+\theta_1 x^{(1)}-y^{(1)} \\ \theta_0+\theta_1 x^{(2)}-y^{(2)} \\ \vdots \\ \theta_0+\theta_1 x^{(m)}-y^{(m)} \end{bmatrix}$$

The transpose of $X$ is

$$X^T=\begin{bmatrix} 1 & 1 & \cdots & 1 \\ x^{(1)} & x^{(2)} & \cdots & x^{(m)} \end{bmatrix}$$

so that

$$X^T\left[h_\theta(x)-\vec{y}\right]=\begin{bmatrix} \sum_{i=1}^{m}\left(\theta_0+\theta_1 x^{(i)}-y^{(i)}\right) \\ \sum_{i=1}^{m}\left(\theta_0+\theta_1 x^{(i)}-y^{(i)}\right)x^{(i)} \end{bmatrix}$$

Therefore, inside the gradient-descent loop, all components of $\theta$ can be updated simultaneously with one matrix operation:

$$\vec{\theta}=\vec{\theta}-\frac{\alpha}{m}X^T\left[h_\theta(x)-\vec{y}\right]$$

which, written out component by component, is

$$\begin{bmatrix} \theta_0 \\ \theta_1 \end{bmatrix}=\begin{bmatrix} \theta_0 \\ \theta_1 \end{bmatrix}-\alpha\begin{bmatrix} \frac{1}{m}\sum_{i=1}^{m}\left(\theta_0+\theta_1 x^{(i)}-y^{(i)}\right) \\ \frac{1}{m}\sum_{i=1}^{m}\left(\theta_0+\theta_1 x^{(i)}-y^{(i)}\right)x^{(i)} \end{bmatrix}$$
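As a quick sanity check of the vectorized form, here is a minimal sketch (the array values are made up) comparing $X^T\left[h_\theta(x)-\vec{y}\right]$ against the two explicit sums:

import numpy as np

# toy data: 3 samples, values chosen arbitrarily for this check
X_chk = np.array([[1.0, 2.0],
                  [1.0, 3.0],
                  [1.0, 5.0]])  # first column is the added ones, second column is x
y_chk = np.array([4.0, 7.0, 11.0])
theta_chk = np.array([0.5, 1.5])

vectorized = np.dot(X_chk.T, np.dot(X_chk, theta_chk) - y_chk)  # X^T (X*theta - y)

err = theta_chk[0] + theta_chk[1] * X_chk[:, 1] - y_chk         # theta0 + theta1*x(i) - y(i)
explicit = np.array([np.sum(err), np.sum(err * X_chk[:, 1])])

print(np.allclose(vectorized, explicit))  # True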

def gradientDescent(X0, y0, theta0, alpha0, num_iters):
    J_history0 = np.zeros(num_iters)

    for i in range(num_iters):
        theta0 = theta0 - (alpha0 / len(y0)) * np.dot(X0.T, H(theta0, X0) - y0)
        # see the derivation above: all components of theta are updated at once
        J_history0[i] = J(theta0, X0, y0)
        print(J_history0[i])

    return theta0, J_history0
Complete code
"""2021.1.24单变量线性回归"""
"""Gradient Descent法"""
import numpy as np
import matplotlib.pyplot as plt

# Loading the Data
dataname = 'ex1data1.txt'
data = np.loadtxt(dataname, delimiter=',', usecols=(0, 1), unpack=True)
# delimiter=',' means the fields are comma-separated
# usecols=(0, 1) reads the 1st and 2nd columns
# unpack=True returns each column as its own vector instead of one combined array

X = data[0, :][:, np.newaxis]   # give X one more dimension than y (an m*1 column); otherwise a column of ones cannot be inserted
X = np.insert(X, 0, 1, axis=1)  # insert a first column whose elements are all 1

y = data[1, :]
m = np.size(y)  # number of samples

# Gradient Descent
iteration = 1500
alpha = 0.01
theta = np.zeros(2)

theta, J_history = gradientDescent(X, y, theta, alpha, iteration)

# Plot the convergence graph
plt.figure(figsize=(10, 6))
iter_array = np.arange(1, iteration + 1)
# a vector from 1 to 1500 (one entry per iteration), used as the x-axis
plt.plot(iter_array, J_history, 'b-')
plt.xlabel('Number of iterations')
plt.ylabel('Cost J')

# original data points
plt.figure(figsize=(10, 6))
plt.grid(True)
plt.ylabel('Profit in $10,000s')
plt.xlabel('Population of City in 10,000s')
plt.plot(X[:, 1], y, 'r.', markersize=10)

# fitted line
xx = np.arange(5, 23)
yy = theta[0] + theta[1] * xx
plt.plot(xx, yy, 'b-')
plt.show()
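With the fitted theta, a prediction is just a dot product with a feature row. A minimal usage sketch (the two population values, in units of 10,000 people, are only illustrative inputs):

# predict profit for two example city sizes
predict1 = np.dot(np.array([1, 3.5]), theta)  # population of 35,000
predict2 = np.dot(np.array([1, 7.0]), theta)  # population of 70,000
print('population 35,000 -> predicted profit (in $10,000s):', predict1)
print('population 70,000 -> predicted profit (in $10,000s):', predict2)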

2. Multivariate Linear Regression

(1) Gradient Descent Method

Hypothesis function

$h_\theta(x)=\theta_0+\theta_1 x_1+\theta_2 x_2+\dots+\theta_n x_n=\theta^T X$

# Hypothesis
def H(theta0, X0):
    return np.dot(X0, theta0)
Cost function

$J(\theta_0,\theta_1,\dots,\theta_n)=\frac{1}{2m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)})-y^{(i)}\right)^2$

# Cost function
def J(theta0, X0, y0):
    return np.sum(np.square(H(theta0, X0) - y0)) / (2*len(y0))
Mean normalization
# Mean normalization (plain feature scaling would also work; either one is enough)
# There are two common variants of mean normalization: https://www.cnblogs.com/wt714/p/12545129.html
def featureNormalize(X0):
    X_norm = X0.copy()  # work on a copy so the caller's array is not modified in place

    mu = X0.mean(axis=0)        # mean of each column, as a row vector
    sigma = np.std(X0, axis=0)  # standard deviation of each column, as a row vector

    for i in range(X0.shape[1]):
        X_norm[:, i] = (X0[:, i] - mu[i]) / sigma[i]
    # normalize column by column
    # X.shape[1] is the number of columns, X.shape[0] the number of rows
    # X.mean(axis=0) gives the per-column means as a row vector
    # np.std(X, axis=0) gives the per-column standard deviations as a row vector
    return X_norm
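One caveat: when predicting on new samples later, the mu and sigma computed from the training data must be reused rather than recomputed on the new inputs. featureNormalize above does not return them, so this sketch recomputes them outside the function; X_raw and the sample values are hypothetical:

# hypothetical usage: X_raw is the un-normalized m*n feature matrix used for training
mu = X_raw.mean(axis=0)
sigma = np.std(X_raw, axis=0)

x_new = np.array([1650.0, 3.0])           # an illustrative new sample (e.g. size, bedrooms)
x_new_norm = (x_new - mu) / sigma         # apply the training-set mu and sigma
x_new_norm = np.insert(x_new_norm, 0, 1)  # add the intercept term before multiplying by theta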
Gradient descent multi
def gradientDescentMulti(X0, y0, theta0, alpha0, num_iters):
    J_history0 = np.zeros(num_iters)

    for i in range(num_iters):
        theta0 = theta0 - (alpha0 / len(y0)) * np.dot(X0.T, H(theta0, X0) - y0)
        J_history0[i] = J(theta0, X0, y0)
        print(J_history0[i])

    return theta0, J_history0
Complete code
"""2021.1.24多变量线性回归"""
"""Gradient Descent法"""
import numpy as np
import matplotlib.pyplot as plt

# Loading the Data
dataname = 'ex1data2.txt'
data = np.loadtxt(dataname, delimiter=',', usecols=(0, 1, 2), unpack=True)

X = data[0:2, :]
X = X.T                         # the loaded X is 2*m, so transpose it to an m*2 array
X = featureNormalize(X)         # normalize the features first (the column of ones must not be normalized)
X = np.insert(X, 0, 1, axis=1)  # then insert a first column whose elements are all 1

y = data[2, :]
m = np.size(y)

# Gradient Descent
iteration = 400
alpha = 0.01
theta = np.zeros(3)

theta, J_history = gradientDescentMulti(X, y, theta, alpha, iteration)

# Plot the convergence graph
plt.figure(figsize=(10, 6))
iter_array = np.arange(1, iteration + 1)  # a vector from 1 to 400 (one entry per iteration), used as the x-axis
plt.plot(iter_array, J_history, 'b-')
plt.xlabel('Number of iterations')
plt.ylabel('Cost J')
plt.show()

(2) Normal Equation Method

Solving for $\theta$ directly: starting from

$$X\vec{\theta}=\vec{y}$$

$X$ is generally not square (and the system usually has no exact solution), so multiplying both sides by $X^T$ and inverting $X^TX$ gives the least-squares solution

$$\vec{\theta}=(X^TX)^{-1}X^T\vec{y}$$

i.e. the matrix form of the least-squares (normal equation) method.

def standRegression(X0, y0):
    XTX = np.dot(X0.T, X0)  # X^T X (note the transpose: X0 is m*n, so XTX is n*n)
    if np.linalg.det(XTX) == 0.0:
        print("This matrix is singular, cannot do inverse")
        return
    else:
        theta0 = np.dot(np.dot(np.linalg.inv(XTX), X0.T), y0)
        return theta0
        # np.linalg.inv(XTX) computes the inverse of XTX
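If $X^TX$ turns out to be singular (for example because of redundant features), the function above only prints a message and returns nothing. A minimal alternative sketch using the Moore-Penrose pseudo-inverse, which still yields a least-squares solution in that case:

def standRegressionPinv(X0, y0):
    # np.linalg.pinv handles a singular X^T X, unlike np.linalg.inv
    return np.dot(np.dot(np.linalg.pinv(np.dot(X0.T, X0)), X0.T), y0)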
Complete code
"""2021.1.24单变量线性回归"""
"""Normal Equation法"""
import numpy as np

# Loading the Data
dataname = 'ex1data2.txt'
data = np.loadtxt(dataname, delimiter=',', usecols=(0, 1, 2), unpack=True)
X = data[0:2, :]
X = X.T                         # the loaded X is 2*m, so transpose it to an m*2 array
X = np.insert(X, 0, 1, axis=1)  # insert a first column whose elements are all 1

y = data[2, :][:, np.newaxis]   # y is an m*1 array here, i.e. already a column vector
m = np.size(y)

theta = standRegression(X, y)

The function definitions are not repeated inside the complete-code listings above.
Please credit the original URL when reposting. Thanks.
https://blog.csdn.net/Wolf_AgOH/article/details/113092131
