I. Univariate Linear Regression
(1) Gradient Descent
Hypothesis function
$$h_{\theta}(x)=\theta_{0}+\theta_{1}x$$
# Hypothesis
def H(theta0, X0):
    return np.dot(X0, theta0)
Cost function
$$J(\theta_{0},\theta_{1})=\frac{1}{2m}\sum_{i=1}^{m}\left(h_{\theta}(x^{(i)})-y^{(i)}\right)^{2}$$
# costFunction
def J(theta0, X0, y0):
    return np.sum(np.square(H(theta0, X0) - y0)) / (2*len(y0))  # correct: returns a scalar
    # return np.dot((H(theta0, X0) - y0).T, H(theta0, X0) - y0) / (2*len(y0))  # wrong: returns a matrix
    # np.sum(A) sums every element of the matrix (NumPy array) and returns the total
    # np.square(A) squares every element of the matrix (NumPy array) and returns an array
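As a quick sanity check, here is a sketch on made-up toy data (assuming the H and J defined above are in scope) whose cost can be verified by hand:

import numpy as np

# Two samples with a bias column; theta = [0, 1] gives predictions [1, 2].
# With y = [0, 0], J = (1^2 + 2^2) / (2*2) = 1.25.
X_toy = np.array([[1.0, 1.0], [1.0, 2.0]])
y_toy = np.array([0.0, 0.0])
theta_toy = np.array([0.0, 1.0])
print(J(theta_toy, X_toy, y_toy))  # 1.25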
Gradient descent
Written in matrix/vector form, where an extra column of ones is prepended to $x$:

$$X=\begin{bmatrix} 1 & x^{(1)} \\ 1 & x^{(2)} \\ \vdots & \vdots \\ 1 & x^{(m)} \end{bmatrix},\qquad \overrightarrow{\theta}=\begin{bmatrix} \theta_0 \\ \theta_1 \end{bmatrix}$$
The hypothesis function can then be written as

$$h_\theta(x)=X\overrightarrow{\theta}=\begin{bmatrix} \theta_0+\theta_1 x^{(1)} \\ \theta_0+\theta_1 x^{(2)} \\ \vdots \\ \theta_0+\theta_1 x^{(m)} \end{bmatrix}$$
so that

$$h_\theta(x)-\overrightarrow{y}=X\overrightarrow{\theta}-\overrightarrow{y}=\begin{bmatrix} \theta_0+\theta_1 x^{(1)}-y^{(1)} \\ \theta_0+\theta_1 x^{(2)}-y^{(2)} \\ \vdots \\ \theta_0+\theta_1 x^{(m)}-y^{(m)} \end{bmatrix}$$
Taking the transpose of $X$:

$$X^T=\begin{bmatrix} 1 & 1 & \cdots & 1 \\ x^{(1)} & x^{(2)} & \cdots & x^{(m)} \end{bmatrix}$$
Hence

$$X^T\left[h_\theta(x)-\overrightarrow{y}\right]=\begin{bmatrix} \sum_{i=1}^{m}\left(\theta_0+\theta_1 x^{(i)}-y^{(i)}\right) \\ \sum_{i=1}^{m}\left(\theta_0+\theta_1 x^{(i)}-y^{(i)}\right)x^{(i)} \end{bmatrix}$$
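To double-check this identity numerically, here is a small sketch (toy values chosen purely for illustration) that compares the matrix product against the two sums written out explicitly:

import numpy as np

x = np.array([1.0, 2.0, 3.0])
y = np.array([2.0, 5.0, 7.0])
X = np.column_stack([np.ones_like(x), x])    # prepend the ones column
theta = np.array([0.5, 2.0])

err = X.dot(theta) - y                       # h_theta(x) - y
vectorized = X.T.dot(err)                    # X^T [h_theta(x) - y]
elementwise = np.array([np.sum(err), np.sum(err * x)])
print(np.allclose(vectorized, elementwise))  # True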
Therefore, inside the gradient-descent for loop, all components of $\theta$ can be updated simultaneously with a single matrix operation:

$$\overrightarrow{\theta}=\overrightarrow{\theta}-\frac{\alpha}{m}X^T\left[h_\theta(x)-\overrightarrow{y}\right]$$
that is,

$$\begin{bmatrix} \theta_0 \\ \theta_1 \end{bmatrix}=\begin{bmatrix} \theta_0 \\ \theta_1 \end{bmatrix}-\alpha\begin{bmatrix} \frac{1}{m}\sum_{i=1}^{m}\left(\theta_0+\theta_1 x^{(i)}-y^{(i)}\right) \\ \frac{1}{m}\sum_{i=1}^{m}\left(\theta_0+\theta_1 x^{(i)}-y^{(i)}\right)x^{(i)} \end{bmatrix}$$
def gradientDescent(X0, y0, theta0, alpha0, num_iters):
    J_history0 = np.zeros(num_iters)
    for i in range(num_iters):
        theta0 = theta0 - (alpha0 / len(y0)) * np.dot(X0.T, H(theta0, X0) - y0)
        # see the matrix derivation above
        J_history0[i] = J(theta0, X0, y0)
        print(J_history0[i])
    return theta0, J_history0
Full code
"""2021.1.24单变量线性回归"""
"""Gradient Descent法"""
import numpy as np
import matplotlib.pyplot as plt
# Loading the Data
dataname = 'ex1data1.txt'
data = np.loadtxt(dataname, delimiter=',', usecols=(0, 1), unpack=True)
# delimiter=','表示用逗号分隔
# usecols(0,1)代表读取第1列,第2列
# unpack是指会把每一列当成一个向量输出而不是合并在一起
X = data[0, :][:, np.newaxis] # X比y多一维,否则无法在X中插入一列1,暂时没找到合适的解决方法
X = np.insert(X, 0, 1, axis=1) # 在X列向量的基础上插入第1列,元素均为1
y = data[1, :]
m = np.size(y) # 样本数量
# Gradient Descent
iteration = 1500
alpha = 0.01
theta = np.zeros(2)
theta, J_history = gradientDescent(X, y, theta, alpha, iteration)
# Plot the convergence graph
plt.figure(figsize=(10, 6))
iter_array = np.arange(1, iteration + 1)
# 构造一个从1到1501的向量,作为横坐标
plt.plot(iter_array, J_history, 'b-')
plt.xlabel('Number of iterations')
plt.ylabel('Cost J')
# 原始数据点
plt.figure(figsize=(10, 6))
plt.grid(True)
plt.ylabel('Profit in $10,000s')
plt.xlabel('Population of City in 10,000s')
plt.plot(X[:, 1], y, 'r.', markersize=10)
# 拟合后的曲线
xx = np.arange(5, 23)
yy = theta[0] + theta[1] * xx
plt.plot(xx, yy, 'b-')
plt.show()
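Once trained, theta can be used directly for prediction. A short sketch (the population value here is hypothetical):

# Predict the profit for a city of 70,000 people (populations are in 10,000s)
population = 7.0
profit = theta[0] + theta[1] * population  # equivalent to H(theta, np.array([1, population]))
print(profit * 10000)  # predicted profit in dollars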
II. Multivariate Linear Regression
(1) Gradient Descent
Hypothesis function
$$h_\theta(x)=\theta_0+\theta_1x_1+\theta_2x_2+\dots+\theta_nx_n=\theta^{T}x$$
# Hypothesis
def H(theta0, X0):
    return np.dot(X0, theta0)
Cost function
$$J(\theta_0,\theta_1,\dots,\theta_n)=\frac{1}{2m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)})-y^{(i)}\right)^2$$
# Cost function
def J(theta0, X0, y0):
    return np.sum(np.square(H(theta0, X0) - y0)) / (2*len(y0))
Mean normalization
# Mean normalization (Feature Scaling would also work; pick one)
# There are two kinds of mean normalization: https://www.cnblogs.com/wt714/p/12545129.html
def featureNormalize(X0):
    X_norm = X0.copy()  # copy so the caller's array is not modified in place
    mu = X0.mean(axis=0)
    sigma = np.std(X0, axis=0)
    for i in range(X0.shape[1]):
        X_norm[:, i] = (X0[:, i] - mu[i]) / sigma[i]
        # standardize column by column
        # X.shape[1] is the number of columns, X.shape[0] the number of rows
        # X.mean(axis=0) gives the mean of each column as a row vector
        # np.std(X, axis=0) gives the standard deviation of each column as a row vector
    return X_norm
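To normalize new samples later (e.g. at prediction time), mu and sigma have to be kept. Here is a minimal variant that also returns them (featureNormalizeWithParams is my own hypothetical helper, not part of the original exercise code):

def featureNormalizeWithParams(X0):
    # same normalization as above, but mu and sigma are returned for reuse
    mu = X0.mean(axis=0)
    sigma = np.std(X0, axis=0)
    X_norm = (X0 - mu) / sigma  # broadcasting normalizes every column at once
    return X_norm, mu, sigma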
Gradient descent multi
def gradientDescentMulti(X0, y0, theta0, alpha0, num_iters):
    J_history0 = np.zeros(num_iters)
    for i in range(num_iters):
        theta0 = theta0 - (alpha0 / len(y0)) * np.dot(X0.T, H(theta0, X0) - y0)
        J_history0[i] = J(theta0, X0, y0)
        print(J_history0[i])
    return theta0, J_history0
Full code
"""2021.1.24多变量线性回归"""
"""Gradient Descent法"""
import numpy as np
import matplotlib.pyplot as plt
# Loading the Data
dataname = 'ex1data2.txt'
data = np.loadtxt(dataname, delimiter=',', usecols=(0, 1, 2), unpack=True)
X = data[0:2, :]
X = X.T # 此时,得到的X是2*m向量(列表),要转置为m*2向量
X = np.insert(X, 0, 1, axis=1) # 目前没找到合适的方法,X不升维就没法插入列
X = featureNormalize(X)
y = data[2, :]
m = np.size(y)
# Gradient Descent
iteration = 400
alpha = 0.01
theta = np.zeros(3)
theta, J_history = gradientDescentMulti(X, y, theta, alpha, iteration)
# Plot the convergence graph
plt.figure(figsize=(10, 6))
iter_array = np.arange(1, iteration + 1) # 构造一个从1到401的向量,作为横坐标
plt.plot(iter_array, J_history, 'b-')
plt.xlabel('Number of iterations')
plt.ylabel('Cost J')
plt.show()
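With the learned theta, a new sample must be normalized with the same mu and sigma used in training before predicting. A sketch (the 1650 sq-ft, 3-bedroom house is a hypothetical example):

raw = data[0:2, :].T                             # the original, un-normalized features
mu, sigma = raw.mean(axis=0), np.std(raw, axis=0)
x_new = (np.array([1650.0, 3.0]) - mu) / sigma   # normalize the new sample the same way
x_new = np.insert(x_new, 0, 1)                   # prepend the bias term
print(np.dot(x_new, theta))                      # predicted price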
(2) Normal Equation
Solve for θ directly.
From

$$X\overrightarrow{\theta}=\overrightarrow{y}$$
we get

$$\overrightarrow{\theta}=(X^{T}X)^{-1}X^{T}\overrightarrow{y}$$
This is the matrix form of the least-squares solution.
def standRegression(X0, y0):
    XTX = np.dot(X0.T, X0)  # X^T X
    if np.linalg.det(XTX) == 0.0:
        print("This matrix is singular, cannot do inverse")
        return
    else:
        theta0 = np.dot(np.dot(np.linalg.inv(XTX), X0.T), y0)
        return theta0
# np.linalg.inv(XTX) computes the inverse matrix
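Explicitly inverting X^T X is numerically fragile and fails outright when the matrix is singular. A common alternative (a sketch, not from the original post; standRegressionPinv is a hypothetical name) uses the Moore-Penrose pseudo-inverse, which is defined even in the singular case:

def standRegressionPinv(X0, y0):
    # least-squares theta via the pseudo-inverse; handles singular X^T X
    return np.dot(np.linalg.pinv(X0), y0)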
Full code
"""2021.1.24单变量线性回归"""
"""Normal Equation法"""
import numpy as np
# Loading the Data
dataname = 'ex1data2.txt'
data = np.loadtxt(dataname, delimiter=',', usecols=(0, 1, 2), unpack=True)
X = data[0:2, :]
X = X.T # 此时,得到的X是2*m向量(列表),要转置为m*2向量
X = np.insert(X, 0, 1, axis=1) # 在X列向量的基础上插入第1列,元素均为1
y = data[2, :][:, np.newaxis] # 此时,得到的y是m*1向量(列表),它已经是列向量了
m = np.size(y)
theta = standRegression(X, y)
The function definitions are not repeated inside the full-code listings.
Please credit the source URL when reposting. Thank you.
https://blog.csdn.net/Wolf_AgOH/article/details/113092131