代码实现损失函数的参数求解
一、正规方程组法
import numpy as np
import matplotlib.pyplot as plt

# Synthetic linear data: y = 4 + 3*x + Gaussian noise.
X = 2 * np.random.rand(100, 1)   # rand(100,1): 100 samples, uniform on [0, 2)
y = 4 + 3 * X + np.random.randn(100, 1)  # randn(): standard-normal noise

# Prepend a bias column of ones (x0 = 1) to the feature matrix.
X_b = np.c_[np.ones((100, 1)), X]
print(X_b)

# Closed-form normal equation: theta = (X^T X)^-1 X^T y.
# Fast on small data, but impractical when the sample count is large.
theta_best = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)
print(theta_best)  # [w0, w1]

# Evaluate the fitted line at the interval endpoints x = 0 and x = 2.
X_new = np.array([[0], [2]])
X_new_b = np.c_[np.ones((2, 1)), X_new]
print(X_new_b)
y_predict = X_new_b.dot(theta_best)
print(y_predict)

# Red line: fitted predictions; blue dots: the noisy observations.
plt.plot(X_new, y_predict, 'r-')
plt.plot(X, y, 'b.')
plt.axis([0, 2, 0, 15])
plt.show()
输出结果:w0=4.32649351,w1=2.83991238
二、使用sklearn方式求解
import numpy as np
from sklearn.linear_model import LinearRegression

# Same synthetic data: y = 4 + 3*x + standard-normal noise.
X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)

# Ordinary least squares via scikit-learn.
lin_reg = LinearRegression()
lin_reg.fit(X, y)
print(lin_reg.intercept_, lin_reg.coef_)  # intercept (w0) and slope (w1)

# Predict at the endpoints x = 0 and x = 2.
X_new = np.array([[0], [2]])
print(lin_reg.predict(X_new))
输出结果:w0=4.2259848,w1=2.72704983
三、批量梯度下降法
import numpy as np
X = 2 * np.random.rand(100,1)
y = 4 + 3 * X + np.random.randn(100,1)
X_b = np.c_[np.ones((100,1)),X]
learning_rate = 0.1
n_iterations = 1000
m = 100
#初始化theta,w0...wn
theta = np.random.randn(2,1)
count = 0
#4、不会设置阈值,设置超参数迭代次数,迭代次数到了我们就认为收敛了
for interation in range(n_iterations):
count += 1
#2、求梯度
gradients = 1/m * X_b.T.dot(X_b.dot(theta)-y)
#3、调整theta
theta = theta - learning_rate * gradients
print(count)
print(theta)
输出结果:w0=3.98391228,w1=2.88496664
四、随机梯度下降
下面代码里面除了随机抽取一条数据来求解梯度,还随着迭代次数的增多,不断减小步长(学习率)
为了让越接近最优解的时候,调整幅度越小,避免来回震荡!
调整的幅度取决于学习率和梯度。事实上,越接近最优解,梯度的绝对值越小;因此即使不人为地调小步长,调整的幅度也会自动减小。
import numpy as np
X = 2 * np.random.rand(100,1)
y = 4 + 3 * X + np.random.randn(100,1)
X_b = np.c_[np.ones((100,1)),X]
n_epochs = 500
t0, t1 = 5, 50 #超参数
m = 100
def learning_schedule(t): #随着迭代次数的增加,学习率逐渐减小。即使学习率不变,随着迭代次数的增加,梯度也在不断的变化
return t0 / (t + t1)
theta = np.random.randn(2,1)
for epoch in range(n_epochs):
for i in range(m):
random_index = np.random.randint(m)
xi = X_b[random_index:random_index+1]
yi = y[random_index:random_index+1]
gradients = 2 * xi.T.dot(xi.dot(theta)-yi)
learning_rate = learning_schedule(epoch*i + i)
theta = theta - learning_rate * gradients
print(theta)
输出结果:w0=4.35270807,w1=3.00822779