最优化线性回归分析(波士顿房价预测)和模型预测
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

# BUG FIX: `load_boston` was deprecated in scikit-learn 1.0 and removed in
# 1.2 (ethical concerns over the dataset's `B` feature), so importing it
# raises ImportError on current installs. Fall back to loading the data
# from the original CMU source, exactly as recommended by the scikit-learn
# deprecation notice.
try:
    from sklearn.datasets import load_boston
    boston = load_boston()
    X = boston.data
    y = boston.target
except ImportError:
    import pandas as pd
    data_url = "http://lib.stat.cmu.edu/datasets/boston"
    # Each sample is spread over two physical lines: 11 features on the
    # first line, 2 features + the target (MEDV) on the second.
    raw = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
    X = np.hstack([raw.values[::2, :], raw.values[1::2, :2]])
    y = raw.values[1::2, 2]

# Standardize each feature to zero mean / unit variance, then prepend a
# bias column of ones so theta[0] acts as the intercept.
scaler = StandardScaler()
X = scaler.fit_transform(X)
X = np.concatenate([np.ones((X.shape[0], 1)), X], axis=1)
def gradient_descent(X, y, learning_rate, num_iterations):
    """Fit linear-regression weights to (X, y) by full-batch gradient descent.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        Design matrix (expected to already contain a bias column if desired).
    y : ndarray of shape (n_samples,)
        Targets.
    learning_rate : float
        Step size for each update.
    num_iterations : int
        Number of gradient steps to take.

    Returns
    -------
    (theta, loss_history) : the learned weight vector and the MSE recorded
    at the start of every iteration (before that iteration's update).
    """
    n_samples = X.shape[0]
    theta = np.zeros(X.shape[1])
    loss_history = []
    for _ in range(num_iterations):
        residual = X.dot(theta) - y
        # Record the MSE *before* applying this step's update.
        loss_history.append(np.mean(residual ** 2))
        # 1/n gradient convention (the factor 2 of d/dθ MSE is absorbed
        # into the learning rate).
        theta = theta - (learning_rate / n_samples) * X.T.dot(residual)
    return theta, loss_history
# Hyperparameters for the full-batch gradient-descent run.
learning_rate = 0.01
num_iterations = 1000
# Fit on the standardized, bias-augmented design matrix built above.
theta, loss_history = gradient_descent(X, y, learning_rate, num_iterations)
print("参数theta:", theta)
# Plot the training-loss curve against iteration count.
# NOTE(review): the axis labels/title are Chinese; they may render as empty
# boxes unless a CJK-capable matplotlib font is configured — confirm locally.
plt.plot(loss_history)
plt.xlabel("迭代次数")
plt.ylabel("损失函数")
plt.title("损失函数变化曲线")
plt.show()
优化代码:加入提前停止(early stopping),并且每 10 次迭代才记录一次损失,减少内存开销
def gradient_descent(X, y, learning_rate, num_iterations):
    """Optimized full-batch gradient descent with early stopping.

    Identical update rule to the basic version, but the loss is recorded
    only every 10th iteration, and training stops early once two
    consecutive *recorded* losses differ by less than 1e-5.

    Returns
    -------
    (theta, loss_history) : learned weights and the sparsely sampled
    (one entry per 10 iterations) MSE history.
    """
    n_samples = X.shape[0]
    theta = np.zeros(X.shape[1])
    prev_loss = np.inf
    loss_history = []
    for step in range(num_iterations):
        residual = X.dot(theta) - y
        loss = np.mean(residual ** 2)
        # 1/n gradient convention; the factor 2 is folded into the rate.
        theta = theta - (learning_rate / n_samples) * X.T.dot(residual)
        if step % 10:
            continue  # only bookkeep every 10th iteration
        loss_history.append(loss)
        # Convergence test: compare against the loss recorded 10 steps ago.
        if abs(prev_loss - loss) < 1e-5:
            break
        prev_loss = loss
    return theta, loss_history
模型预测
# Pre-trained parameters: theta[0] is the intercept, theta[1:] are weights
# for the 13 features in the order of the training design matrix.
theta = np.array([22.53183355, -0.78102837, 0.81194215, -0.27316748, 0.74256629, -1.57697862,
                  2.88943522, -0.10349181, -2.74240875, 1.45982586, -0.88309816, -1.95217556,
                  0.87224133, -3.64278025])
# Two new samples, 13 raw feature values each.
X_new = np.array([[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0],
                  [14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0]])
# BUG FIX: theta was trained on StandardScaler-standardized features, so new
# samples must be transformed with the SAME fitted scaler before predicting.
# Feeding raw feature values into the dot product (as the original code did)
# silently produces meaningless predictions.
y_pred = np.dot(scaler.transform(X_new), theta[1:]) + theta[0]
print("预测值:", y_pred)