A Simple Homework Record
1. Data Processing
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Read the file: semicolon-separated, first row holds the column names
df = pd.read_csv('winequality-red.csv', sep=';', header=0)

# Split into feature matrix and label vector
X = df.drop("quality", axis=1).values
y = df["quality"].values

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split into training and test sets
# NOTE: this splits the raw X, not X_scaled (see the summary in section 6)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
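Incidentally, X_scaled is computed above but never used: the split is performed on the raw X. A minimal corrected sketch (my suggestion, not part of the original submission), which also fits the scaler on the training split only so the test set does not leak into the standardization:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  # fit the scaler on the training data only
X_test = scaler.transform(X_test)        # apply the same transform to the test data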
2. Model Training
class LinearRegressionGD:
    def __init__(self, learning_rate=0.0001, n_iters=1000):
        self.lr = learning_rate  # learning rate (step size)
        self.n_iters = n_iters   # number of iterations
        self.weights = None      # w
        self.bias = None         # b
        self.loss = []           # per-iteration loss, used to plot the convergence curve

    # Training
    def fit(self, X, y):
        # Initialize w and b to zero
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        # Batch gradient descent
        for i in range(self.n_iters):
            h_x = np.dot(X, self.weights) + self.bias      # predictions h(x) = Xw + b
            dw = (1 / n_samples) * np.dot(X.T, (h_x - y))  # gradient w.r.t. w
            db = (1 / n_samples) * np.sum(h_x - y)         # gradient w.r.t. b
            self.weights -= self.lr * dw
            self.bias -= self.lr * db
            loss = np.mean((h_x - y) ** 2)                 # MSE loss
            self.loss.append(loss)

    # Prediction (rounded to the nearest integer quality score)
    def predict(self, X):
        return np.round(np.dot(X, self.weights) + self.bias)
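As a sanity check on fit (a sketch of mine, assuming scikit-learn is available), the closed-form least-squares solution gives a reference training loss that the gradient-descent loss curve should approach from above:

from sklearn.linear_model import LinearRegression

ref = LinearRegression().fit(X_train, y_train)
ref_mse = np.mean((ref.predict(X_train) - y_train) ** 2)
print(f'closed-form training MSE: {ref_mse:.4f}')  # lower bound for the GD loss curve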
3. Adjusting the Learning Rate
# Train with three learning rates and print each model's predictions
learning_rates = [0.0001, 0.0002, 0.0005]
models = []
for lr in learning_rates:
    model = LinearRegressionGD(learning_rate=lr, n_iters=100)
    model.fit(X_train, y_train)
    models.append(model)
    y_pred = model.predict(X_test)
    print(y_pred)
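Printing the raw prediction vectors is hard to compare by eye. A small evaluation sketch (the metrics are my choice, not required by the assignment), reporting test MSE plus how often the rounded prediction hits the exact quality score:

for lr, model in zip(learning_rates, models):
    y_pred = model.predict(X_test)
    mse = np.mean((y_pred - y_test) ** 2)  # test MSE of the rounded predictions
    acc = np.mean(y_pred == y_test)        # fraction of exact matches
    print(f'lr={lr}: test MSE = {mse:.3f}, exact-match rate = {acc:.3f}')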
4. Loss Function Convergence Curves
import matplotlib.pyplot as plt

# Plot the loss convergence curve for each learning rate
plt.figure(figsize=(10, 6))
for i, model in enumerate(models):
    plt.plot(range(1, model.n_iters + 1), model.loss, label=f'Learning Rate = {learning_rates[i]}')
plt.xlabel('Iterations')
plt.ylabel('Loss')
plt.title('Convergence of Loss Function with Different Learning Rates')
plt.legend()
plt.show()
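If the curve for the largest learning rate dwarfs the others early on, a log-scaled y-axis (added before plt.show()) makes the comparison readable:

plt.yscale('log')  # compress large early losses so all three curves stay visible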
5. Results
Predicted values:
Loss function convergence curves:
6. Personal Summary
The step size has a large effect on the iterations. I am not sure whether my code was sloppy, but when I first set the learning rate to 0.001 the loss did not converge at all; it oscillated so wildly the curve was unreadable. I remembered that a small step size converges slowly while a large one oscillates (for plain linear regression the MSE loss is convex, so getting stuck in a local optimum is not actually the risk), but it turns out 0.001 already counts as large here. The likely culprit is the preprocessing above: the split uses the raw X instead of X_scaled, and unstandardized features such as total sulfur dioxide run into the hundreds, which makes the curvature along those directions huge, so only tiny learning rates stay stable. With properly standardized features, 0.001 (and considerably larger values) should converge without trouble.
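A quick way to confirm the divergence (a sketch, assuming the unscaled split and the class above): run a few iterations at lr = 0.001 and watch the recorded losses grow instead of shrink.

check = LinearRegressionGD(learning_rate=0.001, n_iters=10)
check.fit(X_train, y_train)
print(check.loss)  # on the unscaled features these values blow up instead of decreasing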