# 《李沐动手学深度学习》P30 ("Dive into Deep Learning" by Mu Li, page 30)
import numpy as np
import matplotlib.pyplot as plt
import random
# y = x * w + b + noise
# x dimension: (num_samples, 2)
# w dimension: (2, 1)
# b dimension: (1, 1)
# noise dimension: (num_samples, 1)
def generate_data(num_samples, w, b):
    """Create a synthetic linear dataset: y = x @ w + b + gaussian noise.

    num_samples -- number of rows to draw.
    w -- weight values, coerced to shape (2, 1).
    b -- bias value, coerced to shape (1, 1).
    Returns (x, y): x of shape (num_samples, 2), y of shape (num_samples, 1).
    """
    features = np.random.rand(num_samples, 2)
    weight = np.array(w).reshape(2, 1)
    bias = np.array(b).reshape(1, 1)
    # Small noise so the linear signal dominates.
    jitter = np.random.normal(0, 0.01, (num_samples, 1))
    targets = np.dot(features, weight) + bias + jitter
    return features, targets
def data_iter(x, y, batch_size):
    """Yield (features, labels) mini-batches in a freshly shuffled order.

    The final batch is smaller than batch_size when len(x) is not an
    exact multiple of batch_size.
    """
    order = list(range(len(x)))
    random.shuffle(order)
    for start in range(0, len(order), batch_size):
        # Slicing past the end of the list clamps automatically.
        batch_idx = order[start:start + batch_size]
        yield x[batch_idx], y[batch_idx]
def model(x, w, b):
    """Linear model: affine transform of x by weights w plus bias b."""
    return x @ w + b
# squared loss
def loss(y_hat, y):
    """Mean squared-error loss, halved so the gradient carries no factor of 2."""
    diff = y_hat - y
    return (diff ** 2).mean() / 2
# calculate the gradient of loss function
def gradient(x, y, y_hat):
    """Analytic gradients of the halved-MSE loss w.r.t. w and b.

    Returns (dw, db), both already averaged over the batch.
    """
    n = len(x)
    residual = y_hat - y
    dw = x.T @ residual / n
    db = residual.sum() / n
    return dw, db
def sgd(params, grads, lr):
    """Apply one SGD step in place: param <- param - lr * grad.

    NOTE: no division by batch_size here — gradient() already averaged
    the gradients over the batch.
    """
    for idx in range(len(params)):
        # In-place array update so callers holding references see the change.
        params[idx] -= lr * grads[idx]
def train(x, y, w, b, lr, batch_size, num_epochs):
    """Fit w and b by mini-batch SGD; both are updated in place.

    After each epoch, prints the loss of that epoch's final mini-batch.
    """
    for epoch in range(num_epochs):
        for batch_x, batch_y in data_iter(x, y, batch_size):
            preds = model(batch_x, w, b)
            batch_loss = loss(preds, batch_y)
            dw, db = gradient(batch_x, batch_y, preds)
            sgd([w, b], [dw, db], lr)
        print('epoch %d, loss %f' % (epoch + 1, batch_loss))
def main():
    """Generate data, fit a linear model with SGD, and print the learned w and b."""
    true_w, true_b = [2, -3], 5
    x, y = generate_data(1000, true_w, true_b)
    # Start from small random weights and a zero bias.
    w = np.random.normal(0, 0.01, (2, 1))
    b = np.zeros((1, 1))
    train(x, y, w, b, lr=0.1, batch_size=10, num_epochs=5)
    print(w, b)


if __name__ == '__main__':
    main()