import numpy as np
import random
import matplotlib.pyplot as plt
class LinearRegression:
    """Univariate linear regression fitted with mini-batch gradient descent.

    Model:    y_hat = w * x + b
    Loss:     0.5 * (y_hat - y) ** 2
    Gradient: dw = (y_hat - y) * x
              db = y_hat - y
    Update:   w = w - learning_rate * dw
              b = b - learning_rate * db
    """

    def __init__(self, learn_rate=0.001, iter_num=1000, batch_size=50):
        """Store the hyper-parameters and create empty training logs.

        Args:
            learn_rate: Step size applied to the averaged gradients.
            iter_num: Number of mini-batch updates performed by ``train``.
            batch_size: Number of samples drawn for every update.
        """
        self.lr = learn_rate
        self.iter_num = iter_num
        self.batch_size = batch_size
        # Snapshots appended by ``train`` every 50 iterations; they are
        # never cleared, so repeated ``train`` calls keep extending them.
        self.w_log = []
        self.b_log = []
        self.loss_log = []

    @staticmethod
    def calculate(w, b, x):
        """Return the model prediction ``w * x + b``."""
        return w * x + b

    @staticmethod
    def gradient(y_hat, y, x):
        """Return ``(dw, db)`` of the loss for one sample.

        Args:
            y_hat: Model prediction for the sample.
            y: Target value of the sample.
            x: Input value of the sample.
        """
        diff = y_hat - y
        return diff * x, diff

    @staticmethod
    def _batch_len(x_batch, y_batch):
        """Return the number of samples in a batch after validating it.

        Raises:
            ValueError: If the batch is empty or the two sequences have
                different lengths.
        """
        count = len(x_batch)
        if count != len(y_batch):
            raise ValueError('x_batch and y_batch must have the same length')
        if count == 0:
            raise ValueError('batch must contain at least one sample')
        return count

    def avg_loss(self, w, b, x_batch, y_batch):
        """Return the mean of ``0.5 * (y_hat - y) ** 2`` over the batch.

        The average is taken over the samples actually supplied rather than
        over ``self.batch_size``, so batches of any non-zero length work.

        Raises:
            ValueError: If the batch is empty or its lengths differ.
        """
        count = self._batch_len(x_batch, y_batch)
        total = 0.0
        for x, y in zip(x_batch, y_batch):
            total += 0.5 * (self.calculate(w, b, x) - y) ** 2
        return total / count

    def cal_gradient(self, x_batch, y_batch, w, b):
        """Apply one gradient-descent step and return the new ``(w, b)``.

        The per-sample gradients are averaged over the samples actually
        supplied, then scaled by the learning rate and subtracted.

        Raises:
            ValueError: If the batch is empty or its lengths differ.
        """
        count = self._batch_len(x_batch, y_batch)
        sum_dw, sum_db = 0.0, 0.0
        for x, y in zip(x_batch, y_batch):
            dw, db = self.gradient(self.calculate(w, b, x), y, x)
            sum_dw += dw
            sum_db += db
        new_w = w - self.lr * (sum_dw / count)
        new_b = b - self.lr * (sum_db / count)
        return new_w, new_b

    def train(self, x_list, y_list):
        """Fit ``w`` and ``b`` starting from zero.

        Each iteration draws ``batch_size`` indices with
        ``np.random.choice`` and performs one update on that batch. Every
        50th iteration the current parameters and the loss on the current
        batch are appended to the logs and printed.

        Args:
            x_list: Indexable sequence of input values.
            y_list: Indexable sequence of target values, aligned with
                ``x_list``.

        Returns:
            The final ``(w, b)`` pair.
        """
        w = 0
        b = 0
        sample_count = len(x_list)
        for i in range(self.iter_num):
            batch_index = np.random.choice(sample_count, self.batch_size)
            x_batch = [x_list[index] for index in batch_index]
            y_batch = [y_list[index] for index in batch_index]
            w, b = self.cal_gradient(x_batch, y_batch, w, b)
            if i % 50 == 0:
                # The loss is measured after the update, on the same batch.
                loss = self.avg_loss(w, b, x_batch, y_batch)
                self.w_log.append(w)
                self.b_log.append(b)
                self.loss_log.append(loss)
                print('w: {0}, b: {1}, loss: {2}'.format(w, b, loss))
                print('--------------------------------------------------------------------------')
        return w, b

    def plot_train_log(self):
        """Show the logged ``w``, ``b`` and loss as three side-by-side plots."""
        steps = range(len(self.w_log))
        panels = (
            ('w', self.w_log),
            ('b', self.b_log),
            ('loss', self.loss_log),
        )
        plt.figure(figsize=(10, 4))
        for position, (title, series) in enumerate(panels, start=1):
            plt.subplot(1, 3, position)
            plt.title(title)
            plt.plot(steps, series)
        plt.show()
def get_samples(w=1, b=0, sample_count=100):
    """Generate noisy samples scattered around the line ``y = w * x + b``.

    Each ``x`` is ``np.random.randint(0, 100)`` scaled by
    ``random.random()``. Each ``y`` is ``w * x + b`` plus a noise term
    ``random.random() * random.randint(-1, 1)``.

    Args:
        w: Slope of the underlying line.
        b: Intercept of the underlying line.
        sample_count: Number of samples to generate.

    Returns:
        A ``(x_list, y_list)`` pair of equally long lists.
    """
    x_list = []
    y_list = []
    for _ in range(sample_count):
        x = np.random.randint(0, 100) * random.random()
        # randint(-1, 1) picks the sign of the noise, or removes it when 0.
        noise = random.random() * random.randint(-1, 1)
        x_list.append(x)
        y_list.append(w * x + b + noise)
    return x_list, y_list
if __name__ == '__main__':
    # Demo: fit noisy samples generated around y = 4x + 10, then plot the
    # logged training history.
    # These hyper-parameters are passed to the model explicitly so that
    # editing them here changes the run.
    learning_rate = 0.001
    max_iter = 10000
    linear_reg = LinearRegression(learn_rate=learning_rate, iter_num=max_iter)
    x_sample, y_sample = get_samples(w=4, b=10, sample_count=1000)
    linear_reg.train(x_sample, y_sample)
    linear_reg.plot_train_log()
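# Source article: "机器学习—LinearRegression(python类实现)" (Machine Learning — LinearRegression, implemented as a Python class).
# Web-page residue: "最新推荐文章于 2024-03-01 16:49:18 发布" (latest recommended article published 2024-03-01 16:49:18).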