掌握要点:
1.Logistic回归(Logistic Regression)的基本原理及公式推导
2.利用PyTorch进行训练的完整流程,包括数据处理、模型训练
# Logistic Regression
import torch
from torch import nn
from torch.autograd import Variable
from torch import optim
import numpy as np
import matplotlib.pyplot as plt
# Load the samples. Each non-blank line of data.txt is expected to hold three
# comma-separated numbers: feature 0, feature 1 and the class label.
with open('data.txt', 'r') as f:
    # Skip blank lines (for instance an empty line at the end of the file);
    # they would otherwise make the float() conversion below fail.
    data_list = [line.strip().split(',') for line in f if line.strip()]

# One (feature0, feature1, label) tuple of floats per sample.
data = [(float(i[0]), float(i[1]), float(i[2])) for i in data_list]

# Partition the samples by label so each class gets its own marker colour.
x0 = [sample for sample in data if sample[-1] == 0.0]
x1 = [sample for sample in data if sample[-1] == 1.0]

plot_x0_0 = [i[0] for i in x0]
plot_x0_1 = [i[1] for i in x0]
plot_x1_0 = [i[0] for i in x1]
plot_x1_1 = [i[1] for i in x1]

# Scatter plot: label 0.0 as red circles, label 1.0 as blue circles.
# The figure is not shown here; later plotting code draws on the same axes.
plt.plot(plot_x0_0, plot_x0_1, 'ro', label='x_0')
plt.plot(plot_x1_0, plot_x1_1, 'bo', label='x_1')
plt.legend(loc='best')
class LogisticRegression(nn.Module):
    """Logistic-regression classifier: a linear layer followed by a sigmoid.

    The linear layer maps two input features to one output value, and the
    sigmoid squashes that value into the open interval (0, 1).
    """

    def __init__(self):
        super().__init__()
        self.lr = nn.Linear(2, 1)  # two inputs and one output
        self.sm = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid(linear(x)) for the input tensor ``x``."""
        return self.sm(self.lr(x))
# Instantiate the classifier that will be trained.
logistic_model = LogisticRegression()

# Define the loss and the optimizer: binary cross-entropy for the two-class
# problem, optimised with SGD plus momentum.
criterion = nn.BCELoss()
optimizer = optim.SGD(
    logistic_model.parameters(),
    lr=1e-3,
    momentum=0.9,
)
# Prepare the training data: split every sample into its two features and
# its label, then convert both lists to float32 tensors.
data_x = [(feat0, feat1) for feat0, feat1, _label in data]
data_y = [label for _feat0, _feat1, label in data]

features_np = np.asarray(data_x, dtype=np.float32)
labels_np = np.asarray(data_y, dtype=np.float32)
x_data = torch.from_numpy(features_np)
y_data = torch.from_numpy(labels_np)
# Train with full-batch gradient descent: every epoch runs the whole data set
# through the model once and takes a single optimizer step.
num_epoch = 50000
loss_all = []  # loss value of every epoch
acc_all = []   # training accuracy of every epoch

# Plain tensors are passed to the model directly; wrapping them in Variable
# is no longer needed. The inputs never change, so they are bound once
# outside the loop.
x = x_data
y = y_data

for epoch in range(num_epoch):
    # forward
    # The model emits one column per sample, shape (N, 1), while the labels
    # are a flat vector of shape (N,). BCELoss requires input and target to
    # have the same shape, so the singleton column is dropped here.
    out = logistic_model(x).squeeze(1)
    loss = criterion(out, y)
    print_loss = loss.item()

    # A predicted probability >= 0.5 counts as class 1.
    mask = out.ge(0.5).float()
    correct = (mask == y).sum()
    acc = correct.item() / x.size(0)

    # backward
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    loss_all.append(print_loss)
    acc_all.append(acc)

    # Report progress every 1000 epochs.
    if (epoch + 1) % 1000 == 0:
        print('*'*10)
        print('epoch {}'.format(epoch + 1))
        print('loss is {:.4f}'.format(print_loss))
        print('acc is {:.4f}'.format(acc))
# Plot the learned decision boundary, i.e. the line w0*x + w1*y + b = 0,
# read from the weights and bias of the trained linear layer.
w0, w1 = logistic_model.lr.weight.detach()[0].tolist()
b = logistic_model.lr.bias.detach()[0].item()

plot_x = np.arange(30, 100, 0.1)
plot_y = (-w0 * plot_x - b) / w1  # solve the boundary equation for y
plt.plot(plot_x, plot_y)
plt.show()

# Draw the per-epoch training loss and accuracy, each in its own figure.
index = np.arange(len(loss_all))
for series, colour, caption in (
    (loss_all, 'r', 'train_loss'),
    (acc_all, 'b', 'accuracy'),
):
    plt.figure()
    plt.plot(index, series, colour)
    plt.legend([caption])
    plt.show()
![运行结果图](https://i-blog.csdnimg.cn/blog_migrate/2cea7545f2a464b3024b97d379f1ce95.png)
问题探究:
(1)filter( )函数
(2)lambda 表达式
(3)nn.BCELoss()
(4)提取tensor中的值采用item( )方法:新版PyTorch中loss是0维张量,不能再用下标索引,因此书上描述的print_loss = loss.data[0]总是报错,应改用loss.item()
(5)w0, w1 = logistic_model.lr.weight[0] 理解获取参数的过程