1.引入模块,读取数据
1.1.导入数据
import torch
import numpy as np
from torch import nn
from torch.autograd import Variable
import torch.nn.functional as F
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Load the Pima Indians diabetes dataset and preview the first rows.
data = pd.read_csv('pima-indians-diabetes.csv')
print(data.head())
数据示例(即上方 print(data.head(5)) 输出的前 5 行):
1.2 数据预处理
# Split the label off the feature table, 70/30 train/test split, standardize.
data1 = data.copy()
y = data1.loc[:, ['是否患病']]  # label column ("has disease")
del data1['是否患病']
x = data1  # remaining columns are the features
# Fixed seed so the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=2019)
ss = StandardScaler()
x_train = ss.fit_transform(x_train)  # fit the scaler on training data only
# BUG FIX: use transform (not fit_transform) so the test set is scaled with
# the training-set mean/std instead of its own statistics (data leakage /
# train-test skew otherwise).
x_test = ss.transform(x_test)
1.3 数据转化为tensor
# Move the numpy/pandas splits into torch tensors; the network trains on
# float32, so cast the training pair up front.
x_train_tensor = torch.from_numpy(x_train)
x_test_tensor = torch.from_numpy(x_test)
y_train_numpy = np.array(y_train)
y_test_numpy = np.array(y_test)
y_train_tensor = torch.from_numpy(y_train_numpy)
y_test_tensor = torch.from_numpy(y_test_numpy)
x, y = x_train_tensor.float(), y_train_tensor.float()
2.构建计算图(构建网络模型)
class module_net(nn.Module):
    """Three hidden Linear+Tanh stages followed by a Linear output head.

    The output is a raw logit (no sigmoid) — pair it with BCEWithLogitsLoss.
    """

    def __init__(self, num_input, num_hidden, num_output):
        super(module_net, self).__init__()
        self.layer1 = nn.Linear(num_input, num_hidden)
        self.layer2 = nn.Tanh()
        self.layer3 = nn.Linear(num_hidden, num_hidden)
        self.layer4 = nn.Tanh()
        self.layer5 = nn.Linear(num_hidden, num_hidden)
        self.layer6 = nn.Tanh()
        self.layer7 = nn.Linear(num_hidden, num_output)

    def forward(self, x):
        # Apply the seven sub-modules in registration order.
        for layer in (self.layer1, self.layer2, self.layer3, self.layer4,
                      self.layer5, self.layer6, self.layer7):
            x = layer(x)
        return x
# Sanity-check the training tensor shapes before building the model.
for tensor in (x, y):
    print(tensor.shape)
3.损失函数与优化器
# Binary-classification loss on raw logits, plus SGD with momentum.
criterion = nn.BCEWithLogitsLoss()
mo_net = module_net(8, 10, 1)
optim = torch.optim.SGD(mo_net.parameters(), lr=0.01, momentum=0.9)
4.开始训练模型
# Train for 10000 full-batch epochs, recording loss and accuracy per epoch.
Loss_list = []      # per-epoch training loss, for the plot below
Accuracy_list = []  # per-epoch training accuracy, for the plot below
for e in range(10000):
    out = mo_net(Variable(x))  # __call__ runs forward plus any hooks
    loss = criterion(out, Variable(y))
    Loss_list.append(loss.item())
    # --------------------- accuracy bookkeeping ---------------------
    with torch.no_grad():  # metrics don't need autograd history
        out_class = (out > 0).float()  # logit > 0 <=> P(class 1) > 0.5
        right_num = torch.sum(y == out_class).float()
        precision = right_num / out.shape[0]
    # store plain floats so the plotting code below gets numbers, not tensors
    Accuracy_list.append(precision.item())
    # ------------------- end accuracy bookkeeping -------------------
    optim.zero_grad()
    loss.backward()
    optim.step()
    if (e + 1) % 1000 == 0:
        print('epoch: {}, loss: {},precision{},right_num{}'.format(e+1, loss.item(), precision, right_num))
x1 = np.arange(len(Loss_list))  # x-axis follows the actual epoch count
print(len(Loss_list))
print(len(Accuracy_list))
plt.plot(x1, Loss_list, c='red', label='loss')
plt.plot(x1, Accuracy_list, c='blue', label='precision')
plt.legend()  # FIX: labels were set but never displayed without a legend
plt.show()
5.对训练的模型预测结果进行评估
# Evaluate the trained model on the held-out test split.
x_test_tensor = x_test_tensor.float()
y_test_tensor = y_test_tensor.float()
with torch.no_grad():  # inference only: no autograd bookkeeping
    out_test = mo_net(x_test_tensor)  # __call__ instead of .forward
    loss_test = criterion(out_test, y_test_tensor)
    out_test_class = (out_test > 0).float()  # logit > 0 => class 1
    right_num_test = torch.sum(y_test_tensor == out_test_class).float()
    precision_test = right_num_test / out_test.shape[0]
print("loss_test", loss_test)
loss_test = loss_test.item()
print('loss_test:{},precision_test:{},right_num_test:{}'.format(loss_test, precision_test, right_num_test))
6. 完整代码
# -*- coding: utf-8 -*-
# 一、引入模块,读取数据
# 1.导入数据
import torch
import numpy as np
from torch import nn
from torch.autograd import Variable
import torch.nn.functional as F
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
data = pd.read_csv('pima-indians-diabetes.csv')
print(data.head(5))
# 2. Preprocessing: split label from features, 70/30 split, standardize.
data1 = data.copy()
y = data1.loc[:, ['是否患病']]  # label column ("has disease")
del data1['是否患病']
x = data1  # remaining columns are the features
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=2019)  # fixed seed for reproducibility
ss = StandardScaler()
x_train = ss.fit_transform(x_train)  # fit the scaler on training data only
# BUG FIX: scale the test set with the training statistics (transform),
# not its own statistics (fit_transform) — otherwise train/test distributions skew.
x_test = ss.transform(x_test)
# 3. Convert to tensors; train on float32 copies.
x_train_tensor = torch.from_numpy(x_train)
x_test_tensor = torch.from_numpy(x_test)
y_train_numpy = np.array(y_train)
y_train_tensor = torch.from_numpy(y_train_numpy)
y_test_numpy = np.array(y_test)
y_test_tensor = torch.from_numpy(y_test_numpy)
x = x_train_tensor.float()
y = y_train_tensor.float()
# 二、构建计算图(构建网络模型)
class module_net(nn.Module):
    """7-stage MLP: three Linear+Tanh hidden blocks and a Linear output head.

    Emits raw logits — intended for use with BCEWithLogitsLoss.
    """

    def __init__(self, num_input, num_hidden, num_output):
        super(module_net, self).__init__()
        self.layer1 = nn.Linear(num_input, num_hidden)
        self.layer2 = nn.Tanh()
        self.layer3 = nn.Linear(num_hidden, num_hidden)
        self.layer4 = nn.Tanh()
        self.layer5 = nn.Linear(num_hidden, num_hidden)
        self.layer6 = nn.Tanh()
        self.layer7 = nn.Linear(num_hidden, num_output)

    def forward(self, x):
        # Feed the input through all seven sub-modules in order.
        for layer in (self.layer1, self.layer2, self.layer3, self.layer4,
                      self.layer5, self.layer6, self.layer7):
            x = layer(x)
        return x
# Print the shapes of the x and y training tensors defined above.
print(x.shape)
print(y.shape)
# 上面模型训练集准确率高,测试集准确率低-->过拟合
# 简化模型结构,模型从7层删除成为3层
class module_net_pro(nn.Module):
    """Slimmer network (one hidden ReLU layer) to curb the overfitting
    observed with the 7-layer model.

    Emits raw, unbounded logits for BCEWithLogitsLoss.
    """

    def __init__(self, num_input, num_hidden, num_output):
        super(module_net_pro, self).__init__()
        self.layer1 = nn.Linear(num_input, num_hidden)
        self.layer2 = nn.ReLU()
        self.layer3 = nn.Linear(num_hidden, num_output)
        # BUG FIX: removed the trailing nn.ReLU() that was applied AFTER the
        # output layer.  BCEWithLogitsLoss expects unbounded logits; clamping
        # them to >= 0 means sigmoid(out) can never fall below 0.5 and the
        # gradient is zero whenever the pre-activation is negative.
        # (The ReLU holds no parameters, so the state_dict is unchanged.)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x
# 3. Loss on raw logits; SGD with momentum driving the simplified model.
criterion = nn.BCEWithLogitsLoss()
# mo_net = module_net(8, 10, 1)  # pre-fix 7-layer model (overfits)
mo_net = module_net_pro(8, 10, 1)  # simplified model
optim = torch.optim.SGD(mo_net.parameters(), lr=0.01, momentum=0.9)
# 4. Train for 10000 full-batch epochs, recording loss and accuracy.
Loss_list = []      # per-epoch training loss, for the plot below
Accuracy_list = []  # per-epoch training accuracy, for the plot below
for e in range(10000):
    out = mo_net(Variable(x))  # __call__ runs forward plus any hooks
    loss = criterion(out, Variable(y))
    Loss_list.append(loss.item())
    # --------------------- accuracy bookkeeping ---------------------
    with torch.no_grad():  # metrics don't need autograd history
        out_class = (out > 0).float()  # logit > 0 <=> P(class 1) > 0.5
        right_num = torch.sum(y == out_class).float()
        precision = right_num / out.shape[0]
    # store plain floats so the plotting code below gets numbers, not tensors
    Accuracy_list.append(precision.item())
    # ------------------- end accuracy bookkeeping -------------------
    optim.zero_grad()
    loss.backward()
    optim.step()
    if (e + 1) % 1000 == 0:
        print('epoch: {}, loss: {},precision{},right_num{}'.format(e+1, loss.item(), precision, right_num))
x1 = np.arange(len(Loss_list))  # x-axis follows the actual epoch count
print(len(Loss_list))
print(len(Accuracy_list))
plt.plot(x1, Loss_list, c='red', label='loss')
plt.plot(x1, Accuracy_list, c='blue', label='precision')
plt.legend()  # FIX: labels were set but never displayed without a legend
plt.show()
# 5. Evaluate the trained model on the held-out test split.
x_test_tensor = x_test_tensor.float()
y_test_tensor = y_test_tensor.float()
with torch.no_grad():  # inference only: no autograd bookkeeping
    out_test = mo_net(x_test_tensor)  # __call__ instead of .forward
    loss_test = criterion(out_test, y_test_tensor)
    out_test_class = (out_test > 0).float()  # logit > 0 => class 1
    right_num_test = torch.sum(y_test_tensor == out_test_class).float()
    precision_test = right_num_test / out_test.shape[0]
print("loss_test", loss_test)
loss_test = loss_test.item()
print('loss_test:{},precision_test:{},right_num_test:{}'.format(loss_test, precision_test, right_num_test))
改进前结果:
红色表示loss值,蓝色表示正确率
loss_test:3.936781644821167,precision_test:0.6796537041664124,right_num_test:157.0
改进后结果:
红色表示loss值,蓝色表示正确率
loss_test:0.7252744436264038,precision_test:0.7705627679824829,right_num_test:178.0