一、代码中使用的数据集(mlp_dataset.csv,下载后需放在 dataset 目录下)可以点击以下链接进行下载
二、代码运行环境
Pytorch-gpu==1.7.1
Python==3.7
三、数据集处理的代码如下所示(保存为 data_loader.py,后续脚本通过 from data_loader import data_loader 引用)
import torch
import pandas as pd
from torch.utils.data import TensorDataset
from sklearn.model_selection import train_test_split
# Load and preprocess the dataset.
def data_loader(csv_path='dataset/mlp_dataset.csv', random_state=0):
    """Read the CSV, one-hot encode it and return train/test datasets.

    The ``salary`` and ``part`` columns are replaced by their one-hot
    encodings, the ``left`` column is used as the label, and every other
    column is used as a feature.

    Args:
        csv_path: Location of the CSV file. A forward slash is used in the
            default so the path resolves on both Windows and POSIX systems.
        random_state: Seed forwarded to ``train_test_split``. A fixed default
            makes every call return the same split, so a script that calls
            this function again after training evaluates on the same
            train/test partition. Pass ``None`` for a fresh random split.

    Returns:
        A ``(train_dataset, test_dataset)`` tuple of ``TensorDataset``
        objects, each holding float32 features and float32 labels of shape
        ``(n_samples, 1)``.
    """
    data = pd.read_csv(csv_path)

    # Replace each categorical column by its one-hot encoded columns.
    for column in ('salary', 'part'):
        data = data.join(pd.get_dummies(data[column]))
        del data[column]

    # Cast explicitly: recent pandas versions emit boolean dummy columns,
    # which would otherwise turn the mixed frame into an object array that
    # torch.from_numpy cannot convert.
    features = data.drop(columns='left').to_numpy(dtype='float32')
    labels = data['left'].to_numpy(dtype='float32').reshape(-1, 1)

    train_x, test_x, train_y, test_y = train_test_split(
        features, labels, random_state=random_state)

    train_dataset = TensorDataset(
        torch.from_numpy(train_x).float(), torch.from_numpy(train_y).float())
    test_dataset = TensorDataset(
        torch.from_numpy(test_x).float(), torch.from_numpy(test_y).float())
    return train_dataset, test_dataset
if __name__ == '__main__':
    # Manual smoke test: print the full contents of the train split and
    # then of the test split.
    for split in data_loader():
        print(split[:])
四、模型的构建代码如下所示(保存为 model_loader.py,后续脚本通过 from model_loader import Model 引用)
from torch import nn
import torch
# Build the model.
class Model(nn.Module):
    """Three-layer fully connected binary classifier.

    Two ReLU-activated hidden layers are followed by a single sigmoid output
    unit, so the forward pass returns one value in [0, 1] per input row.

    Args:
        in_features: Number of input features per sample (default 20).
        hidden_features: Width of both hidden layers (default 64).
    """

    def __init__(self, in_features=20, hidden_features=64):
        super().__init__()
        # The "liner" spelling is kept on purpose: attribute names become the
        # state_dict keys, so renaming them would break saved checkpoints.
        self.liner1 = nn.Linear(in_features=in_features, out_features=hidden_features)
        self.liner2 = nn.Linear(in_features=hidden_features, out_features=hidden_features)
        self.liner3 = nn.Linear(in_features=hidden_features, out_features=1)

    def forward(self, model_input):
        """Return sigmoid outputs of shape ``(..., 1)`` for ``model_input``."""
        hidden = torch.relu(self.liner1(model_input))
        hidden = torch.relu(self.liner2(hidden))
        return torch.sigmoid(self.liner3(hidden))
五、模型的训练代码如下所示(保存为 train.py,预测脚本通过 from train import accuracy 引用)
from data_loader import data_loader
from model_loader import Model
import torch
import tqdm
from torch.utils.data import DataLoader
import os
def accuracy(y_pred, y_true, threshold=0.5):
    """Return the fraction of thresholded predictions that equal the labels.

    Args:
        y_pred: Tensor of predicted probabilities.
        y_true: Tensor of 0/1 labels with the same number of elements as
            ``y_pred``.
        threshold: A prediction strictly greater than this value counts as
            class 1; anything else counts as class 0.

    Returns:
        A 0-dimensional float tensor holding the mean accuracy.

    Raises:
        RuntimeError: If ``y_true`` cannot be reshaped to ``y_pred``'s shape
            (the element counts differ).
    """
    # Align the label shape with the predictions. Without this, comparing
    # (N, 1) predictions with (N,) labels broadcasts to an (N, N) matrix and
    # silently yields a wrong accuracy.
    y_true = y_true.reshape(y_pred.shape)
    predicted_labels = (y_pred > threshold).to(y_true.dtype)
    return (predicted_labels == y_true).float().mean()
def main():
    """Train the MLP on the dataset and save its weights.

    Loads the train/test datasets, trains ``Model`` with Adam and binary
    cross-entropy for a fixed number of epochs while reporting the test
    loss and the train/test accuracies on a progress bar, then writes the
    state dict to ``model_data/model.pth``.
    """
    # Use the GPU when one is present, otherwise fall back to the CPU
    # instead of crashing on an unconditional .cuda() call.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Load the data.
    train_dataset, test_dataset = data_loader()
    train_dataset_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
    # Full copies of both splits, kept on the device for per-epoch evaluation.
    test_x, test_y = (tensor.to(device) for tensor in test_dataset[:])
    train_x, train_y = (tensor.to(device) for tensor in train_dataset[:])

    # Create the model.
    model = Model().to(device)

    # Training configuration.
    opt = torch.optim.Adam(params=model.parameters(), lr=0.0001)
    loss_func = torch.nn.BCELoss()
    epochs = 1000

    # Start training.
    train_tqdm = tqdm.tqdm(iterable=range(epochs), total=epochs)
    for epoch in train_tqdm:
        for X, Y in train_dataset_loader:
            X = X.to(device)
            Y = Y.to(device)
            loss = loss_func(model(X), Y)
            opt.zero_grad()
            loss.backward()
            opt.step()

        # Evaluate once per epoch without building an autograd graph; the
        # test predictions are reused for both the loss and the accuracy.
        with torch.no_grad():
            test_pred = model(test_x)
            test_loss = loss_func(test_pred, test_y).item()
            test_acc = accuracy(y_pred=test_pred, y_true=test_y).item()
            train_acc = accuracy(y_pred=model(train_x), y_true=train_y).item()

        train_tqdm.set_description_str('Epoch: {:3d}'.format(epoch))
        train_tqdm.set_postfix_str(
            'Loss is {:.3f},Train_Accuracy is {:.3f},Test_Accuracy is {:.3f}'.format(
                test_loss, train_acc, test_acc))
    train_tqdm.close()

    # Save the model. os.path.join keeps the path valid on every platform.
    os.makedirs('model_data', exist_ok=True)
    torch.save(model.state_dict(), os.path.join('model_data', 'model.pth'))


# Guarded so that importing this module (for example to reuse ``accuracy``)
# does not start a full training run as a side effect.
if __name__ == '__main__':
    main()
六、模型的预测代码如下所示
import torch
from model_loader import Model
from train import accuracy
from data_loader import data_loader
# Load the data.
train_dataset, test_dataset = data_loader()
train_x, train_y = train_dataset[:]
test_x, test_y = test_dataset[:]
# Load the model. map_location='cpu' lets weights that were saved from a GPU
# be restored on a machine without CUDA; the forward slash keeps the path
# valid on both Windows and POSIX systems.
model = Model()
model_state_dict = torch.load('model_data/model.pth', map_location='cpu')
model.load_state_dict(model_state_dict)
model.eval()
# Run the predictions. No gradients are needed for evaluation.
with torch.no_grad():
    train_acc = accuracy(y_pred=model(train_x), y_true=train_y)
    test_acc = accuracy(y_pred=model(test_x), y_true=test_y)
print('模型在训练集的识别正确率为 {:14f},模型在测试集的识别正确率为 {:14f}'.format(train_acc, test_acc))