又是一次失败的尝试,记录一下:一个非常简单的分类模型,使用 PyTorch 构建的 5 分类网络结构。猜测可能是数据集的问题导致效果不好。
主要看点:
- GPU的简单调用
- 自己数据集的导入与分割使用,之前一直都是别人的数据,自己没有制作过
# encoding:utf-8
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.autograd import Variable
import os
# Serialize CUDA kernel launches so errors surface at the failing call
# (debugging aid — slows execution; remove once the script is stable).
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
# Load the raw data (expects the CSVs next to this script).
train_data = pd.read_csv('train_data.csv')
test_data = pd.read_csv('test_data.csv')
# GPU setup
torch.cuda.set_device(0) # select GPU 0 as the current device
cudnn.benchmark = True # let cuDNN pick the fastest kernels for fixed input shapes
cudnn.enabled = True # cuDNN is NVIDIA's GPU-accelerated DNN primitive library; enable it
# Convert the DataFrames to torch tensors
train_data = torch.from_numpy(train_data.values)
test_data = torch.from_numpy(test_data.values)
print(train_data.size())
print(test_data.size())
# Replace the real data with random tensors to smoke-test the pipeline.
# NOTE(review): this discards the CSVs loaded above — everything below
# trains on noise; delete these two lines to use the real dataset.
train_data = torch.rand(80374,64)
test_data = torch.rand(34447,64)
train_loader = DataLoader(dataset=train_data, batch_size=64, shuffle=True, num_workers = 4)
test_loader = DataLoader(dataset=test_data, batch_size=64, shuffle=False, num_workers = 4)
class Net(nn.Module):
    """Five-way classifier over 62 input features.

    A single MLP head: dropout -> linear(62->256) -> batchnorm ->
    LeakyReLU -> dropout -> linear(256->5). Returns raw logits
    (no softmax), as expected by nn.CrossEntropyLoss.
    """

    def __init__(self):
        super(Net, self).__init__()
        layers = [
            nn.Dropout(0.7),
            nn.Linear(62, 256),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.4),
            nn.Linear(256, 5),
        ]
        self.ClassifierLayer = nn.Sequential(*layers)

    def forward(self, x):
        """Map a (batch, 62) float tensor to (batch, 5) class logits."""
        return self.ClassifierLayer(x)
model = Net()
model = model.cuda()  # move all parameters to the selected GPU
# model = nn.DataParallel(net, [0])
# Loss and optimizer
criterion = nn.CrossEntropyLoss()  # expects raw logits and 0-based class ids
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
print('开始训练模型!!')
correct = 0
total = 0
for epoch in range(5):  # loop over the dataset multiple times
    running_loss = 0.0
    model.train()
    for i, data in enumerate(train_loader, 0):
        # Each batch row is laid out as [id, feature_0..feature_61, label]:
        # drop column 0 and take the last column as the label.
        # NOTE(review): assumes labels in the CSV are 1-based — confirm.
        inputs, labels = data[:, 1:-1], data[:, -1]
        labels = labels - 1  # shift 1..5 class ids to 0..4 for CrossEntropyLoss
        inputs = torch.as_tensor(inputs, dtype=torch.float32).contiguous().cuda()
        # BUGFIX: the original `.cuda(async=True)` is a SyntaxError on
        # Python >= 3.7 (`async` became a reserved keyword); the modern
        # spelling is `non_blocking=True`. `Variable` wrappers are also
        # deprecated since PyTorch 0.4 — tensors track gradients directly.
        labels = labels.long().contiguous().cuda(non_blocking=True)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # running training accuracy (predictions from pre-update weights)
        predicted = outputs.detach().argmax(dim=1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

        # print statistics
        running_loss += loss.item()
        if i % 200 == 199:  # print every 200 mini-batches
            print('[%d, %5d] loss: %.4f' % (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0
    print('Accuracy of the network on the train images: %d %%' % (100 * correct / total))
    correct = 0
    total = 0
print('Finished Training')
# Evaluate on the test set: no gradients, dropout off, batchnorm in eval mode.
correct = 0
total = 0
with torch.no_grad():
    model.eval()
    for i, data in enumerate(test_loader, 0):
        # Same row layout as training: [id, features..., label].
        inputs, labels = data[:, 1:-1], data[:, -1]
        labels = labels - 1  # 1-based class ids -> 0-based
        inputs = torch.as_tensor(inputs, dtype=torch.float32).contiguous().cuda()
        # BUGFIX: `.cuda(async=True)` is a SyntaxError on Python >= 3.7
        # (`async` is a reserved keyword); use `non_blocking=True`.
        # `Variable` is deprecated and dropped here.
        labels = labels.long().contiguous().cuda(non_blocking=True)
        outputs = model(inputs)
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy of the network on the test images: %d %%' % (
        100 * correct / total))