# Uses the Otto dataset; the label column takes the values 'Class_1', 'Class_2', ..., 'Class_9'.
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
import matplotlib.pyplot as plt
import os
class OttoData(Dataset):
    """Dataset backed by a comma-separated file with features and a label column.

    The first column of the file is skipped, the last column is read as the
    class label, and every column in between is used as a feature.

    Attributes set by the constructor:
        len: number of rows in the file.
        x_data: float32 tensor holding the feature columns.
        y_data: int64 tensor holding the label index of every row.
        label_to_index: mapping from original label value to integer index.
        index_to_label: inverse mapping of ``label_to_index``.
    """

    def __init__(self, filepath):
        """Load ``filepath`` and convert features and labels to tensors.

        Args:
            filepath: Path of the CSV file to read.
        """
        xy = pd.read_csv(filepath, sep=',')
        self.len = xy.shape[0]

        # Drop the first column and the trailing label column.
        self.x_data = torch.tensor(xy.iloc[:, 1:-1].values, dtype=torch.float32)

        # Labels are mapped to consecutive indices in sorted order so the
        # mapping does not depend on the order of rows in the file.
        y_data = xy.iloc[:, -1]
        unique_labels = sorted(y_data.unique())
        self.label_to_index = {label: index for index, label in enumerate(unique_labels)}
        self.index_to_label = {index: label for index, label in enumerate(unique_labels)}
        y_data_mapped = y_data.map(self.label_to_index)
        self.y_data = torch.tensor(y_data_mapped.values, dtype=torch.long)

    def __getitem__(self, item):
        """Return the ``(features, label_index)`` pair stored at ``item``."""
        return self.x_data[item], self.y_data[item]

    def __len__(self):
        """Return the number of rows loaded from the file."""
        return self.len
# Training data, served to the training loop in shuffled mini-batches of 64.
tra_dataset = OttoData("./datasets/otto/train.csv")
tra_dataloader = DataLoader(tra_dataset, shuffle=True, batch_size=64)
class OttoNet(torch.nn.Module):
    """Fully connected classifier: 93 inputs -> 256 -> 128 -> 64 -> 32 -> 9 outputs.

    ReLU is applied after each of the first four linear layers; the output of
    the last layer is returned without any activation.
    """

    def __init__(self):
        super().__init__()
        self.linear1 = torch.nn.Linear(93, 256)
        self.linear2 = torch.nn.Linear(256, 128)
        self.linear3 = torch.nn.Linear(128, 64)
        self.linear4 = torch.nn.Linear(64, 32)
        self.linear5 = torch.nn.Linear(32, 9)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """Return the 9 raw output scores for each row of ``x``.

        Args:
            x: Tensor whose last dimension has size 93.
        """
        for hidden in (self.linear1, self.linear2, self.linear3, self.linear4):
            x = self.relu(hidden(x))
        # No activation on the final layer: the scores are returned as-is.
        return self.linear5(x)
# Loss function, network and optimizer used by the training loop.
criterion = torch.nn.CrossEntropyLoss()
model = OttoNet()
optimizer = torch.optim.SGD(model.parameters(), momentum=0.5, lr=1e-3)
def train(epoch, log_interval=300):
    """Run one pass over ``tra_dataloader``, updating ``model`` after every batch.

    Relies on the module-level ``model``, ``criterion``, ``optimizer`` and
    ``tra_dataloader``. Every ``log_interval`` batches the mean loss of those
    batches is printed; once the pass is finished the accuracy over all
    samples seen is printed, computed from the predictions of each batch's
    forward pass.

    Args:
        epoch: Zero-based epoch index; it is printed as ``epoch + 1``.
        log_interval: Number of batches between two loss reports.

    Returns:
        The loss of the last batch of the pass, as a Python float.
    """
    running_loss = 0.0
    total = 0
    correct = 0
    for batch_idx, (inputs, targets) in enumerate(tra_dataloader):
        optimizer.zero_grad()
        y_pred = model(inputs)
        loss = criterion(y_pred, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if batch_idx % log_interval == log_interval - 1:
            # Divide by the number of batches actually accumulated since the
            # previous report so the printed value is a true mean.
            print("[%d %5d], loss: %.3f" % (epoch + 1, batch_idx + 1, running_loss / log_interval))
            running_loss = 0.0

        # The predicted class is the index of the largest output score.
        predicted = torch.argmax(y_pred, dim=1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()

    print("Accuracy on Train is: %d %%" % (100 * correct / total))
    return loss.item()
if __name__ == '__main__':
    # Train for a fixed number of epochs, keeping the value returned by
    # train() for each one.
    num_epochs = 50
    losses = []
    for epoch in range(num_epochs):
        losses.append(train(epoch))

    # Plot the recorded value against the epoch index.
    plt.plot(range(num_epochs), losses)
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.show()
    plt.close()