对于数据量大、适合以一维数据作为输入、且难以直接探索数据内部规律的数据集,可以尝试利用深度神经网络(DNN)来对数据进行分类。
由于是分类问题,这里使用交叉熵作为损失函数,并选用Adam作为优化器,大致的网络结构代码如下:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
class CipvDNN(nn.Module):
    """Small fully connected classifier: input_dim -> 64 -> 32 -> output_dim.

    ReLU follows the two hidden layers. The final layer has no activation,
    so the network returns raw class scores (logits), which is the form
    ``nn.CrossEntropyLoss`` expects.
    """

    def __init__(self, input_dim, output_dim):
        """Build the three linear layers.

        Args:
            input_dim: Number of features per sample.
            output_dim: Number of classes (size of the score vector).
        """
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, output_dim)

    def forward(self, x):
        """Return the raw class scores for the float tensor ``x``."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

    def predict(self, features):
        """Run inference and return the class scores as a NumPy array.

        Switches the module to evaluation mode as a side effect.

        Args:
            features: NumPy array (or anything ``torch.as_tensor`` accepts)
                whose last dimension equals ``input_dim``.

        Returns:
            ``numpy.ndarray`` of float32 logits; take ``argmax`` over the
            last axis to obtain the predicted class.
        """
        self.eval()
        inputs = torch.as_tensor(features, dtype=torch.float32)
        # No graph is needed for inference; skipping it saves memory and
        # lets the result be converted to NumPy without an explicit detach.
        with torch.no_grad():
            return self(inputs).numpy()
class CipvDataset(Dataset):
    """Map-style dataset pairing each feature row with its label.

    Note the constructor order: ``labels`` first, then ``features``.
    """

    def __init__(self, labels, features):
        """Store the two parallel, index-aligned containers.

        Args:
            labels: Indexable container of labels, one per sample.
            features: Indexable container of feature rows, one per sample.
        """
        super().__init__()
        self.labels = labels
        self.features = features

    def __len__(self):
        """Return the number of samples, taken from ``features``."""
        # len() equals shape[0] for arrays and tensors, and additionally
        # works for plain Python sequences.
        return len(self.features)

    def __getitem__(self, idx):
        """Return sample ``idx`` as ``{'feature': ..., 'label': ...}``."""
        return {'feature': self.features[idx], 'label': self.labels[idx]}
class CipvTrain_DNN:
    """Training and inference wrapper around a ``CipvDNN`` classifier.

    Uses Adam as the optimizer and cross-entropy as the loss. The
    hyper-parameters are plain attributes; change ``num_epochs`` or
    ``batchsize`` after construction if needed. Changing
    ``learning_rate`` afterwards does not affect the already-built
    optimizer.
    """

    def __init__(self, input_dim=100, output_dim=20):
        """Create the network, optimizer and loss.

        Args:
            input_dim: Number of features per sample (default 100).
            output_dim: Number of classes (default 20).
        """
        self.network = CipvDNN(input_dim, output_dim)
        self.learning_rate = 0.0001
        self.optimizer = torch.optim.Adam(
            self.network.parameters(), lr=self.learning_rate
        )
        self.criterion = nn.CrossEntropyLoss()
        self.num_epochs = 1000
        self.batchsize = 500

    def train(self, features, labels):
        """Train for ``num_epochs`` epochs, printing the mean loss per epoch.

        Batches are drawn in dataset order (no shuffling).

        Args:
            features: Indexable container of feature rows.
            labels: Indexable container of integer class labels, one per
                row, either as scalars or as length-1 arrays.

        Raises:
            ValueError: If ``features`` contains no samples.
        """
        dataset = CipvDataset(labels, features)
        if len(dataset) == 0:
            raise ValueError('cannot train on an empty dataset')

        self.network.train()
        loader = DataLoader(dataset, batch_size=self.batchsize)
        num_batches = len(loader)
        for _ in range(self.num_epochs):
            total_loss = 0.0
            for batch in loader:
                inputs = batch['feature'].float()
                # Flatten to one class index per sample. Unlike squeeze(-1),
                # reshape(-1) keeps the batch dimension when the last batch
                # holds a single sample with scalar labels.
                targets = batch['label'].long().reshape(-1)
                self.optimizer.zero_grad()
                loss = self.criterion(self.network(inputs), targets)
                loss.backward()
                self.optimizer.step()
                total_loss += loss.item()
            print('loss', total_loss / num_batches)

    def get_action(self, feature):
        """Return the index of the highest-scoring class for ``feature``.

        The argmax is taken over the flattened score array, so pass one
        sample at a time.
        """
        self.network.eval()
        scores = self.network.predict(feature)
        # ndarray.argmax() matches numpy.argmax(scores) without needing the
        # numpy module in scope.
        return scores.argmax()
训练完成之后,如何知道模型的预测效果呢?这里提供一个简单的测试函数,用于统计模型在给定数据集上的预测准确率。
def test_model(policy, features, labels):
    """Print the fraction of samples that ``policy`` classifies correctly.

    Args:
        policy: Object exposing ``get_action(feature)`` that returns the
            predicted label for one sample.
        features: Sequence of samples; each is passed to ``get_action``.
        labels: Expected labels, indexable by the same positions as
            ``features``.

    Returns:
        Always 0; the success rate is only printed. An empty ``features``
        is reported as a success rate of 0.0.
    """
    total = len(features)
    hits = 0
    for i, feature in enumerate(features):
        if policy.get_action(feature) == labels[i]:
            hits += 1
    # Guard against division by zero when there is nothing to evaluate.
    success_rate = hits / total if total else 0.0
    print('success rate', success_rate)
    return 0


# The name starts with "test_", so tell pytest this is a helper, not a test.
test_model.__test__ = False