首先,我们需要加载NSL-KDD数据集。可以从以下链接下载数据集:http://www.unb.ca/cic/datasets/nsl.html
在代码中,我们使用pandas库来读取csv文件,并将标签转换为数字。我们还需要使用sklearn库中的train_test_split函数来将数据集分为训练集和测试集。
```python
import pandas as pd
from sklearn.model_selection import train_test_split
# Load the dataset. The CSV is expected to have a header row that includes a
# 'label' column holding the traffic class name of each record.
df = pd.read_csv('KDDTrain+.csv')

# Map every class name to an integer id ('normal' is 0, the rest are 1..38).
label_to_id = {
    'normal': 0, 'neptune': 1, 'warezclient': 2, 'ipsweep': 3, 'portsweep': 4,
    'teardrop': 5, 'nmap': 6, 'satan': 7, 'smurf': 8, 'pod': 9,
    'back': 10, 'guess_passwd': 11, 'ftp_write': 12, 'multihop': 13, 'rootkit': 14,
    'buffer_overflow': 15, 'imap': 16, 'warezmaster': 17, 'phf': 18, 'land': 19,
    'loadmodule': 20, 'spy': 21, 'perl': 22, 'saint': 23, 'mscan': 24,
    'apache2': 25, 'snmpgetattack': 26, 'processtable': 27, 'httptunnel': 28, 'ps': 29,
    'snmpguess': 30, 'mailbomb': 31, 'named': 32, 'sendmail': 33, 'xterm': 34,
    'worm': 35, 'xlock': 36, 'xsnoop': 37, 'sqlattack': 38,
}
encoded_labels = df['label'].map(label_to_id)

# Series.map() turns names missing from the dictionary into NaN, which would
# only surface later as an obscure dtype error in the loss function.
# Fail early with a readable message instead.
unmapped = df.loc[encoded_labels.isna(), 'label'].unique()
if len(unmapped) > 0:
    raise ValueError(f'Unmapped labels in dataset: {sorted(map(str, unmapped))}')
df['label'] = encoded_labels.astype('int64')

# One-hot encode any non-numeric feature columns so that the later
# standardization step receives purely numeric input. Numeric columns pass
# through unchanged.
# NOTE(review): NSL-KDD is expected to contain string-valued columns
# (protocol/service/flag) -- confirm against the CSV header.
features = pd.get_dummies(df.drop('label', axis=1), dtype=float)

# Split into training and test sets (80% / 20%). A fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    features, df['label'], test_size=0.2, random_state=42
)
```
接下来,我们需要对数据进行预处理。我们使用sklearn的StandardScaler类对数据进行标准化:先在训练集上拟合,再用同样的参数转换测试集。此外,我们还需要将数据转换为PyTorch张量。
```python
import torch
from sklearn.preprocessing import StandardScaler
# Standardize the features: the scaler is fitted on the training split only
# and the fitted parameters are then applied to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert the features to float32 tensors.
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)

# Convert the label Series to tensors via their underlying NumPy arrays.
y_train = torch.tensor(y_train.to_numpy())
y_test = torch.tensor(y_test.to_numpy())
```
现在,我们可以构建LSTM模型。在这个例子中,我们使用两层LSTM和一个全连接层。我们还需要定义一个损失函数和一个优化器。
```python
import torch.nn as nn
import torch.optim as optim
# Define the LSTM model
class LSTM(nn.Module):
    """Stacked LSTM followed by a linear classification head.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output logits produced by the linear layer.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes).

        Args:
            x: Either a (batch, seq_len, input_size) tensor, or a
               (batch, input_size) tensor, which is treated as a batch of
               length-1 sequences.
        """
        # Tabular input has no time axis; add one so the batch_first LSTM and
        # the out[:, -1, :] indexing below both see a 3-D tensor.
        if x.dim() == 2:
            x = x.unsqueeze(1)

        # Create the initial states on the same device/dtype as the input
        # instead of relying on a module-level `device` variable.
        h0 = torch.zeros(
            self.num_layers, x.size(0), self.hidden_size,
            device=x.device, dtype=x.dtype,
        )
        c0 = torch.zeros_like(h0)

        out, _ = self.lstm(x, (h0, c0))
        # Classify from the output of the last time step.
        return self.fc(out[:, -1, :])
# Model hyperparameters
input_size = X_train.shape[1]  # number of feature columns
hidden_size = 128
num_layers = 2
num_classes = 39  # label ids 0..38 produced by the label mapping
learning_rate = 0.001

# Pick the compute device; it was previously used without ever being defined.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the model, the loss function and the optimizer
model = LSTM(input_size, hidden_size, num_layers, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
```
现在,我们可以开始训练模型。我们需要将数据分批次传递给模型,并在每个批次之后更新模型的权重。
```python
# Train the model
num_epochs = 10
batch_size = 64

# Run on whatever device the model's parameters live on.
device = next(model.parameters()).device
model.train()

num_samples = len(X_train)
for epoch in range(num_epochs):
    # Visit the samples in a fresh random order every epoch.
    order = torch.randperm(num_samples)
    running_loss = 0.0

    for start in range(0, num_samples, batch_size):
        batch_idx = order[start:start + batch_size]
        # Add a sequence axis of length 1: (batch, features) -> (batch, 1, features),
        # which is the 3-D layout a batch_first LSTM expects.
        inputs = X_train[batch_idx].unsqueeze(1).to(device)
        targets = y_train[batch_idx].to(device)

        # Forward pass
        outputs = model(inputs)
        # Compute the loss
        loss = criterion(outputs, targets)

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight by batch size so the final (possibly smaller) batch is
        # averaged correctly.
        running_loss += loss.item() * targets.size(0)

    # Report the mean loss over the whole epoch rather than only the last batch.
    epoch_loss = running_loss / max(num_samples, 1)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')
```
最后,我们可以使用测试集评估模型的性能。
```python
# Evaluate the model
# Run on whatever device the model's parameters live on.
device = next(model.parameters()).device
# Switch to inference mode (affects layers such as dropout, if any are added).
model.eval()

correct = 0
total = 0
with torch.no_grad():
    for start in range(0, len(X_test), batch_size):
        # Add a sequence axis of length 1: (batch, features) -> (batch, 1, features).
        inputs = X_test[start:start + batch_size].unsqueeze(1).to(device)
        targets = y_test[start:start + batch_size].to(device)

        # Forward pass
        outputs = model(inputs)
        # Predicted class is the index of the largest logit
        predicted = outputs.argmax(dim=1)

        total += targets.size(0)
        correct += (predicted == targets).sum().item()

# Guard against an empty test set to avoid a division by zero.
accuracy = 100 * correct / total if total else 0.0
print(f'Test Accuracy: {accuracy:.2f}%')
```
完整代码如下: