PyTorch深度学习实践概论笔记8练习-kaggle的Titanic数据集预测(三)构建模型(使用DataLoader类)

28 篇文章 17 订阅

接着文章PyTorch深度学习实践概论笔记8练习-kaggle的Titanic数据集预测(一)数据分析,我们构建模型来预测人员是否存活,然后提交到 kaggle的Titanic - Machine Learning from Disaster | Kaggle,查看成绩。

1 模型1

3个线性层43-64-16-2。

1.1 使用Dataset和DataLoader类读取数据

导入相应库。 

import pandas as pd
import time
import torch
from torch.utils.data import Dataset,DataLoader

#test其实也有label属性,定义Dataset类的时候同理,不是pd的那种处理方式!

定义TitanicDataset类,代码如下:

class TitanicDataset(Dataset):
    """Map-style Dataset over a pre-processed Titanic CSV.

    The CSV must contain a 'Survived' column (even for the test split —
    see the note above); every remaining column is treated as a numeric
    feature.
    """

    def __init__(self, path):
        frame = pd.read_csv(path)
        self.len = frame.shape[0]
        features = frame.drop(['Survived'], axis=1)
        targets = frame['Survived']
        # Convert to float32 tensors once, up front.
        self.xy_data = torch.from_numpy(features.values).float()
        self.xy_label = torch.from_numpy(targets.values).float()

    def __getitem__(self, index):
        # One (feature_row, label) pair.
        return self.xy_data[index], self.xy_label[index]

    def __len__(self):
        return self.len

# Build train/test datasets and their DataLoaders.
# NOTE(review): the pre-processed test CSV is assumed to also contain a
# 'Survived' column (see the comment near the imports) so TitanicDataset
# can drop it — confirm against the preprocessing step.
titanic_train = TitanicDataset("./titanic/titanic_train.csv")
train_loader = DataLoader(dataset=titanic_train,batch_size=16,
                          shuffle=True,num_workers=1)
titanic_test = TitanicDataset("./titanic/titanic_test.csv")
# shuffle=False keeps test rows in file order for the submission file.
test_loader = DataLoader(dataset=titanic_test,batch_size=16,
                          shuffle=False,num_workers=1)

1.2 构造模型

class Net1(torch.nn.Module):
    """Model 1: a 3-linear-layer MLP (43 -> 64 -> 16 -> 2) for Titanic survival.

    Returns raw logits for the 2 classes.  The trailing Softmax layer was
    removed: the training criterion is torch.nn.CrossEntropyLoss, which
    applies log-softmax internally, so feeding it already-softmaxed
    probabilities double-applies softmax and shrinks the gradients.
    Consumers that argmax the output are unaffected (softmax is monotonic).
    """

    def __init__(self):
        super(Net1, self).__init__()
        self.fc = torch.nn.Sequential(
            torch.nn.Linear(43, 64),  # 43 engineered input features
            torch.nn.ReLU(),
            torch.nn.Linear(64, 16),
            torch.nn.ReLU(),
            torch.nn.Linear(16, 2),   # logits for {died, survived}
        )

    def forward(self, x):
        # x: (batch, 43) float tensor -> (batch, 2) logits
        return self.fc(x)

# Instantiate model 1 and print its layer summary.
net1 = Net1()
print(net1)

输出结果如下:

Net1(
  (fc): Sequential(
    (0): Linear(in_features=43, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=16, bias=True)
    (3): ReLU()
    (4): Linear(in_features=16, out_features=2, bias=True)
    (5): Softmax(dim=1)
  )
)

1.3 损失函数和优化器

criterion = torch.nn.CrossEntropyLoss()  # loss function; expects raw class scores plus integer labels
optimizer = torch.optim.Adam(net1.parameters(), lr=0.001)  # optimizer over model 1's parameters

1.4 训练模型

if __name__ == '__main__':
    # Train model 1 for 5 epochs over the Titanic training loader.
    start = time.time()
    for epoch in range(5):
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data  # (batch, 43) features, (batch,) labels
            optimizer.zero_grad()  # clear accumulated gradients

            outputs = net1(inputs)  # (batch, 2) class scores; no squeeze needed
            # CrossEntropyLoss wants float scores and long class indices.
            loss = criterion(outputs, labels.long())
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if i % 20 == 19:
                # Mean loss of the last 20 mini-batches.  (The original
                # divided by 2000 — copied from a tutorial that prints every
                # 2000 iterations — under-reporting the loss 100x.)
                print('[%d, %5d] loss: %.3f' % (epoch+1, i+1, running_loss/20))
                running_loss = 0.0
    print('Finish Training! Total cost time: ', time.time()-start)

输出如下:

[1,    20] loss: 0.007
[1,    40] loss: 0.006
[2,    20] loss: 0.006
[2,    40] loss: 0.005
[3,    20] loss: 0.005
[3,    40] loss: 0.005
[4,    20] loss: 0.005
[4,    40] loss: 0.005
[5,    20] loss: 0.005
[5,    40] loss: 0.005
Finish Training! Total cost time:  0.8926229476928711

1.5 验证模型

# Initialize counters for accuracy evaluation.
# NOTE(review): this loops over the TRAINING loader, so the reported 85%
# is training accuracy, not held-out validation accuracy — confirm intent.
val_correct = 0
total = 0
with torch.no_grad():
    for data in train_loader:
        inputs, labels = data
        outputs = net1(inputs)
        # Predicted class = argmax over the 2 output columns.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        val_correct += (predicted == labels).sum().item()
# %d truncates the percentage to an integer.
print('Accuracy of the network: %d %%' %
      (val_correct / total * 100))

输出如下:

Accuracy of the network: 85 %

1.6 测试模型

测试模型,作者的最终提交准确率 78.708%。

# Predict on the test loader and write the Kaggle submission file.
# Bug fixes vs. the original: it assigned `submission['Survived'] = output`
# (NameError — the variable was `outputs`), and even with the right name it
# kept only the LAST mini-batch because `outputs` was overwritten on every
# iteration.  Collect each batch's argmax predictions and concatenate.
all_preds = []
with torch.no_grad():
    for data in test_loader:
        inputs, labels = data  # the pre-processed test CSV also carries a label column
        all_preds.append(torch.max(net1(inputs), 1)[1])
predictions = torch.cat(all_preds)

submission = pd.read_csv('./titanic/gender_submission.csv')
submission['Survived'] = predictions.numpy()
submission.to_csv('./titanic/gender_submission_result1.csv', index=False)

模型1的提交准确率为78.708%。

于是,继续优化。

2 模型2

4个线性层加上dropout:43-64-32(p=0.1)-16-2(与代码一致,输入特征为43维)

2.2 构造模型

class Net2(torch.nn.Module):
    """Model 2: a 4-linear-layer MLP (43 -> 64 -> 32 -> 16 -> 2) with Dropout(p=0.1).

    Fixes vs. the original: references `torch.nn` explicitly (`nn` was
    never imported in this script, only `import torch`), and drops the
    trailing Softmax — the training criterion is CrossEntropyLoss, which
    applies log-softmax itself, so a Softmax here double-applies it.
    Argmax consumers are unaffected.  Returns raw logits.
    """

    def __init__(self):
        super(Net2, self).__init__()
        self.fc = torch.nn.Sequential(
            torch.nn.Linear(43, 64),  # 43 engineered input features
            torch.nn.ReLU(),
            torch.nn.Linear(64, 32),
            torch.nn.Dropout(p=0.1),  # regularization after the second layer
            torch.nn.ReLU(),
            torch.nn.Linear(32, 16),
            torch.nn.ReLU(),
            torch.nn.Linear(16, 2),   # logits for {died, survived}
        )

    def forward(self, x):
        # x: (batch, 43) float tensor -> (batch, 2) logits
        return self.fc(x)


# Instantiate model 2 and print its layer summary.
net2 = Net2()
print(net2)

输出模型如下:

Net2(
  (fc): Sequential(
    (0): Linear(in_features=43, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): Dropout(p=0.1, inplace=False)
    (4): ReLU()
    (5): Linear(in_features=32, out_features=16, bias=True)
    (6): ReLU()
    (7): Linear(in_features=16, out_features=2, bias=True)
    (8): Softmax(dim=1)
  )
)

2.4 训练模型

# Train model 2.
if __name__ == '__main__':
    # Bug fix: the optimizer defined earlier wraps net1's parameters, so the
    # original loop computed net2's loss but updated net1 (net2 never
    # learned).  Build a fresh optimizer over net2's parameters.
    optimizer = torch.optim.Adam(net2.parameters(), lr=0.001)
    start = time.time()
    for epoch in range(10):
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data  # (batch, 43) features, (batch,) labels
            optimizer.zero_grad()  # clear accumulated gradients

            outputs = net2(inputs)  # (batch, 2) class scores
            loss = criterion(outputs, labels.long())
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if i % 20 == 19:
                # Mean loss of the last 20 mini-batches (original divided by
                # 2000, under-reporting 100x).
                print('[%d, %5d] loss: %.3f' % (epoch+1, i+1, running_loss/20))
                running_loss = 0.0
    print('Finish Training! Total cost time: ', time.time()-start)

输出结果:

[1,    20] loss: 0.007
[1,    40] loss: 0.007
[2,    20] loss: 0.006
[2,    40] loss: 0.005
[3,    20] loss: 0.005
[3,    40] loss: 0.005
[4,    20] loss: 0.004
[4,    40] loss: 0.005
[5,    20] loss: 0.005
[5,    40] loss: 0.005
Finish Training! Total cost time:  2.1204745769500732

2.5 验证模型

Accuracy of the network: 86 %。

2.6 测试模型

模型2的提交准确率为78.947%,提交的时候排名1376,继续优化。

3 模型3

5个线性层加上2个dropout:43-64-128(p=0.1)-32(p=0.1)-16-2(与代码一致,输入特征为43维)

3.2 构造模型

class Net3(torch.nn.Module):
    """Model 3: a 5-linear-layer MLP (43 -> 64 -> 128 -> 32 -> 16 -> 2) with two Dropout(p=0.1).

    Fixes vs. the original: references `torch.nn` explicitly (`nn` was
    never imported in this script, only `import torch`), and drops the
    trailing Softmax — the training criterion is CrossEntropyLoss, which
    applies log-softmax itself, so a Softmax here double-applies it.
    Argmax consumers are unaffected.  Returns raw logits.
    """

    def __init__(self):
        super(Net3, self).__init__()
        self.fc = torch.nn.Sequential(
            torch.nn.Linear(43, 64),   # 43 engineered input features
            torch.nn.ReLU(),
            torch.nn.Linear(64, 128),
            torch.nn.Dropout(p=0.1),   # regularization after the second layer
            torch.nn.ReLU(),
            torch.nn.Linear(128, 32),
            torch.nn.Dropout(p=0.1),   # regularization after the third layer
            torch.nn.ReLU(),
            torch.nn.Linear(32, 16),
            torch.nn.ReLU(),
            torch.nn.Linear(16, 2),    # logits for {died, survived}
        )

    def forward(self, x):
        # x: (batch, 43) float tensor -> (batch, 2) logits
        return self.fc(x)


# Instantiate model 3 and print its layer summary.
net3 = Net3()
print(net3)

输出模型如下:

Net3(
  (fc): Sequential(
    (0): Linear(in_features=43, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=128, bias=True)
    (3): Dropout(p=0.1, inplace=False)
    (4): ReLU()
    (5): Linear(in_features=128, out_features=32, bias=True)
    (6): Dropout(p=0.1, inplace=False)
    (7): ReLU()
    (8): Linear(in_features=32, out_features=16, bias=True)
    (9): ReLU()
    (10): Linear(in_features=16, out_features=2, bias=True)
    (11): Softmax(dim=1)
  )
)

3.4 训练模型

# Train model 3.
if __name__ == '__main__':
    # Bug fix: the only optimizer defined earlier wraps net1's parameters,
    # so the original loop computed net3's loss but updated net1 (net3 never
    # learned).  Build a fresh optimizer over net3's parameters.
    optimizer = torch.optim.Adam(net3.parameters(), lr=0.001)
    start = time.time()
    for epoch in range(10):
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data  # (batch, 43) features, (batch,) labels
            optimizer.zero_grad()  # clear accumulated gradients

            outputs = net3(inputs)  # (batch, 2) class scores
            loss = criterion(outputs, labels.long())
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if i % 20 == 19:
                # Mean loss of the last 20 mini-batches (original divided by
                # 2000, under-reporting 100x).
                print('[%d, %5d] loss: %.3f' % (epoch+1, i+1, running_loss/20))
                running_loss = 0.0
    print('Finish Training! Total cost time: ', time.time()-start)

输出结果:

[1,    20] loss: 0.007
[1,    40] loss: 0.007
[2,    20] loss: 0.005
[2,    40] loss: 0.005
[3,    20] loss: 0.005
[3,    40] loss: 0.005
[4,    20] loss: 0.005
[4,    40] loss: 0.005
[5,    20] loss: 0.005
[5,    40] loss: 0.004
Finish Training! Total cost time:  0.9200258255004883

3.5 验证模型

Accuracy of the network: 86 %。

3.6 测试模型

模型3的提交准确率为79.186%,提交的时候排名1118,No1的准确率为1.0,还需要继续加油啊。

说明:记录学习笔记,如果错误欢迎指正!写文章不易,转载请联系我。

  • 2
    点赞
  • 16
    收藏
    觉得还不错? 一键收藏
  • 11
    评论
以下是使用PyTorch实现LSTM训练模型使用NSS-KDD数据集的步骤: 1. 下载NSS-KDD数据集并进行预处理 - 下载链接:http://www.unb.ca/cic/datasets/nsl.html - 使用预处理脚本对数据集进行处理,使其符合PyTorch的输入格式 2. 定义LSTM模型 - 使用PyTorch的nn模块定义LSTM模型,包括输入、LSTM层、输出层等 3. 定义损失函数和优化器 - 选择交叉熵损失函数和Adam优化器 4. 进行模型训练 - 使用PyTorchDataLoader加载数据集,对模型进行训练,并记录训练过程中的损失与准确率 5. 进行模型测试 - 使用测试集对训练好的模型进行测试,记录测试结果 6. 分析结果并进行优化 - 根据测试结果进行模型优化,并重新进行训练和测试,直到达到满意的效果 以下是一个简单的PyTorch LSTM训练模型示例: ```python import torch import torch.nn as nn import torch.optim as optim from torch.utils.data import DataLoader # 定义LSTM模型 class LSTMModel(nn.Module): def __init__(self, input_size, hidden_size, output_size): super(LSTMModel, self).__init__() self.hidden_size = hidden_size self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True) self.fc = nn.Linear(hidden_size, output_size) def forward(self, x): h0 = torch.zeros(1, x.size(0), self.hidden_size).to(device) c0 = torch.zeros(1, x.size(0), self.hidden_size).to(device) out, _ = self.lstm(x, (h0, c0)) out = self.fc(out[:, -1, :]) return out # 定义损失函数和优化器 criterion = nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=learning_rate) # 进行模型训练 train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) for epoch in range(num_epochs): for i, (inputs, labels) in enumerate(train_loader): inputs = inputs.to(device) labels = labels.to(device) outputs = model(inputs) loss = criterion(outputs, labels) optimizer.zero_grad() loss.backward() optimizer.step() if (i+1) % 100 == 0: print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' .format(epoch+1, num_epochs, i+1, len(train_loader), loss.item())) # 进行模型测试 test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False) with torch.no_grad(): correct = 0 total = 0 for inputs, labels in test_loader: inputs = inputs.to(device) labels = labels.to(device) outputs = model(inputs) _, predicted = torch.max(outputs.data, 1) total += labels.size(0) correct += (predicted == labels).sum().item() print('Test Accuracy of the model on the test images: {} 
%'.format(100 * correct / total)) ``` 需要注意的是,以上代码仅为示例,具体实现需要根据自己的需求进行修改和完善。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 11
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值