Train 3D ConvNet

These notes walk through training a 3D convolutional network to learn spatiotemporal features from video: computing the volume mean, training on UCF101, and testing the trained network. The network definition, solver configuration, and the key commands for each step are given below, with the goal of accurate video classification.


Reference: Tran et al., "Learning Spatiotemporal Features with 3D Convolutional Networks," ICCV 2015 (the C3D paper).


1. create_volume_mean.sh

GLOG_logtostderr=1 ../../build/tools/compute_volume_mean_from_list.bin ../c3d_finetuning/train_01.lst 16 128 171 1 ucf101_train_mean.binaryproto 10
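
compute_volume_mean_from_list averages 16-frame volumes at 128x171 resolution over the training list with a sampling rate of 1; the trailing 10 is the tool's optional dropping-rate argument, which appears to subsample the list to speed the computation up, and the result is written to ucf101_train_mean.binaryproto. Assuming the usual C3D list format for frame folders (path, starting frame, label), train_01.lst contains lines like the following (paths here are hypothetical):

/path/to/ucf101_frames/v_ApplyEyeMakeup_g08_c01/ 1 0
/path/to/ucf101_frames/v_ApplyEyeMakeup_g08_c01/ 17 0
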
2. train_ucf101.sh

GLOG_logtostderr=1 ../../build/tools/train_net.bin conv3d_ucf101_solver.prototxt
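
train_net.bin needs only the solver definition; the network, snapshotting, and learning-rate schedule all come from conv3d_ucf101_solver.prototxt below. Assuming the standard Caffe train_net semantics that C3D inherits, an interrupted run can be resumed by passing a solver-state snapshot as a second argument (the snapshot name here is hypothetical):

GLOG_logtostderr=1 ../../build/tools/train_net.bin conv3d_ucf101_solver.prototxt conv3d_ucf101_iter_30000.solverstate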

3. test_ucf101.sh

GLOG_logtostderr=1 ../../build/tools/test_net.bin conv3d_ucf101_test.prototxt conv3d_ucf101_iter_60000 1396 GPU 0
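
test_net.bin takes the test network definition, the trained snapshot (conv3d_ucf101_iter_60000, produced by the solver below at max_iter), the number of test batches to run, and the device. With batch_size 30 in the test prototxt, 1396 batches cover about 1396 x 30 = 41,880 clips.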


a. conv3d_ucf101_solver.prototxt

train_net: "conv3d_ucf101_train.prototxt"
test_net: "conv3d_ucf101_test.prototxt"
test_iter: 100
test_interval: 1000
base_lr: 0.003
momentum: 0.9
weight_decay: 0.005
lr_policy: "step"
gamma: 0.1
stepsize: 20000
# Display every 20 iterations
display: 20
# The maximum number of iterations
max_iter: 60000
# snapshot intermediate results
snapshot: 1000
snapshot_prefix: "conv3d_ucf101"
# solver mode: CPU or GPU
solver_mode: GPU
device_id: 0
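
With lr_policy "step", the learning rate is base_lr * gamma^floor(iter / stepsize): 0.003 for iterations 0-20,000, 3e-4 up to 40,000, and 3e-5 for the final 20,000 iterations. Snapshots are written every 1,000 iterations with the prefix conv3d_ucf101, which is where the conv3d_ucf101_iter_60000 file used by test_ucf101.sh comes from. Note that test_iter/test_interval only control the quick periodic evaluation during training (100 batches every 1,000 iterations); the standalone test run above uses 1396 batches.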

b. conv3d_ucf101_train.prototxt

name: "deep_c3d_ucf101"
layers {
  name: "data"
  type: VIDEO_DATA
  top: "data"
  top: "label"
  image_data_param {
    source: "../c3d_finetuning/train_01.lst"
    use_image: true
    mean_file: "ucf101_train_mean.binaryproto"
    batch_size: 30
    crop_size: 112
    mirror: true
    show_data: 0
    new_height: 128
    new_width: 171
    new_length: 16
    shuffle: true
  }
}
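# Each sample is a 16-frame clip (new_length: 16): frames are resized to
# 128x171, then a random 112x112 crop (with mirroring) is taken, so the
# "data" blob fed to conv1a is 30 x 3 x 16 x 112 x 112
# (batch x channels x length x height x width).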
# ----------- 1st layer group ---------------
layers {
  name: "conv1a"
  type: CONVOLUTION3D
  bottom: "data"
  top: "conv1a"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 64
    kernel_size: 3
    kernel_depth: 3
    pad: 1
    temporal_pad: 1
    stride: 1
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  name: "relu1a"
  type: RELU
  bottom: "conv1a"
  top: "conv1a"
}
layers {
  name: "pool1"
  type: POOLING3D
  bottom: "conv1a"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    kernel_depth: 1
    stride: 2
    temporal_stride: 1
  }
}
# ------------- 2nd layer group --------------
layers {
  name: "conv2a"
  type: CONVOLUTION3D
  bottom: "pool1"
  top: "conv2a"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 128
    kernel_size: 3
    kernel_depth: 3
    pad: 1
    temporal_pad: 1
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu2a"
  type: RELU
  bottom: "conv2a"
  top: "conv2a"
}
layers {
  name: "pool2"
  type: POOLING3D
  bottom: "conv2a"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    kernel_depth: 2
    stride: 2
    temporal_stride: 2
  }
}
# ----------------- 3rd layer group --------------
layers {
  name: "conv3a"
  type: CONVOLUTION3D
  bottom: "pool2"
  top: "conv3a"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 256
    kernel_size: 3
    kernel_depth: 3
    pad: 1
    temporal_pad: 1
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu3a"
  type: RELU
  bottom: "conv3a"
  top: "conv3a"
}
layers {
  name: "pool3"
  type: POOLING3D
  bottom: "conv3a"
  top: "pool3"
  pooling_param {
    pool: MAX
    kernel_size: 2
    kernel_depth: 2
    stride: 2
    temporal_stride: 2
  }
}

# --------- 4th layer group
layers {
  name: "conv4a"
  type: CONVOLUTION3D
  bottom: "pool3"
  top: "conv4a"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 256
    kernel_size: 3
    kernel_depth: 3
    pad: 1
    temporal_pad: 1
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu4a"
  type: RELU
  bottom: "conv4a"
  top: "conv4a"
}
layers {
  name: "pool4"
  type: POOLING3D
  bottom: "conv4a"
  top: "pool4"
  pooling_param {
    pool: MAX
    kernel_size: 2
    kernel_depth: 2
    stride: 2
    temporal_stride: 2
  }
}

# --------------- 5th layer group --------
layers {
  name: "conv5a"
  type: CONVOLUTION3D
  bottom: "pool4"
  top: "conv5a"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 256
    kernel_size: 3
    kernel_depth: 3
    pad: 1
    temporal_pad: 1
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu5a"
  type: RELU
  bottom: "conv5a"
  top: "conv5a"
}
layers {
  name: "pool5"
  type: POOLING3D
  bottom: "conv5a"
  top: "pool5"
  pooling_param {
    pool: MAX
    kernel_size: 2
    kernel_depth: 2
    stride: 2
    temporal_stride: 2
  }
}
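# Per-clip shape trace (channels x length x height x width), assuming
# Caffe's default ceiling-mode pooling:
#   data    3 x 16 x 112 x 112
#   pool1  64 x 16 x  56 x  56   (1x2x2 pooling: spatial only, time preserved)
#   pool2 128 x  8 x  28 x  28
#   pool3 256 x  4 x  14 x  14
#   pool4 256 x  2 x   7 x   7
#   pool5 256 x  1 x   4 x   4
# pool5 therefore flattens to a 4096-dimensional vector per clip before fc6.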
# ---------------- fc layers -------------
layers {
  name: "fc6"
  type: INNER_PRODUCT
  bottom: "pool5"
  top: "fc6"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 2048
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu6"
  type: RELU
  bottom: "fc6"
  top: "fc6"
}
layers {
  name: "drop6"
  type: DROPOUT
  bottom: "fc6"
  top: "fc6"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layers {
  name: "fc7"
  type: INNER_PRODUCT
  bottom: "fc6"
  top: "fc7"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 2048
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu7"
  type: RELU
  bottom: "fc7"
  top: "fc7"
}
layers {
  name: "drop7"
  type: DROPOUT
  bottom: "fc7"
  top: "fc7"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layers {
  name: "fc8"
  type: INNER_PRODUCT
  bottom: "fc7"
  top: "fc8"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 101
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  name: "loss"
  type: SOFTMAX_LOSS
  bottom: "fc8"
  bottom: "label"
}
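
The final fc8 layer has 101 outputs, one per UCF101 action class, and SOFTMAX_LOSS fuses the softmax with the multinomial logistic loss during training. This appears to be the smaller train-from-scratch UCF101 network from the C3D examples (256 conv5 filters, 2048-unit fc layers), not the wider Sports-1M C3D (512 filters / 4096 units) described in the paper.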

c. conv3d_ucf101_test.prototxt

name: "deep_c3d_ucf101"
layers {
  name: "data"
  type: VIDEO_DATA
  top: "data"
  top: "label"
  image_data_param {
    source: "../c3d_finetuning/test_01.lst"
    use_image: true
    mean_file: "ucf101_train_mean.binaryproto"
    batch_size: 30
    crop_size: 112
    mirror: false
    show_data: 0
    new_height: 128
    new_width: 171
    new_length: 16
    shuffle: true
  }
}
# ----------- 1st layer group ---------------
layers {
  name: "conv1a"
  type: CONVOLUTION3D
  bottom: "data"
  top: "conv1a"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 64
    kernel_size: 3
    kernel_depth: 3
    pad: 1
    temporal_pad: 1
    stride: 1
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  name: "relu1a"
  type: RELU
  bottom: "conv1a"
  top: "conv1a"
}
layers {
  name: "pool1"
  type: POOLING3D
  bottom: "conv1a"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    kernel_depth: 1
    stride: 2
    temporal_stride: 1
  }
}
# ------------- 2nd layer group --------------
layers {
  name: "conv2a"
  type: CONVOLUTION3D
  bottom: "pool1"
  top: "conv2a"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 128
    kernel_size: 3
    kernel_depth: 3
    pad: 1
    temporal_pad: 1
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu2a"
  type: RELU
  bottom: "conv2a"
  top: "conv2a"
}
layers {
  name: "pool2"
  type: POOLING3D
  bottom: "conv2a"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    kernel_depth: 2
    stride: 2
    temporal_stride: 2
  }
}
# ----------------- 3rd layer group --------------
layers {
  name: "conv3a"
  type: CONVOLUTION3D
  bottom: "pool2"
  top: "conv3a"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 256
    kernel_size: 3
    kernel_depth: 3
    pad: 1
    temporal_pad: 1
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu3a"
  type: RELU
  bottom: "conv3a"
  top: "conv3a"
}
layers {
  name: "pool3"
  type: POOLING3D
  bottom: "conv3a"
  top: "pool3"
  pooling_param {
    pool: MAX
    kernel_size: 2
    kernel_depth: 2
    stride: 2
    temporal_stride: 2
  }
}

# --------- 4th layer group
layers {
  name: "conv4a"
  type: CONVOLUTION3D
  bottom: "pool3"
  top: "conv4a"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 256
    kernel_size: 3
    kernel_depth: 3
    pad: 1
    temporal_pad: 1
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu4a"
  type: RELU
  bottom: "conv4a"
  top: "conv4a"
}
layers {
  name: "pool4"
  type: POOLING3D
  bottom: "conv4a"
  top: "pool4"
  pooling_param {
    pool: MAX
    kernel_size: 2
    kernel_depth: 2
    stride: 2
    temporal_stride: 2
  }
}

# --------------- 5th layer group --------
layers {
  name: "conv5a"
  type: CONVOLUTION3D
  bottom: "pool4"
  top: "conv5a"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 256
    kernel_size: 3
    kernel_depth: 3
    pad: 1
    temporal_pad: 1
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu5a"
  type: RELU
  bottom: "conv5a"
  top: "conv5a"
}
layers {
  name: "pool5"
  type: POOLING3D
  bottom: "conv5a"
  top: "pool5"
  pooling_param {
    pool: MAX
    kernel_size: 2
    kernel_depth: 2
    stride: 2
    temporal_stride: 2
  }
}
# ---------------- fc layers -------------
layers {
  name: "fc6"
  type: INNER_PRODUCT
  bottom: "pool5"
  top: "fc6"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 2048
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu6"
  type: RELU
  bottom: "fc6"
  top: "fc6"
}
layers {
  name: "drop6"
  type: DROPOUT
  bottom: "fc6"
  top: "fc6"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layers {
  name: "fc7"
  type: INNER_PRODUCT
  bottom: "fc6"
  top: "fc7"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 2048
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu7"
  type: RELU
  bottom: "fc7"
  top: "fc7"
}
layers {
  name: "drop7"
  type: DROPOUT
  bottom: "fc7"
  top: "fc7"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layers {
  name: "fc8"
  type: INNER_PRODUCT
  bottom: "fc7"
  top: "fc8"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 101
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  name: "prob"
  type: SOFTMAX
  bottom: "fc8"
  top: "prob"
}
layers {
  top: "accuracy"
  name: "accuracy"
  type: ACCURACY
  bottom: "prob"
  bottom: "label"
}
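
Compared with the training definition, the test network turns mirroring off, replaces SOFTMAX_LOSS with a SOFTMAX layer that outputs per-class probabilities ("prob"), and appends an ACCURACY layer, so test_net.bin reports clip-level accuracy averaged over the 1396 test batches.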
