Halcon's pretrained_dl_segmentation_compact.hdl
index; name; type; output shape (width, height, depth, batch); successor layers; description
0; weight_image; input; (640, 512, 1, 2); 62; -
1; segmentation_image_target; input; (640, 512, 1, 2); 2; -
2; target_internal; class_id_conversion; (640, 512, 1, 2); 62; -
11; image; input; (640, 512, 3, 2); 12, 57; -
12; conv1; convolution; (320, 256, 64, 2); 13; 7x7 convolutional layer with 64 kernels, 1 groups, dilation 1x1, stride 2x2, and with padding
13; batchnorm1; batchnorm; (320, 256, 64, 2); 15; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
15; pool1; pooling; (160, 128, 64, 2); 21, 16; -
16; res0_block0_conv1; convolution; (160, 128, 64, 2); 17; 1x1 convolutional layer with 64 kernels, 1 groups, dilation 1x1, stride 1x1, and without padding
17; res0_block0_bn1; batchnorm; (160, 128, 64, 2); 18; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
18; res0_block0_conv2; convolution; (160, 128, 64, 2); 19; 3x3 convolutional layer with 64 kernels, 1 groups, dilation 1x1, stride 1x1, and with padding
19; res0_block0_bn2; batchnorm; (160, 128, 64, 2); 20; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
20; res0_block0_conv3; convolution; (160, 128, 256, 2); 22; 1x1 convolutional layer with 256 kernels, 1 groups, dilation 1x1, stride 1x1, and without padding
21; res0_block0_conv4; convolution; (160, 128, 256, 2); 22; 1x1 convolutional layer with 256 kernels, 1 groups, dilation 1x1, stride 1x1, and without padding
22; res0_block0_sum; elementwise; (160, 128, 256, 2); 29, 23; -
23; res0_block1_bn1; batchnorm; (160, 128, 256, 2); 24; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
24; res0_block1_conv1; convolution; (160, 128, 64, 2); 25; 1x1 convolutional layer with 64 kernels, 1 groups, dilation 1x1, stride 1x1, and without padding
25; res0_block1_bn2; batchnorm; (160, 128, 64, 2); 26; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
26; res0_block1_conv2; convolution; (160, 128, 64, 2); 27; 3x3 convolutional layer with 64 kernels, 1 groups, dilation 1x1, stride 1x1, and with padding
27; res0_block1_bn3; batchnorm; (160, 128, 64, 2); 28; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
28; res0_block1_conv3; convolution; (160, 128, 256, 2); 29; 1x1 convolutional layer with 256 kernels, 1 groups, dilation 1x1, stride 1x1, and without padding
29; res0_block1_sum; elementwise; (160, 128, 256, 2); 36, 30; -
30; res0_block2_bn1; batchnorm; (160, 128, 256, 2); 31; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
31; res0_block2_conv1; convolution; (160, 128, 64, 2); 32; 1x1 convolutional layer with 64 kernels, 1 groups, dilation 1x1, stride 1x1, and without padding
32; res0_block2_bn2; batchnorm; (160, 128, 64, 2); 33; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
33; res0_block2_conv2; convolution; (160, 128, 64, 2); 34; 3x3 convolutional layer with 64 kernels, 1 groups, dilation 1x1, stride 1x1, and with padding
34; res0_block2_bn3; batchnorm; (160, 128, 64, 2); 35; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
35; res0_block2_conv3; convolution; (160, 128, 256, 2); 36; 1x1 convolutional layer with 256 kernels, 1 groups, dilation 1x1, stride 1x1, and without padding
36; res0_block2_sum; elementwise; (160, 128, 256, 2); 49, 46, 48, 43, 45, 40, 42, 37, 39; -
37; semseg_pooling6; pooling; (6, 6, 256, 2); 38; -
38; semseg_conv6; convolution; (6, 6, 64, 2); 39; 1x1 convolutional layer with 64 kernels, 1 groups, dilation 1x1, stride 1x1, without padding, and ReLU activation
39; semseg_upsample6; zoom; (160, 128, 64, 2); 49; -
40; semseg_pooling3; pooling; (3, 3, 256, 2); 41; -
41; semseg_conv3; convolution; (3, 3, 64, 2); 42; 1x1 convolutional layer with 64 kernels, 1 groups, dilation 1x1, stride 1x1, without padding, and ReLU activation
42; semseg_upsample3; zoom; (160, 128, 64, 2); 49; -
43; semseg_pooling2; pooling; (2, 2, 256, 2); 44; -
44; semseg_conv2; convolution; (2, 2, 64, 2); 45; 1x1 convolutional layer with 64 kernels, 1 groups, dilation 1x1, stride 1x1, without padding, and ReLU activation
45; semseg_upsample2; zoom; (160, 128, 64, 2); 49; -
46; semseg_pooling1; pooling; (1, 1, 256, 2); 47; -
47; semseg_conv1; convolution; (1, 1, 64, 2); 48; 1x1 convolutional layer with 64 kernels, 1 groups, dilation 1x1, stride 1x1, without padding, and ReLU activation
48; semseg_upsample1; zoom; (160, 128, 64, 2); 49; -
49; depth_concat; concat; (160, 128, 512, 2); 50; -
50; fire_squeeze1x1bnconv; convolution; (160, 128, 256, 2); 51; 1x1 convolutional layer with 256 kernels, 1 groups, dilation 1x1, stride 1x1, and without padding
51; fire_squeeze1x1bnbn; batchnorm; (160, 128, 256, 2); 53, 52; batch norm layer with auto momentum, epsilon 0.0001
52; fire_expand3x3; convolution; (160, 128, 128, 2); 54; 3x3 convolutional layer with 128 kernels, 1 groups, dilation 1x1, stride 1x1, with padding, and ReLU activation
53; fire_expand1x1; convolution; (160, 128, 128, 2); 54; 1x1 convolutional layer with 128 kernels, 1 groups, dilation 1x1, stride 1x1, without padding, and ReLU activation
54; after_semseg_conv; concat; (160, 128, 256, 2); 55; -
55; dropout; dropout; (160, 128, 256, 2); 56; -
56; final_conv; convolution; (160, 128, 3, 2); 57; 1x1 convolutional layer with 3 kernels, 1 groups, dilation 1x1, stride 1x1, and without padding
57; final_zoom; zoom; (640, 512, 3, 2); 58; -
58; softmax; softmax; (640, 512, 3, 2); 62, 60, 59; -
59; segmentation_confidence; depthmax; (640, 512, 1, 2); -; -
60; segmentation_image_internal; depthmax; (640, 512, 1, 2); 61; -
61; segmentation_image; class_id_conversion; (640, 512, 1, 2); -; -
62; crossentropy2d; loss_crossentropy2d; (1, 1, 1, 1); -; -
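Each row of the listing above is one node of the graph: index, layer name, layer type, output shape in Halcon's (width, height, depth, batch) order, the indices of the layers it feeds, and a free-text description. A small parser like the sketch below (parse_summary and its field names are our own helpers, not part of Halcon) turns the rows into dictionaries so the connectivity and shapes can be checked programmatically before writing the PyTorch version.

# Minimal sketch: parse semicolon-separated rows of the layer summary above.
# The dictionary keys are our own labels for the six columns, not Halcon API names.
def parse_summary(text):
    layers = []
    for line in text.strip().splitlines():
        idx, name, kind, shape, successors, desc = [f.strip() for f in line.split(';', 5)]
        layers.append({
            'index': int(idx),
            'name': name,
            'type': kind,
            'shape': tuple(int(v) for v in shape.strip('()').split(',')),  # (width, height, depth, batch)
            'successors': [] if successors == '-' else [int(s) for s in successors.split(',')],
            'description': desc,
        })
    return layers

# Example: the stem convolution row (layer 12) of the compact model.
rows = parse_summary("12; conv1; convolution; (320, 256, 64, 2); 13; 7x7 convolutional layer ...")
print(rows[0]['shape'], rows[0]['successors'])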
Implementing the Compact model in PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
class Halcon_Compact(nn.Module):
    def __init__(self):
        super(Halcon_Compact, self).__init__()
        # Halcon shapes are (width, height, depth, batch); the PyTorch tensors use (batch, channels, height, width).
        # Stem (layers 12-15): 7x7/2 convolution + BN/ReLU + 3x3/2 max pooling
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.batchnorm1 = nn.BatchNorm2d(64)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # res0_block0 (layers 16-22): bottleneck with a 1x1 projection shortcut (res0_block0_conv4)
        self.res0_block0_conv1 = nn.Conv2d(64, 64, kernel_size=1)
        self.res0_block0_bn1 = nn.BatchNorm2d(64)
        self.res0_block0_conv2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.res0_block0_bn2 = nn.BatchNorm2d(64)
        self.res0_block0_conv3 = nn.Conv2d(64, 256, kernel_size=1)
        self.res0_block0_conv4 = nn.Conv2d(64, 256, kernel_size=1)  # projection shortcut from pool1
        # res0_block1 and res0_block2 (layers 23-36): pre-activation bottlenecks with identity shortcuts
        self.res0_block1_bn1 = nn.BatchNorm2d(256)
        self.res0_block1_conv1 = nn.Conv2d(256, 64, kernel_size=1)
        self.res0_block1_bn2 = nn.BatchNorm2d(64)
        self.res0_block1_conv2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.res0_block1_bn3 = nn.BatchNorm2d(64)
        self.res0_block1_conv3 = nn.Conv2d(64, 256, kernel_size=1)
        self.res0_block2_bn1 = nn.BatchNorm2d(256)
        self.res0_block2_conv1 = nn.Conv2d(256, 64, kernel_size=1)
        self.res0_block2_bn2 = nn.BatchNorm2d(64)
        self.res0_block2_conv2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.res0_block2_bn3 = nn.BatchNorm2d(64)
        self.res0_block2_conv3 = nn.Conv2d(64, 256, kernel_size=1)
        # Pyramid pooling branches (layers 37-48): pool to fixed 6x6 / 3x3 / 2x2 / 1x1 grids,
        # reduce to 64 channels, then zoom back to the feature-map size in forward().
        # The Halcon summary does not name the pooling type; average pooling (as in PSPNet) is assumed here.
        self.semseg_pooling6 = nn.AdaptiveAvgPool2d(6)
        self.semseg_conv6 = nn.Conv2d(256, 64, kernel_size=1)
        self.semseg_pooling3 = nn.AdaptiveAvgPool2d(3)
        self.semseg_conv3 = nn.Conv2d(256, 64, kernel_size=1)
        self.semseg_pooling2 = nn.AdaptiveAvgPool2d(2)
        self.semseg_conv2 = nn.Conv2d(256, 64, kernel_size=1)
        self.semseg_pooling1 = nn.AdaptiveAvgPool2d(1)
        self.semseg_conv1 = nn.Conv2d(256, 64, kernel_size=1)
        # Fire module (layers 50-53): squeeze 512 -> 256, expand to 128 + 128 channels
        self.fire_squeeze1x1bnconv = nn.Conv2d(512, 256, kernel_size=1)
        self.fire_squeeze1x1bnbn = nn.BatchNorm2d(256)
        self.fire_expand3x3 = nn.Conv2d(256, 128, kernel_size=3, padding=1)
        self.fire_expand1x1 = nn.Conv2d(256, 128, kernel_size=1)
        # Head (layers 55-58): dropout, 1x1 conv to the 3 classes; zoom and softmax are applied in forward()
        self.dropout = nn.Dropout2d()
        self.final_conv = nn.Conv2d(256, 3, kernel_size=1)
    def forward(self, image, segmentation_image_target=None, weight_image=None):
        # image: (batch, 3, H, W); target: (batch, H, W) long class indices; weight_image: (batch, 1, H, W)
        x = self.conv1(image)
        x = F.relu(self.batchnorm1(x))
        x = self.pool1(x)
        # res0_block0: bottleneck branch + projection shortcut, elementwise sum (layer 22)
        branch = F.relu(self.res0_block0_bn1(self.res0_block0_conv1(x)))
        branch = F.relu(self.res0_block0_bn2(self.res0_block0_conv2(branch)))
        res0_block0 = self.res0_block0_conv3(branch) + self.res0_block0_conv4(x)
        # res0_block1: pre-activation bottleneck + identity shortcut (layer 29)
        branch = F.relu(self.res0_block1_bn1(res0_block0))
        branch = F.relu(self.res0_block1_bn2(self.res0_block1_conv1(branch)))
        branch = F.relu(self.res0_block1_bn3(self.res0_block1_conv2(branch)))
        res0_block1 = res0_block0 + self.res0_block1_conv3(branch)
        # res0_block2 (layer 36)
        branch = F.relu(self.res0_block2_bn1(res0_block1))
        branch = F.relu(self.res0_block2_bn2(self.res0_block2_conv1(branch)))
        branch = F.relu(self.res0_block2_bn3(self.res0_block2_conv2(branch)))
        res0_block2 = res0_block1 + self.res0_block2_conv3(branch)
        # Pyramid pooling branches (layers 37-48): pool, 1x1 conv + ReLU, zoom back to the feature-map size
        size = res0_block2.shape[2:]
        p6 = F.interpolate(F.relu(self.semseg_conv6(self.semseg_pooling6(res0_block2))),
                           size=size, mode='bilinear', align_corners=False)
        p3 = F.interpolate(F.relu(self.semseg_conv3(self.semseg_pooling3(res0_block2))),
                           size=size, mode='bilinear', align_corners=False)
        p2 = F.interpolate(F.relu(self.semseg_conv2(self.semseg_pooling2(res0_block2))),
                           size=size, mode='bilinear', align_corners=False)
        p1 = F.interpolate(F.relu(self.semseg_conv1(self.semseg_pooling1(res0_block2))),
                           size=size, mode='bilinear', align_corners=False)
        # depth_concat (layer 49): backbone features + four branches = 256 + 4 * 64 = 512 channels
        depth_concat = torch.cat((res0_block2, p6, p3, p2, p1), dim=1)
        # Fire module (layers 50-54); after_semseg_conv is a concat in the Halcon graph, not a convolution
        squeeze = self.fire_squeeze1x1bnbn(self.fire_squeeze1x1bnconv(depth_concat))
        expand3x3 = F.relu(self.fire_expand3x3(squeeze))
        expand1x1 = F.relu(self.fire_expand1x1(squeeze))
        after_semseg = torch.cat((expand1x1, expand3x3), dim=1)  # 256 channels
        # Head (layers 55-58): dropout, 1x1 conv, zoom from 1/4 resolution back to the input size, softmax
        logits = self.final_conv(self.dropout(after_semseg))
        logits = F.interpolate(logits, size=image.shape[2:], mode='bilinear', align_corners=False)
        softmax = F.softmax(logits, dim=1)
        # Inference outputs (layers 59-61): per-pixel confidence and class image.
        # Halcon's class_id_conversion maps internal indices to user class IDs; with the
        # contiguous IDs 0..2 it is the identity, so the argmax image is returned directly.
        segmentation_confidence, segmentation_image = torch.max(softmax, dim=1, keepdim=True)
        # Training loss (layer 62): pixelwise cross entropy, optionally weighted by weight_image
        crossentropy2d = None
        if segmentation_image_target is not None:
            per_pixel = F.cross_entropy(logits, segmentation_image_target, reduction='none')
            if weight_image is not None:
                per_pixel = per_pixel * weight_image.squeeze(1)
            crossentropy2d = per_pixel.mean()
        return segmentation_image, segmentation_confidence, softmax, crossentropy2d
model = Halcon_Compact()
# The Halcon input shapes (640, 512, ..., 2) translate to PyTorch (batch, channels, height, width) = (2, ..., 512, 640).
image = torch.randn(2, 3, 512, 640)
segmentation_image_target = torch.randint(0, 3, (2, 512, 640))  # per-pixel class indices
weight_image = torch.ones(2, 1, 512, 640)                       # per-pixel loss weights
segmentation_image, segmentation_confidence, softmax, loss = model(
    image, segmentation_image_target, weight_image)
print(segmentation_image.shape, segmentation_confidence.shape, loss)
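To cross-check the rewrite against the Halcon listing, forward hooks can print the output shape of every named submodule. The checker below is a small sketch of our own (report_shapes is not a Halcon or PyTorch facility); the printed (batch, channels, height, width) shapes should match the table's (width, height, depth, batch) entries with the axes reordered.

# Sketch: register forward hooks to print each submodule's output shape.
def report_shapes(model, image):
    handles = []
    for name, module in model.named_modules():
        if name:  # skip the root module itself
            def hook(mod, inp, out, name=name):
                if torch.is_tensor(out):
                    print(f'{name}: {tuple(out.shape)}')  # (batch, channels, height, width)
            handles.append(module.register_forward_hook(hook))
    with torch.no_grad():
        model(image)
    for h in handles:
        h.remove()

report_shapes(Halcon_Compact().eval(), torch.randn(2, 3, 512, 640))
# e.g. conv1 should report (2, 64, 256, 320), matching the table row (320, 256, 64, 2).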
Halcon's pretrained_dl_segmentation_enhanced.hdl
index; name; type; output shape (width, height, depth, batch); successor layers; description
0; weight_image; input; (640, 512, 1, 2); 103; -
1; segmentation_image_target; input; (640, 512, 1, 2); 2; -
2; target_internal; class_id_conversion; (640, 512, 1, 2); 103; -
23; image; input; (640, 512, 3, 2); 24, 98; -
24; conv1; convolution; (320, 256, 64, 2); 25; 7x7 convolutional layer with 64 kernels, 1 groups, dilation 1x1, stride 2x2, and with padding
25; batchnorm1; batchnorm; (320, 256, 64, 2); 27; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
27; pool1; pooling; (160, 128, 64, 2); 33, 28; -
28; res0_block0_conv1; convolution; (160, 128, 64, 2); 29; 1x1 convolutional layer with 64 kernels, 1 groups, dilation 1x1, stride 1x1, and without padding
29; res0_block0_bn1; batchnorm; (160, 128, 64, 2); 30; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
30; res0_block0_conv2; convolution; (160, 128, 64, 2); 31; 3x3 convolutional layer with 64 kernels, 1 groups, dilation 1x1, stride 1x1, and with padding
31; res0_block0_bn2; batchnorm; (160, 128, 64, 2); 32; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
32; res0_block0_conv3; convolution; (160, 128, 256, 2); 34; 1x1 convolutional layer with 256 kernels, 1 groups, dilation 1x1, stride 1x1, and without padding
33; res0_block0_conv4; convolution; (160, 128, 256, 2); 34; 1x1 convolutional layer with 256 kernels, 1 groups, dilation 1x1, stride 1x1, and without padding
34; res0_block0_sum; elementwise; (160, 128, 256, 2); 41, 35; -
35; res0_block1_bn1; batchnorm; (160, 128, 256, 2); 36; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
36; res0_block1_conv1; convolution; (160, 128, 64, 2); 37; 1x1 convolutional layer with 64 kernels, 1 groups, dilation 1x1, stride 1x1, and without padding
37; res0_block1_bn2; batchnorm; (160, 128, 64, 2); 38; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
38; res0_block1_conv2; convolution; (160, 128, 64, 2); 39; 3x3 convolutional layer with 64 kernels, 1 groups, dilation 1x1, stride 1x1, and with padding
39; res0_block1_bn3; batchnorm; (160, 128, 64, 2); 40; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
40; res0_block1_conv3; convolution; (160, 128, 256, 2); 41; 1x1 convolutional layer with 256 kernels, 1 groups, dilation 1x1, stride 1x1, and without padding
41; res0_block1_sum; elementwise; (160, 128, 256, 2); 48, 42; -
42; res0_block2_bn1; batchnorm; (160, 128, 256, 2); 43; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
43; res0_block2_conv1; convolution; (160, 128, 64, 2); 44; 1x1 convolutional layer with 64 kernels, 1 groups, dilation 1x1, stride 1x1, and without padding
44; res0_block2_bn2; batchnorm; (160, 128, 64, 2); 45; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
45; res0_block2_conv2; convolution; (160, 128, 64, 2); 46; 3x3 convolutional layer with 64 kernels, 1 groups, dilation 1x1, stride 1x1, and with padding
46; res0_block2_bn3; batchnorm; (160, 128, 64, 2); 47; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
47; res0_block2_conv3; convolution; (160, 128, 256, 2); 48; 1x1 convolutional layer with 256 kernels, 1 groups, dilation 1x1, stride 1x1, and without padding
48; res0_block2_sum; elementwise; (160, 128, 256, 2); 55, 49; -
49; res1_block0_bn1; batchnorm; (160, 128, 256, 2); 50; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
50; res1_block0_conv1; convolution; (160, 128, 128, 2); 51; 1x1 convolutional layer with 128 kernels, 1 groups, dilation 1x1, stride 1x1, and without padding
51; res1_block0_bn2; batchnorm; (160, 128, 128, 2); 52; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
52; res1_block0_conv2; convolution; (80, 64, 128, 2); 53; 3x3 convolutional layer with 128 kernels, 1 groups, dilation 1x1, stride 2x2, and with padding
53; res1_block0_bn3; batchnorm; (80, 64, 128, 2); 54; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
54; res1_block0_conv3; convolution; (80, 64, 512, 2); 56; 1x1 convolutional layer with 512 kernels, 1 groups, dilation 1x1, stride 1x1, and without padding
55; res1_block0_conv4; convolution; (80, 64, 512, 2); 56; 1x1 convolutional layer with 512 kernels, 1 groups, dilation 1x1, stride 2x2, and without padding
56; res1_block0_sum; elementwise; (80, 64, 512, 2); 63, 57; -
57; res1_block1_bn1; batchnorm; (80, 64, 512, 2); 58; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
58; res1_block1_conv1; convolution; (80, 64, 128, 2); 59; 1x1 convolutional layer with 128 kernels, 1 groups, dilation 1x1, stride 1x1, and without padding
59; res1_block1_bn2; batchnorm; (80, 64, 128, 2); 60; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
60; res1_block1_conv2; convolution; (80, 64, 128, 2); 61; 3x3 convolutional layer with 128 kernels, 1 groups, dilation 1x1, stride 1x1, and with padding
61; res1_block1_bn3; batchnorm; (80, 64, 128, 2); 62; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
62; res1_block1_conv3; convolution; (80, 64, 512, 2); 63; 1x1 convolutional layer with 512 kernels, 1 groups, dilation 1x1, stride 1x1, and without padding
63; res1_block1_sum; elementwise; (80, 64, 512, 2); 70, 64; -
64; res1_block2_bn1; batchnorm; (80, 64, 512, 2); 65; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
65; res1_block2_conv1; convolution; (80, 64, 128, 2); 66; 1x1 convolutional layer with 128 kernels, 1 groups, dilation 1x1, stride 1x1, and without padding
66; res1_block2_bn2; batchnorm; (80, 64, 128, 2); 67; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
67; res1_block2_conv2; convolution; (80, 64, 128, 2); 68; 3x3 convolutional layer with 128 kernels, 1 groups, dilation 1x1, stride 1x1, and with padding
68; res1_block2_bn3; batchnorm; (80, 64, 128, 2); 69; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
69; res1_block2_conv3; convolution; (80, 64, 512, 2); 70; 1x1 convolutional layer with 512 kernels, 1 groups, dilation 1x1, stride 1x1, and without padding
70; res1_block2_sum; elementwise; (80, 64, 512, 2); 77, 71; -
71; res1_block3_bn1; batchnorm; (80, 64, 512, 2); 72; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
72; res1_block3_conv1; convolution; (80, 64, 128, 2); 73; 1x1 convolutional layer with 128 kernels, 1 groups, dilation 1x1, stride 1x1, and without padding
73; res1_block3_bn2; batchnorm; (80, 64, 128, 2); 74; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
74; res1_block3_conv2; convolution; (80, 64, 128, 2); 75; 3x3 convolutional layer with 128 kernels, 1 groups, dilation 1x1, stride 1x1, and with padding
75; res1_block3_bn3; batchnorm; (80, 64, 128, 2); 76; batch norm layer with freezed running average, epsilon 0.0001, and ReLU activation
76; res1_block3_conv3; convolution; (80, 64, 512, 2); 77; 1x1 convolutional layer with 512 kernels, 1 groups, dilation 1x1, stride 1x1, and without padding
77; res1_block3_sum; elementwise; (80, 64, 512, 2); 90, 87, 89, 84, 86, 81, 83, 78, 80; -
78; semseg_pooling6; pooling; (6, 6, 512, 2); 79; -
79; semseg_conv6; convolution; (6, 6, 128, 2); 80; 1x1 convolutional layer with 128 kernels, 1 groups, dilation 1x1, stride 1x1, without padding, and ReLU activation
80; semseg_upsample6; zoom; (80, 64, 128, 2); 90; -
81; semseg_pooling3; pooling; (3, 3, 512, 2); 82; -
82; semseg_conv3; convolution; (3, 3, 128, 2); 83; 1x1 convolutional layer with 128 kernels, 1 groups, dilation 1x1, stride 1x1, without padding, and ReLU activation
83; semseg_upsample3; zoom; (80, 64, 128, 2); 90; -
84; semseg_pooling2; pooling; (2, 2, 512, 2); 85; -
85; semseg_conv2; convolution; (2, 2, 128, 2); 86; 1x1 convolutional layer with 128 kernels, 1 groups, dilation 1x1, stride 1x1, without padding, and ReLU activation
86; semseg_upsample2; zoom; (80, 64, 128, 2); 90; -
87; semseg_pooling1; pooling; (1, 1, 512, 2); 88; -
88; semseg_conv1; convolution; (1, 1, 128, 2); 89; 1x1 convolutional layer with 128 kernels, 1 groups, dilation 1x1, stride 1x1, without padding, and ReLU activation
89; semseg_upsample1; zoom; (80, 64, 128, 2); 90; -
90; depth_concat; concat; (80, 64, 1024, 2); 91; -
91; fire_squeeze1x1bnconv; convolution; (80, 64, 256, 2); 92; 1x1 convolutional layer with 256 kernels, 1 groups, dilation 1x1, stride 1x1, and without padding
92; fire_squeeze1x1bnbn; batchnorm; (80, 64, 256, 2); 94, 93; batch norm layer with auto momentum, epsilon 0.0001
93; fire_expand3x3; convolution; (80, 64, 128, 2); 95; 3x3 convolutional layer with 128 kernels, 1 groups, dilation 1x1, stride 1x1, with padding, and ReLU activation
94; fire_expand1x1; convolution; (80, 64, 128, 2); 95; 1x1 convolutional layer with 128 kernels, 1 groups, dilation 1x1, stride 1x1, without padding, and ReLU activation
95; after_semseg_conv; concat; (80, 64, 256, 2); 96; -
96; dropout; dropout; (80, 64, 256, 2); 97; -
97; final_conv; convolution; (80, 64, 3, 2); 98; 1x1 convolutional layer with 3 kernels, 1 groups, dilation 1x1, stride 1x1, and without padding
98; final_zoom; zoom; (640, 512, 3, 2); 99; -
99; softmax; softmax; (640, 512, 3, 2); 103, 101, 100; -
100; segmentation_confidence; depthmax; (640, 512, 1, 2); -; -
101; segmentation_image_internal; depthmax; (640, 512, 1, 2); 102; -
102; segmentation_image; class_id_conversion; (640, 512, 1, 2); -; -
103; crossentropy2d; loss_crossentropy2d; (1, 1, 1, 1); -; -
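Compared with the compact network, the enhanced variant adds a second residual stage (res1) whose first block halves the spatial resolution: the 3x3 convolution in the main branch and the 1x1 projection shortcut res1_block0_conv4 both use stride 2x2, and the elementwise sum in layer 56 combines the two paths. As a standalone sketch, a pre-activation bottleneck of that shape could be written as below (PreActBottleneck is our own helper for illustration, not part of the converted model that follows).

import torch
import torch.nn as nn
import torch.nn.functional as F

class PreActBottleneck(nn.Module):
    # Pre-activation bottleneck: BN+ReLU -> 1x1 -> BN+ReLU -> 3x3 (optionally strided) -> BN+ReLU -> 1x1,
    # added to a shortcut that is either the identity or a (strided) 1x1 projection.
    def __init__(self, in_ch, mid_ch, out_ch, stride=1):
        super().__init__()
        self.bn1 = nn.BatchNorm2d(in_ch)
        self.conv1 = nn.Conv2d(in_ch, mid_ch, kernel_size=1)
        self.bn2 = nn.BatchNorm2d(mid_ch)
        self.conv2 = nn.Conv2d(mid_ch, mid_ch, kernel_size=3, stride=stride, padding=1)
        self.bn3 = nn.BatchNorm2d(mid_ch)
        self.conv3 = nn.Conv2d(mid_ch, out_ch, kernel_size=1)
        self.shortcut = None
        if stride != 1 or in_ch != out_ch:
            self.shortcut = nn.Conv2d(in_ch, out_ch, kernel_size=1, stride=stride)

    def forward(self, x):
        out = F.relu(self.bn1(x))
        out = F.relu(self.bn2(self.conv1(out)))
        out = F.relu(self.bn3(self.conv2(out)))
        out = self.conv3(out)
        skip = x if self.shortcut is None else self.shortcut(x)
        return out + skip

# res1_block0 of the enhanced model: 256 -> 512 channels, feature map 160x128 -> 80x64
block = PreActBottleneck(256, 128, 512, stride=2)
print(block(torch.randn(2, 256, 128, 160)).shape)  # torch.Size([2, 512, 64, 80])

The same module with stride=1 and an identity shortcut reproduces the remaining res1 blocks.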
Converting the Enhanced model to PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
class Halcon_Enhanced(nn.Module):
    def __init__(self):
        super(Halcon_Enhanced, self).__init__()
        # Stem (layers 24-27): 7x7/2 convolution + BN/ReLU + 3x3/2 max pooling
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.batchnorm1 = nn.BatchNorm2d(64)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # res0 stage (layers 28-48): three bottleneck blocks, 256 output channels
        self.res0_block0_conv1 = nn.Conv2d(64, 64, kernel_size=1)
        self.res0_block0_bn1 = nn.BatchNorm2d(64)
        self.res0_block0_conv2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.res0_block0_bn2 = nn.BatchNorm2d(64)
        self.res0_block0_conv3 = nn.Conv2d(64, 256, kernel_size=1)
        self.res0_block0_conv4 = nn.Conv2d(64, 256, kernel_size=1)  # projection shortcut from pool1
        self.res0_block1_bn1 = nn.BatchNorm2d(256)
        self.res0_block1_conv1 = nn.Conv2d(256, 64, kernel_size=1)
        self.res0_block1_bn2 = nn.BatchNorm2d(64)
        self.res0_block1_conv2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.res0_block1_bn3 = nn.BatchNorm2d(64)
        self.res0_block1_conv3 = nn.Conv2d(64, 256, kernel_size=1)
        self.res0_block2_bn1 = nn.BatchNorm2d(256)
        self.res0_block2_conv1 = nn.Conv2d(256, 64, kernel_size=1)
        self.res0_block2_bn2 = nn.BatchNorm2d(64)
        self.res0_block2_conv2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.res0_block2_bn3 = nn.BatchNorm2d(64)
        self.res0_block2_conv3 = nn.Conv2d(64, 256, kernel_size=1)
        # res1 stage (layers 49-77): four bottleneck blocks, stride 2 in the first block, 512 output channels
        self.res1_block0_bn1 = nn.BatchNorm2d(256)
        self.res1_block0_conv1 = nn.Conv2d(256, 128, kernel_size=1)
        self.res1_block0_bn2 = nn.BatchNorm2d(128)
        self.res1_block0_conv2 = nn.Conv2d(128, 128, kernel_size=3, stride=2, padding=1)
        self.res1_block0_bn3 = nn.BatchNorm2d(128)
        self.res1_block0_conv3 = nn.Conv2d(128, 512, kernel_size=1)
        self.res1_block0_conv4 = nn.Conv2d(256, 512, kernel_size=1, stride=2)  # strided projection shortcut
        self.res1_block1_bn1 = nn.BatchNorm2d(512)
        self.res1_block1_conv1 = nn.Conv2d(512, 128, kernel_size=1)
        self.res1_block1_bn2 = nn.BatchNorm2d(128)
        self.res1_block1_conv2 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.res1_block1_bn3 = nn.BatchNorm2d(128)
        self.res1_block1_conv3 = nn.Conv2d(128, 512, kernel_size=1)
        self.res1_block2_bn1 = nn.BatchNorm2d(512)
        self.res1_block2_conv1 = nn.Conv2d(512, 128, kernel_size=1)
        self.res1_block2_bn2 = nn.BatchNorm2d(128)
        self.res1_block2_conv2 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.res1_block2_bn3 = nn.BatchNorm2d(128)
        self.res1_block2_conv3 = nn.Conv2d(128, 512, kernel_size=1)
        self.res1_block3_bn1 = nn.BatchNorm2d(512)
        self.res1_block3_conv1 = nn.Conv2d(512, 128, kernel_size=1)
        self.res1_block3_bn2 = nn.BatchNorm2d(128)
        self.res1_block3_conv2 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.res1_block3_bn3 = nn.BatchNorm2d(128)
        self.res1_block3_conv3 = nn.Conv2d(128, 512, kernel_size=1)
        # Pyramid pooling branches (layers 78-89); average pooling assumed, as in PSPNet
        self.semseg_pooling6 = nn.AdaptiveAvgPool2d(6)
        self.semseg_conv6 = nn.Conv2d(512, 128, kernel_size=1)
        self.semseg_pooling3 = nn.AdaptiveAvgPool2d(3)
        self.semseg_conv3 = nn.Conv2d(512, 128, kernel_size=1)
        self.semseg_pooling2 = nn.AdaptiveAvgPool2d(2)
        self.semseg_conv2 = nn.Conv2d(512, 128, kernel_size=1)
        self.semseg_pooling1 = nn.AdaptiveAvgPool2d(1)
        self.semseg_conv1 = nn.Conv2d(512, 128, kernel_size=1)
        # Fire module (layers 91-94): squeeze 1024 -> 256, expand to 128 + 128 channels
        self.fire_squeeze1x1bnconv = nn.Conv2d(1024, 256, kernel_size=1)
        self.fire_squeeze1x1bnbn = nn.BatchNorm2d(256)
        self.fire_expand3x3 = nn.Conv2d(256, 128, kernel_size=3, padding=1)
        self.fire_expand1x1 = nn.Conv2d(256, 128, kernel_size=1)
        # Head (layers 96-99): dropout, 1x1 conv to the 3 classes; zoom and softmax are applied in forward()
        self.dropout = nn.Dropout2d()
        self.final_conv = nn.Conv2d(256, 3, kernel_size=1)
    def forward(self, image, segmentation_image_target=None, weight_image=None):
        # image: (batch, 3, H, W); target: (batch, H, W) long class indices; weight_image: (batch, 1, H, W)
        x = self.conv1(image)
        x = F.relu(self.batchnorm1(x))
        x = self.pool1(x)
        # res0_block0: bottleneck branch + projection shortcut, elementwise sum (layer 34)
        branch = F.relu(self.res0_block0_bn1(self.res0_block0_conv1(x)))
        branch = F.relu(self.res0_block0_bn2(self.res0_block0_conv2(branch)))
        res0_block0 = self.res0_block0_conv3(branch) + self.res0_block0_conv4(x)
        # res0_block1 and res0_block2: pre-activation bottlenecks with identity shortcuts (layers 41, 48)
        branch = F.relu(self.res0_block1_bn1(res0_block0))
        branch = F.relu(self.res0_block1_bn2(self.res0_block1_conv1(branch)))
        branch = F.relu(self.res0_block1_bn3(self.res0_block1_conv2(branch)))
        res0_block1 = res0_block0 + self.res0_block1_conv3(branch)
        branch = F.relu(self.res0_block2_bn1(res0_block1))
        branch = F.relu(self.res0_block2_bn2(self.res0_block2_conv1(branch)))
        branch = F.relu(self.res0_block2_bn3(self.res0_block2_conv2(branch)))
        res0_block2 = res0_block1 + self.res0_block2_conv3(branch)
        # res1_block0: strided bottleneck + strided projection shortcut, elementwise sum (layer 56)
        branch = F.relu(self.res1_block0_bn1(res0_block2))
        branch = F.relu(self.res1_block0_bn2(self.res1_block0_conv1(branch)))
        branch = F.relu(self.res1_block0_bn3(self.res1_block0_conv2(branch)))
        res1_block0 = self.res1_block0_conv3(branch) + self.res1_block0_conv4(res0_block2)
        # res1_block1 .. res1_block3: identity-shortcut bottlenecks (layers 63, 70, 77)
        branch = F.relu(self.res1_block1_bn1(res1_block0))
        branch = F.relu(self.res1_block1_bn2(self.res1_block1_conv1(branch)))
        branch = F.relu(self.res1_block1_bn3(self.res1_block1_conv2(branch)))
        res1_block1 = res1_block0 + self.res1_block1_conv3(branch)
        branch = F.relu(self.res1_block2_bn1(res1_block1))
        branch = F.relu(self.res1_block2_bn2(self.res1_block2_conv1(branch)))
        branch = F.relu(self.res1_block2_bn3(self.res1_block2_conv2(branch)))
        res1_block2 = res1_block1 + self.res1_block2_conv3(branch)
        branch = F.relu(self.res1_block3_bn1(res1_block2))
        branch = F.relu(self.res1_block3_bn2(self.res1_block3_conv1(branch)))
        branch = F.relu(self.res1_block3_bn3(self.res1_block3_conv2(branch)))
        res1_block3 = res1_block2 + self.res1_block3_conv3(branch)
        # Pyramid pooling branches (layers 78-89): pool, 1x1 conv + ReLU, zoom back to the feature-map size
        size = res1_block3.shape[2:]
        p6 = F.interpolate(F.relu(self.semseg_conv6(self.semseg_pooling6(res1_block3))),
                           size=size, mode='bilinear', align_corners=False)
        p3 = F.interpolate(F.relu(self.semseg_conv3(self.semseg_pooling3(res1_block3))),
                           size=size, mode='bilinear', align_corners=False)
        p2 = F.interpolate(F.relu(self.semseg_conv2(self.semseg_pooling2(res1_block3))),
                           size=size, mode='bilinear', align_corners=False)
        p1 = F.interpolate(F.relu(self.semseg_conv1(self.semseg_pooling1(res1_block3))),
                           size=size, mode='bilinear', align_corners=False)
        # depth_concat (layer 90): backbone features + four branches = 512 + 4 * 128 = 1024 channels
        depth_concat = torch.cat((res1_block3, p6, p3, p2, p1), dim=1)
        # Fire module (layers 91-95); after_semseg_conv is a concat in the Halcon graph, not a convolution
        squeeze = self.fire_squeeze1x1bnbn(self.fire_squeeze1x1bnconv(depth_concat))
        expand3x3 = F.relu(self.fire_expand3x3(squeeze))
        expand1x1 = F.relu(self.fire_expand1x1(squeeze))
        after_semseg = torch.cat((expand1x1, expand3x3), dim=1)  # 256 channels
        # Head (layers 96-99): dropout, 1x1 conv, zoom from 1/8 resolution back to the input size, softmax
        logits = self.final_conv(self.dropout(after_semseg))
        logits = F.interpolate(logits, size=image.shape[2:], mode='bilinear', align_corners=False)
        softmax = F.softmax(logits, dim=1)
        # Inference outputs (layers 100-102): per-pixel confidence and class image.
        # class_id_conversion (layer 102) is the identity with the contiguous class IDs 0..2.
        segmentation_confidence, segmentation_image = torch.max(softmax, dim=1, keepdim=True)
        # Training loss (layer 103): pixelwise cross entropy, optionally weighted by weight_image
        crossentropy2d = None
        if segmentation_image_target is not None:
            per_pixel = F.cross_entropy(logits, segmentation_image_target, reduction='none')
            if weight_image is not None:
                per_pixel = per_pixel * weight_image.squeeze(1)
            crossentropy2d = per_pixel.mean()
        return segmentation_image, segmentation_confidence, softmax, crossentropy2d
model = Halcon_Enhanced()
# Same convention as above: Halcon (640, 512, ..., 2) corresponds to PyTorch (2, ..., 512, 640).
image = torch.randn(2, 3, 512, 640)
segmentation_image_target = torch.randint(0, 3, (2, 512, 640))  # per-pixel class indices
weight_image = torch.ones(2, 1, 512, 640)                       # per-pixel loss weights
segmentation_image, segmentation_confidence, softmax, loss = model(
    image, segmentation_image_target, weight_image)
print(segmentation_image.shape, segmentation_confidence.shape, loss)
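As a rough sanity check, the two rewrites can be compared by parameter count; the numbers reflect these PyTorch sketches only and are not guaranteed to match the pretrained Halcon weights.

# Compare the two rewritten models: parameter counts and head resolution.
for name, net in (('compact', Halcon_Compact()), ('enhanced', Halcon_Enhanced())):
    n_params = sum(p.numel() for p in net.parameters())
    print(f'{name}: {n_params / 1e6:.2f} M parameters')
# The compact head works at 1/4 of the input resolution (160x128 features for a 640x512 image),
# the enhanced head at 1/8 (80x64), before both are zoomed back to the full input size.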