# Neural network based on pytorch

Last time we covered classical networks; this time we look at residual networks and the YOLO algorithm built on top of them.
Let's start with the basic principle of the residual network, as shown in the figure.

A residual network can have many layers, but overall it can be divided into five stages.

So how do we implement a single block?

def Conv1(in_planes, places, stride=2):
    """ResNet stem: 7x7 conv (stride 2) + BN + ReLU + 3x3 max-pool (stride 2).

    Reduces spatial size by 4x and maps `in_planes` input channels (3 for
    RGB images) to `places` output channels (64 in the standard ResNet).

    BUG FIX: the original body was truncated — it contained no Conv2d at
    all, never used `in_planes` or `stride`, and would have applied
    BatchNorm2d(places) directly to the raw input.
    """
    return nn.Sequential(
        nn.Conv2d(in_channels=in_planes, out_channels=places,
                  kernel_size=7, stride=stride, padding=3, bias=False),
        nn.BatchNorm2d(places),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
    )

class Bottleneck(nn.Module):
    """Bottleneck residual block: 1x1 reduce -> 3x3 -> 1x1 expand, plus skip.

    When `downsampling` is True a 1x1 strided conv projects the input so
    its shape matches the expanded output before the residual addition.
    """

    def __init__(self, in_places, places, stride=1, downsampling=False, expansion=4):
        super(Bottleneck, self).__init__()
        self.expansion = expansion
        self.downsampling = downsampling

        mid = places
        out_ch = places * expansion

        # Main path: channel reduction, spatial conv, channel expansion.
        stages = [
            nn.Conv2d(in_channels=in_places, out_channels=mid, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(mid),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=mid, out_channels=mid, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(mid),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=mid, out_channels=out_ch, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(out_ch),
        ]
        self.bottleneck = nn.Sequential(*stages)

        # Skip path: 1x1 projection so shapes match for the addition.
        if self.downsampling:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels=in_places, out_channels=out_ch, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_ch),
            )
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        identity = self.downsample(x) if self.downsampling else x
        y = self.bottleneck(x)
        y += identity
        return self.relu(y)


Then we stack these blocks into five stages and assemble the full network:

class ResNet(nn.Module):
    """ResNet classifier built from Bottleneck blocks.

    Args:
        blocks: number of bottleneck blocks in each of the four residual
            stages, e.g. [3, 4, 6, 3] for ResNet-50.
        num_classes: output size of the final fully-connected layer.
        expansion: channel expansion factor of the Bottleneck block.
    """

    def __init__(self, blocks, num_classes=1000, expansion=4):
        super(ResNet, self).__init__()
        self.expansion = expansion

        # Stem: 3 -> 64 channels, spatial size / 4.
        self.conv1 = Conv1(in_planes=3, places=64)

        # Each stage's in_places equals the previous stage's places * expansion.
        self.layer1 = self.make_layer(in_places=64, places=64, block=blocks[0], stride=1)
        self.layer2 = self.make_layer(in_places=256, places=128, block=blocks[1], stride=2)
        self.layer3 = self.make_layer(in_places=512, places=256, block=blocks[2], stride=2)
        self.layer4 = self.make_layer(in_places=1024, places=512, block=blocks[3], stride=2)

        # AdaptiveAvgPool2d((1, 1)) generalizes the original AvgPool2d(7):
        # identical for 224x224 inputs (7x7 final feature map) but tolerant
        # of other input sizes.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # BUG FIX: the original hard-coded nn.Linear(2048, 2) and silently
        # ignored num_classes; honor the declared parameter instead
        # (512 * expansion == 2048 for the default expansion of 4).
        self.fc = nn.Linear(512 * self.expansion, num_classes)

        # Standard He initialization for convs, identity-like init for BN.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def make_layer(self, in_places, places, block, stride):
        """Build one residual stage of `block` Bottlenecks.

        The first block carries the stride and the projection shortcut
        (downsampling=True); the remaining blocks keep the identity skip.
        """
        layers = []
        layers.append(Bottleneck(in_places, places, stride, downsampling=True))
        for _ in range(1, block):
            layers.append(Bottleneck(places * self.expansion, places))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes)."""
        x = self.conv1(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)  # flatten to (batch, 512 * expansion)
        x = self.fc(x)
        return x


This structure scales to 50 layers and beyond; by varying the per-stage block counts we get three factory functions:

def ResNet50():
    """ResNet-50: [3, 4, 6, 3] bottleneck blocks in the four stages."""
    return ResNet(blocks=[3, 4, 6, 3])

def ResNet101():
    """ResNet-101: [3, 4, 23, 3] bottleneck blocks in the four stages."""
    return ResNet(blocks=[3, 4, 23, 3])

def ResNet152():
    """ResNet-152: [3, 8, 36, 3] bottleneck blocks in the four stages."""
    return ResNet(blocks=[3, 8, 36, 3])


If building this from scratch is too much trouble, you can use transfer learning instead:

# Alternative: load torchvision's ResNet-50 and replace its final
# fully-connected head (2048 input features) with a 2-class layer.
# NOTE(review): `pretrained=` is deprecated in newer torchvision in
# favor of the `weights=` argument — confirm against the installed version.
import torchvision.models as models
resnet50=models.resnet50(pretrained=False)
resnet50.fc=nn.Linear(2048,2)


Now let's talk about object detection with the YOLO algorithm.
I'm using an open-source project here:
https://github.com/ultralytics/yolov3
I won't go into the training details; once the network has been trained and the weights saved, we can load them in OpenCV:

def video_demo():
    """Run YOLO object detection on webcam frames via OpenCV's DNN module.

    Relies on names that must be prepared before this is called:
      net    -- a cv2.dnn network loaded from the trained weights
      LABELS -- list of class names matching the network's outputs
    NOTE(review): neither is defined in this file — confirm they are set
    up by the caller.
    """
    # One random color per class for drawing boxes.
    COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")

    # Camera index 0 is the default webcam.
    capture = cv2.VideoCapture(0)

    # The output-layer names are a property of the network, not of the
    # frame, so compute them once outside the loop.
    ln = net.getLayerNames()
    # .flatten() copes with both the old (Nx1 array) and new (flat array)
    # return shapes of getUnconnectedOutLayers().
    ln = [ln[i - 1] for i in net.getUnconnectedOutLayers().flatten()]

    while True:
        # BUG FIX: the original never grabbed a frame, so `image` was
        # undefined inside the loop.
        ret, image = capture.read()
        if not ret:
            break
        (H, W) = image.shape[:2]

        # BUG FIX: reset the detection lists every frame; the original
        # initialized them once and accumulated boxes across all frames.
        boxes = []
        confidences = []
        classIDs = []

        # Build a blob from the frame and run a forward pass to obtain
        # bounding boxes and class probabilities.
        blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416), swapRB=True, crop=False)
        net.setInput(blob)
        layerOutputs = net.forward(ln)

        # Loop over each output layer, then over each detection in it.
        for output in layerOutputs:
            for detection in output:
                scores = detection[5:]
                classID = np.argmax(scores)
                confidence = scores[classID]
                # Drop low-confidence detections.
                if confidence > 0.5:
                    # Box is (centerX, centerY, width, height), normalized
                    # to [0, 1]; scale back to pixel coordinates.
                    box = detection[0:4] * np.array([W, H, W, H])
                    (centerX, centerY, width, height) = box.astype("int")
                    # Convert center coordinates to the top-left corner.
                    x = int(centerX - (width / 2))
                    y = int(centerY - (height / 2))
                    boxes.append([x, y, int(width), int(height)])
                    confidences.append(float(confidence))
                    classIDs.append(classID)

        # Non-maximum suppression to prune overlapping boxes.
        idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.3)
        if len(idxs) > 0:
            for i in idxs.flatten():
                (x, y) = (boxes[i][0], boxes[i][1])
                (w, h) = (boxes[i][2], boxes[i][3])
                # Draw the box and label on the original frame.
                color = [int(c) for c in COLORS[classIDs[i]]]
                cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
                text = "{}: {:.4f}".format(LABELS[classIDs[i]], confidences[i])
                cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        cv2.imshow("Image", image)
        # Display for ~30 ms; ESC (27) exits.
        if (cv2.waitKey(30) & 0xff) == 27:
            break

    # BUG FIX: release the camera and close windows on every exit path,
    # not only when ESC was pressed.
    capture.release()
    cv2.destroyAllWindows()

if __name__ == '__main__':
parser = argparse.ArgumentParser()
opt=parser.parse_args()
weights = opt.weights
if weights.endswith('.pt'):
model = Darknet(opt.cfg)