1.CNN感受野
经过卷积操作后的输出只与部分输入有关,反向推回去的区域即为感受野。
out=(9-3+2*0)/2+1=4
out=(4-2+2*0)/2+1=2
使用多个小的卷积核所需要的参数更少。
2.网络构建
2.1 vgg网络参数
vgg各个版本的基本结构一样,只是每个阶段中卷积层的数量不一样。
# VGG layer configurations.
# Each number is a conv layer's output-channel count; 'M' marks a 2x2 max-pool.
# The variants differ only in how many conv layers sit in each stage.
cfgs = {
    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}
2.2 定义特征提取网络
def make_features(cfg: list):
    """Build the VGG feature-extraction backbone from a configuration list.

    Numbers in ``cfg`` become 3x3 conv layers (padding 1) followed by ReLU;
    the string 'M' becomes a 2x2 max-pool with stride 2.
    """
    # Input images are 3-channel RGB.
    channels = 3
    blocks = []
    for spec in cfg:
        if spec == "M":
            blocks.append(nn.MaxPool2d(kernel_size=2, stride=2))
        else:
            blocks.append(nn.Conv2d(channels, spec, kernel_size=3, padding=1))
            blocks.append(nn.ReLU(True))
            # The channel count changes after each conv layer.
            channels = spec
    # Unpack the layer list into a single Sequential container.
    return nn.Sequential(*blocks)
2.3 定义分类网络
初始化模型,并定义全连接层
class VGG(nn.Module):
    """VGG classifier: a conv feature backbone plus a three-layer FC head."""

    def __init__(self, features, num_classes=1000, init_weights=False):
        super(VGG, self).__init__()
        # Feature-extraction backbone (e.g. built by make_features).
        self.features = features
        # Classification head; input is the flattened 512x7x7 feature map.
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),  # random dropout to reduce overfitting
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            # No ReLU after the final layer: softmax/cross-entropy is applied downstream.
            nn.Linear(4096, num_classes),
        )
        # Optionally run the explicit weight-initialization pass.
        if init_weights:
            self._initialize_weights()
正向传播
def forward(self, x):
    """Run feature extraction, flatten, then classify."""
    feats = self.features(x)
    # Flatten everything except the batch dimension for the FC head.
    flat = torch.flatten(feats, start_dim=1)
    return self.classifier(flat)
初始化权重,用xavier_uniform的方法
def _initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.xavier_uniform_(m.weight)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.xavier_uniform_(m.weight)
nn.init.constant_(m.bias, 0)
3.实例化模型
# 进行实例化
# **kwargs是将一个可变的关键字参数的字典传给函数实参
def vgg(model_name="vgg16", **kwargs):
    """Instantiate a VGG variant by configuration name.

    Args:
        model_name: key into ``cfgs`` ('vgg11', 'vgg13', 'vgg16' or 'vgg19').
        **kwargs: forwarded to ``VGG`` (e.g. num_classes, init_weights).

    Returns:
        A ``VGG`` model built from the named configuration.

    Raises:
        ValueError: if ``model_name`` is not a known configuration.
    """
    # Explicit raise instead of `assert`: asserts are stripped under `python -O`,
    # and the original message wrongly said "model number".
    if model_name not in cfgs:
        raise ValueError("Warning: model name {} not in cfgs dict!".format(model_name))
    cfg = cfgs[model_name]
    model = VGG(make_features(cfg), **kwargs)
    return model
4.训练
def main():
    """Train VGG16 on an ImageFolder dataset (5 classes) and keep the best checkpoint."""
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Training uses random crop/flip augmentation; validation only resizes to 224x224.
    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "val": transforms.Compose([transforms.Resize((224, 224)),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}
    # NOTE(review): relative dataset paths — assumes the script runs from the project root.
    train_path = "zhang/train"
    val_path = "zhang/val"
    train_data = datasets.ImageFolder(root=train_path, transform=data_transform["train"])
    val_data = datasets.ImageFolder(root=val_path, transform=data_transform["val"])
    train_dataset = DataLoader(train_data, batch_size=32, shuffle=True)
    val_dataset = DataLoader(val_data, batch_size=32, shuffle=True)
    val_num = len(val_data)
    flower_list = train_data.class_to_idx
    # Invert the mapping to index -> class name, so the prediction script can
    # translate model outputs back into labels.
    class_dict = dict((val, key) for key, val in flower_list.items())
    json_str = json.dumps(class_dict, indent=4)  # indent=4 just for readability
    with open("class_indices.json", "w", encoding="utf-8") as f:
        f.write(json_str)
    # Build the model and training machinery.
    net = vgg("vgg16", num_classes=5, init_weights=True)
    net.to(device)
    loss_function = torch.nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0002)
    best_acc = 0.0  # best validation accuracy seen so far
    for epoch in range(10):
        print(f"----------第{epoch + 1}轮训练开始--------")
        # Dropout layers behave differently in train/eval mode, so the mode
        # must be switched explicitly each epoch.
        net.train()
        for i, (images, labels) in enumerate(train_dataset):
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            # Standard optimization step: clear grads, backprop, update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Validation pass (no gradients needed).
        net.eval()
        accuracy = 0.0
        with torch.no_grad():
            for data in val_dataset:
                test_Img, target = data
                output = net(test_Img.to(device))
                # dim=1: argmax across class scores for each sample in the batch
                predict_y = torch.argmax(output, dim=1)
                accuracy += (predict_y == target.to(device)).sum().item()
        acc_rate = accuracy / val_num
        # Save only the checkpoint with the best validation accuracy so far.
        if acc_rate > best_acc:
            best_acc = acc_rate
            torch.save(net.state_dict(), './VGG.pth')
        print(acc_rate)

if __name__ == '__main__':
    main()
5.predict
与AlexNet大同小异
import json
import torch
from PIL import Image
from torchvision import transforms
from model import vgg
# Preprocessing must match training: 224x224 input, same normalization.
# BUG FIX: the original resized to (224, 244) — a typo for (224, 224).
data_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
img_path = "./th.jpg"
img = Image.open(img_path)
img = data_transform(img)
# Add a batch dimension: the model expects (N, C, H, W), not (C, H, W).
img = torch.unsqueeze(img, dim=0)
try:
    # Context manager guarantees the handle is closed (the original leaked it).
    with open("class_indices.json", "r") as json_file:
        class_indict = json.load(json_file)
except Exception as e:
    print(e)
    exit(-1)
# Build the network; weights come from the checkpoint, so no re-initialization.
model = vgg("vgg16", num_classes=5, init_weights=False)
model_path = "VGG.pth"
model.load_state_dict(torch.load(model_path))
model.eval()
with torch.no_grad():
    # Squeeze away the batch dimension of the single-image output.
    output = torch.squeeze(model(img))
    predict = torch.softmax(output, dim=0)  # convert logits to probabilities
    predict_cla = torch.argmax(predict).numpy()
print(class_indict[str(predict_cla)], predict[predict_cla].item())