1.先定义网络
卷积的特性
拥有局部感知机制:以滑动窗口的形式在特征图上进行滑动计算,所以具有局部感知能力
权值共享:滑动过程中,卷积核的值不会发生变化,所以又具有权值共享的特性
import torch
from torch import nn
class AlexNet(nn.Module):
    """AlexNet-style CNN (half-width variant) for image classification.

    Expects 3x224x224 inputs; ``num_classes`` sets the width of the final
    fully-connected layer.

    Convolution properties this architecture relies on:
    - local connectivity: each kernel slides over the feature map, so every
      output unit only sees a small local window of the input;
    - weight sharing: the kernel values do not change while sliding, so the
      same weights are reused across all spatial positions.
    """

    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()
        # Convolutional backbone. Built as a plain list first, then wrapped in
        # nn.Sequential so the state_dict keys (feature.0, feature.3, ...)
        # keep exactly the same layer order as before.
        conv_layers = [
            nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(48, 128, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(128, 192, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.feature = nn.Sequential(*conv_layers)
        # Classifier head. Dropout (default p=0.5) randomly deactivates
        # neurons during training to reduce overfitting.
        fc_layers = [
            nn.Dropout(p=0.5),
            nn.Linear(128 * 6 * 6, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, num_classes),
        ]
        self.classifier = nn.Sequential(*fc_layers)

    def forward(self, x):
        # Backbone -> flatten everything except the batch dim -> classifier.
        features = self.feature(x)
        flat = torch.flatten(features, start_dim=1)
        return self.classifier(flat)
if __name__ == '__main__':
    # Quick smoke test: push a batch of 64 dummy images through the network
    # and check the output shape.
    net = AlexNet(num_classes=5)
    dummy = torch.ones(64, 3, 224, 224)
    out = net(dummy)
    print(out.shape)
2.准备好数据
from torch.utils.data import DataLoader
from torchvision import transforms,datasets
import torch.optim as optim
from AlexNet_learning.model import AlexNet
import json
import torch
# Use the GPU when available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Preprocessing pipelines: augmentation for training, deterministic resize
# for validation; both end with per-channel normalisation to [-1, 1].
data_transform = {
    "train": transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]),
    "val": transforms.Compose([
        # BUGFIX: was (224, 244) — a typo; the model expects square
        # 224x224 input (the train pipeline crops to 224 as well).
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]),
}

train_path = "./zhang/train"
train_dataset = datasets.ImageFolder(root=train_path, transform=data_transform["train"])
# Class-name -> index mapping, e.g.
# {'daisy': 0, 'dandelion': 1, 'roses': 2, 'sunflowers': 3, 'tulips': 4}
flower_list = train_dataset.class_to_idx
# Invert it (index -> class name) so predictions can be decoded later.
class_dict = {idx: name for name, idx in flower_list.items()}
json_str = json.dumps(class_dict, indent=4)  # indent=4 for readability
# Persist the mapping for the inference scripts.
with open("class_indices.json", "w", encoding="utf-8") as f:
    f.write(json_str)

# Training loader: shuffle so each epoch sees the data in a new order.
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# Validation loader: fixed order, no shuffling needed.
val_path = "./zhang/val"
val_dataset = datasets.ImageFolder(root=val_path, transform=data_transform["val"])
val_num = len(val_dataset)
val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False)
3.开始训练
CrossEntropyLoss交叉熵损失
其中softmax函数用于单标签多分类输出,sigmoid用于二分类或多标签输出;Adam是一种自适应学习率的优化器,通常比普通SGD收敛更快(注意:优化器与softmax这类激活/输出函数是两类不同的组件,二者不能直接比较性能)。
# Build the network for the 5 flower classes and move it to the device.
net = AlexNet(num_classes=5)
net.to(device)
# CrossEntropyLoss applies log-softmax internally, so the network outputs
# raw logits. Adam with a small learning rate is used as the optimiser.
loss_function = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.0002)

best_acc = 0.0  # best validation accuracy seen so far
for epoch in range(20):
    print(f"----------第{epoch + 1}轮训练开始--------")
    # Dropout behaves differently in train/eval mode, so switch explicitly.
    net.train()
    for images, labels in train_dataloader:
        outputs = net(images.to(device))
        loss = loss_function(outputs, labels.to(device))
        # Standard optimisation step: clear old grads, backprop, update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validation pass — no gradients needed.
    net.eval()
    accuracy = 0.0
    with torch.no_grad():
        for val_images, val_labels in val_dataloader:
            logits = net(val_images.to(device))
            # argmax over dim=1 picks the predicted class for each sample.
            preds = torch.argmax(logits, dim=1)
            accuracy += (preds == val_labels.to(device)).sum().item()
    acc_rate = accuracy / val_num
    # Keep only the checkpoint with the best validation accuracy.
    if acc_rate > best_acc:
        best_acc = acc_rate
        torch.save(net.state_dict(), './AlexNet.pth')
    print(acc_rate)
4.测试
如果是猫和狗的分类:输入一张图片,输出要么是猫要么是狗,各类别互斥,采用softmax输出,样本只能归于一个类别,不可能同时归于多个类别;如果输出的类别中同时包括"人类"和"男人"这种可以同时成立的标签,就不再满足单一概率分布了,因为一张图片可能既属于"人类"又属于"男人",这属于多标签分类问题,此时每个类别各自用一个sigmoid独立输出。
import json
import torch
from PIL import Image
from torchvision import transforms
from AlexNet_learning.model import AlexNet
# Same deterministic preprocessing as the validation pipeline.
data_transform = transforms.Compose([
    # BUGFIX: was (224, 244) — a typo; the model was trained on 224x224.
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

img_path = "./th.jpg"
# Force 3-channel RGB so grayscale/RGBA files also work.
img = Image.open(img_path).convert("RGB")
img = data_transform(img)
# Add a batch dimension: the model expects (N, C, H, W), not (C, H, W).
img = torch.unsqueeze(img, dim=0)

# Load the class-index -> class-name mapping written during training.
try:
    with open("./class_indices.json", "r", encoding="utf-8") as json_file:
        class_indict = json.load(json_file)
except Exception as e:
    print(e)
    exit(-1)

# Rebuild the network and load the trained weights.
model = AlexNet(num_classes=5)
model_path = "./AlexNet.pth"
# map_location lets a GPU-trained checkpoint load on a CPU-only machine.
model.load_state_dict(torch.load(model_path, map_location="cpu"))
model.eval()
with torch.no_grad():
    output = torch.squeeze(model(img))      # drop the batch dimension
    predict = torch.softmax(output, dim=0)  # logits -> class probabilities
    predict_cla = torch.argmax(predict).numpy()
print(class_indict[str(predict_cla)], predict[predict_cla].item())
5.预测
与AlexNet差不多
import json
import torch
from PIL import Image
from torchvision import transforms
from model import vgg
# Same deterministic preprocessing as the validation pipeline.
data_transform = transforms.Compose([
    # BUGFIX: was (224, 244) — a typo; VGG here expects square 224x224 input.
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

img_path = "./th.jpg"
# Force 3-channel RGB so grayscale/RGBA files also work.
img = Image.open(img_path).convert("RGB")
img = data_transform(img)
# Add a batch dimension: the model expects (N, C, H, W), not (C, H, W).
img = torch.unsqueeze(img, dim=0)

# Load the class-index -> class-name mapping written during training.
try:
    with open("class_indices.json", "r", encoding="utf-8") as json_file:
        class_indict = json.load(json_file)
except Exception as e:
    print(e)
    exit(-1)

# Rebuild the VGG16 network and load the trained weights.
model = vgg("vgg16", num_classes=5, init_weights=False)
model_path = "VGG.pth"
# map_location lets a GPU-trained checkpoint load on a CPU-only machine.
model.load_state_dict(torch.load(model_path, map_location="cpu"))
model.eval()
with torch.no_grad():
    output = torch.squeeze(model(img))      # drop the batch dimension
    predict = torch.softmax(output, dim=0)  # logits -> class probabilities
    predict_cla = torch.argmax(predict).numpy()
print(class_indict[str(predict_cla)], predict[predict_cla].item())