课程记录
从模型创建方法到AlexNet
课程代码
无
作业
1. 采用步进(Step into)的调试方法从创建网络模型开始(net = LeNet(classes=2))进入到每一个被调用函数,观察net的_modules字段何时被构建并且赋值,记录其中所有进入的类与函数
例如:
第一步:net = LeNet(classes=2)
第二步:LeNet类,__init__(),super(LeNet, self).__init__()
第三步: Module类, ......
第n步:返回net
略
2. 采用 Sequential 容器改写 AlexNet,给 features 中的每一个网络层增加名字,并通过下面这行代码打印出来:
print(alexnet._modules['features']._modules.keys())
1. AlexNet
import os
import zipfile
# Print the full path of every file found under /kaggle/input.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# Extract both competition archives into ./data (created if missing).
print(os.getcwd())
os.makedirs('data', exist_ok=True)
for archive_path in (
    '../input/dogs-vs-cats-redux-kernels-edition/train.zip',
    '../input/dogs-vs-cats-redux-kernels-edition/test.zip',
):
    with zipfile.ZipFile(archive_path) as archive:
        archive.extractall('data')

# Directories read by the rest of the script.
train_dir = './data/train'
test_dir = './data/test'
print('len:', len(os.listdir(train_dir)), len(os.listdir(test_dir)))
# Bare expressions: their value is only displayed when a notebook evaluates
# them as the last statement of a cell; in a plain script they are discarded.
os.listdir(train_dir)[:5]
os.listdir(test_dir)[:5]
import numpy as np
import pandas as pd
import glob
import os
import torch
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.model_selection import train_test_split
from torchvision import datasets, models, transforms
import torch.nn as nn
import torch.optim as optim
# Run configuration shared by the data loaders and the optimizer.
batch_size = 100
seed = 1234  # single seed reused for torch and for the train/val split

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

torch.manual_seed(seed)
if device == 'cuda':
    torch.cuda.manual_seed_all(seed)
lr = 0.001

# glob returns paths in arbitrary, filesystem-dependent order; sort them so
# the sample shown below and the seeded split are the same on every run.
train_list = sorted(glob.glob(os.path.join(train_dir, '*.jpg')))
test_list = sorted(glob.glob(os.path.join(test_dir, '*.jpg')))
print('show data:', len(train_list), train_list[:3])
print('show data:', len(test_list), test_list[:3])

# Display the first training image and report its type and array shape.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
img = Image.open(train_list[0])
plt.imshow(img)
plt.axis('off')
plt.show()
print(type(img))
img_np = np.asarray(img)
print(img_np.shape)

# Hold out 20% of the labelled images for validation. random_state makes the
# split reproducible, matching the seeding of torch above.
train_list, val_list = train_test_split(
    train_list, test_size=0.2, random_state=seed
)
print(len(train_list), train_list[:3])
print(len(val_list), val_list[:3])
def _resize_to_tensor():
    """Return a new pipeline: resize to 224x224, then convert to a tensor."""
    steps = [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ]
    return transforms.Compose(steps)


# All three splits use the same preprocessing, but each name is bound to its
# own Compose instance. No random cropping is applied (RandomCrop(224) stays
# disabled).
train_transforms = _resize_to_tensor()
val_transforms = _resize_to_tensor()
test_transforms = _resize_to_tensor()
class dataset(torch.utils.data.Dataset):
    """Image dataset backed by a list of file paths, labelled by file name.

    The label is derived from the part of the file name before the first
    dot: ``dog`` maps to 1 and ``cat`` maps to 0.

    Args:
        file_list: Sequence of image file paths.
        now_transform: Callable applied to each loaded PIL image.
    """

    # File-name prefix -> class index.
    _LABELS = {'cat': 0, 'dog': 1}

    def __init__(self, file_list, now_transform):
        self.file_list = file_list  # list of path
        self.transform = now_transform

    def __len__(self):
        """Return the number of files in the dataset."""
        return len(self.file_list)

    @classmethod
    def _label_from_path(cls, img_path):
        """Map a path such as ``.../dog.12.jpg`` to its integer class label.

        Raises:
            ValueError: If the file name does not start with ``dog.`` or
                ``cat.`` (for example an unlabelled test image).
        """
        # basename handles the platform's path separator, unlike split('/').
        prefix = os.path.basename(img_path).split('.')[0]
        if prefix not in cls._LABELS:
            # Raise instead of assert: asserts are stripped under ``python -O``
            # and the raw string would then be returned as the label.
            raise ValueError(
                'cannot derive a dog/cat label from file name: {!r}'.format(img_path)
            )
        return cls._LABELS[prefix]

    def __getitem__(self, idx):
        """Return ``(transformed_image, label)`` for the file at ``idx``."""
        img_path = self.file_list[idx]
        with Image.open(img_path) as raw:
            # Force three channels so grayscale / RGBA / CMYK files still
            # match the 3-channel input expected by the network.
            img = raw.convert('RGB')
        img_transformed = self.transform(img)
        label = self._label_from_path(img_path)
        return img_transformed, label
# Wrap the file lists in datasets; each split uses its own transform pipeline.
train_data = dataset(train_list, train_transforms)
val_data = dataset(val_list, val_transforms)
# NOTE: the test split is not loaded here. `dataset` derives labels from the
# file-name prefix and rejects anything that is not dog.* / cat.*, so it would
# need a label-free variant first. TODO: confirm the test file naming.

# Shuffle only the training data; validation metrics do not depend on order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_data, batch_size=batch_size, shuffle=True
)
val_loader = torch.utils.data.DataLoader(
    dataset=val_data, batch_size=batch_size, shuffle=False
)

# Sanity checks: sample counts vs. batch counts, then inspect one sample.
print(len(train_data), len(train_loader))
print(len(val_data), len(val_loader))
print(train_data, type(train_data))

t1, t2 = train_data[7]
print(t1, t2)
print(type(t1))
print(t1.shape)
class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier with named feature layers.

    The convolutional stack is registered in ``self.features`` under explicit
    names (``conv1``, ``relu1``, ``pool1``, ...), so that
    ``model._modules['features']._modules.keys()`` lists readable layer names.
    Integer indexing (``model.features[0]``) keeps working. Note that the
    ``state_dict`` keys of the feature layers follow these names
    (``features.conv1.weight`` rather than ``features.0.weight``).

    Args:
        num_classes: Size of the final linear layer's output.
    """

    def __init__(self, num_classes=2):
        super().__init__()

        # (name, layer) pairs in execution order.
        feature_layers = (
            ('conv1', nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2)),
            ('relu1', nn.ReLU(inplace=True)),
            ('pool1', nn.MaxPool2d(kernel_size=3, stride=2)),
            ('conv2', nn.Conv2d(64, 192, kernel_size=5, padding=2)),
            ('relu2', nn.ReLU(inplace=True)),
            ('pool2', nn.MaxPool2d(kernel_size=3, stride=2)),
            ('conv3', nn.Conv2d(192, 384, kernel_size=3, padding=1)),
            ('relu3', nn.ReLU(inplace=True)),
            ('conv4', nn.Conv2d(384, 256, kernel_size=3, padding=1)),
            ('relu4', nn.ReLU(inplace=True)),
            ('conv5', nn.Conv2d(256, 256, kernel_size=3, padding=1)),
            ('relu5', nn.ReLU(inplace=True)),
            ('pool3', nn.MaxPool2d(kernel_size=3, stride=2)),
        )
        self.features = nn.Sequential()
        for layer_name, layer in feature_layers:
            self.features.add_module(layer_name, layer)

        # Pool to a fixed 6x6 map so the classifier's input size is constant.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run features -> avgpool -> flatten (from dim 1) -> classifier."""
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x
# Build the model and show its structure.
model = AlexNet().to(device)
model.train()
print(model._modules.keys())
print(model._modules['features'][0])
# Homework item 2: list the layer names registered in the features container.
print(model._modules['features']._modules.keys())

optimizer = optim.Adam(params=model.parameters(), lr=lr)
loss_f = nn.CrossEntropyLoss()
epochs = 10

print('start epoch iter, please wait...')
for epoch in range(epochs):
    # ---- training pass ----
    # Re-enter train mode every epoch: validation below switches to eval mode.
    model.train()
    epoch_loss = 0.0
    epoch_accuracy = 0.0
    for data, label in train_loader:
        data = data.to(device)
        label = label.to(device)

        output = model(data)
        loss = loss_f(output, label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        acc = (output.argmax(dim=1) == label).float().mean()
        # .item() yields plain Python floats, so the running sums do not keep
        # tensors (and their autograd history) alive across batches.
        epoch_accuracy += acc.item() / len(train_loader)
        epoch_loss += loss.item() / len(train_loader)
    print('Epoch : {}, train accuracy : {}, train loss : {}'.format(epoch+1, epoch_accuracy,epoch_loss))

    # ---- validation pass ----
    # eval() disables Dropout; without it validation metrics are computed on a
    # randomly thinned network and are noisier/lower than the real ones.
    model.eval()
    with torch.no_grad():
        epoch_val_accuracy = 0.0
        epoch_val_loss = 0.0
        for data, label in val_loader:
            data = data.to(device)
            label = label.to(device)

            val_output = model(data)
            val_loss = loss_f(val_output, label)

            acc = (val_output.argmax(dim=1) == label).float().mean()
            epoch_val_accuracy += acc.item() / len(val_loader)
            epoch_val_loss += val_loss.item() / len(val_loader)
    print('Epoch : {}, val_accuracy : {}, val_loss : {}'.format(epoch+1, epoch_val_accuracy,epoch_val_loss))
注意:猫狗大战的训练效果不理想,目前不确定是数据量太小、代码有错,还是需要调整超参数。