1, The ResNet idea y = F(x) + x: wrapping each sub-module in a residual (skip) connection helps the model converge faster.
import torch.nn as nn
# MHSA, Pooling, DropPath and MLP are helper modules defined elsewhere in this project

class Block(nn.Module):  # Encoder block: token mixer + MLP, each wrapped in a residual connection
    def __init__(self,
                 dim,                 # dimension of each token
                 drop_rate=0.1,
                 switch_flag=False,   # True: multi-head self-attention; False: pooling token mixer
                 num_heads=8):
        super(Block, self).__init__()
        self.switch_flag = switch_flag
        self.norm1 = nn.GroupNorm(1, dim)
        # self.norm1 = nn.BatchNorm2d(dim)
        if self.switch_flag:
            self.attn = MHSA(n_dims=dim, num_heads=num_heads)
        else:
            # self.attn = nn.AdaptiveAvgPool2d((16, 16))
            self.attn = Pooling()
        self.drop_path = DropPath(drop_rate) if drop_rate > 0. else nn.Identity()
        self.norm2 = nn.GroupNorm(1, dim)
        self.mlp = MLP(in_features=dim, drop=drop_rate)

    def forward(self, x):
        x = x + self.drop_path(self.attn(self.norm1(x)))   # y = F(x) + x
        x = x + self.mlp(self.norm2(x))                     # y = F(x) + x
        return x
2, When the model produces its final classification output, it is best to put a normalization layer in the head. For example, a classifier head printed from a timm model, followed by a hand-written equivalent:
(head): Sequential(
  (global_pool): SelectAdaptivePool2d (pool_type=avg, flatten=Identity())
  (norm): LayerNorm2d((512,), eps=1e-06, elementwise_affine=True)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (drop): Identity()
  (fc): Linear(in_features=512, out_features=1000, bias=True)
)
self.num_features = dims[-1]   # channel dimension of the last stage
self.head = nn.Sequential(
    nn.AdaptiveAvgPool2d((1, 1)),                    # [15, 64, 16, 16] --> [15, 64, 1, 1]
    nn.GroupNorm(1, self.num_features, eps=1e-06),   # normalization before the classifier
    # nn.BatchNorm2d(self.num_features),
    nn.Flatten(1),                                   # [15, 64, 1, 1] --> [15, 64]
    nn.Linear(self.num_features, num_classes)        # [15, 64] --> [15, 10]
)
3, The smaller the feature map processed inside the model's Blocks, the faster the model runs.
(*) For example, with a 32*32 input, the 8*8 embedding below runs faster than the 16*16 one (a rough timing sketch follows the two lines).
1) self.embedding = nn.Conv2d(3, 64, kernel_size=4, stride=2, padding=1, bias=False)  # [N, C, 16, 16]
2) self.embedding = nn.Conv2d(3, 64, kernel_size=(7, 7), stride=(4, 4), padding=(2, 2))  # [N, C, 8, 8]
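A rough way to check this claim, assuming the Block from tip 1 (and its Pooling/MLP dependencies) is importable; the loop count is arbitrary:

import time
import torch

block = Block(dim=64).eval()             # Block from tip 1
for size in (16, 8):
    x = torch.randn(15, 64, size, size)  # same batch and channels, different spatial size
    start = time.time()
    with torch.no_grad():
        for _ in range(100):
            block(x)
    print(f'{size}x{size}: {time.time() - start:.3f} s')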
4, Adding BN layers inside the backbone is very likely to improve the model's generalization ability, e.g. after the query / key projections of the attention module:
self.query = nn.Sequential(
    nn.Conv2d(n_dims, n_dims, kernel_size=1),
    nn.BatchNorm2d(n_dims, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
self.key = nn.Sequential(
    nn.Conv2d(n_dims, n_dims, kernel_size=1),
    nn.BatchNorm2d(n_dims, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
self.value = nn.Conv2d(n_dims, n_dims, kernel_size=1)
5, For the stem, two stages work better than one.
1) What does the one-stage version look like? It converts the 32*32 input directly into an 8*8 feature map:
self.stem = nn.Conv2d(3, 64, kernel_size=(7, 7), stride=(4, 4), padding=(2, 2))  # [N, C, 8, 8]
2) The two-stage version converts 32*32 into 8*8 through two steps:
class Stem(nn.Module):
    def __init__(self):
        super(Stem, self).__init__()
        self.step1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)),   # 32*32 --> 16*16
            nn.BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
            nn.GELU()
        )
        self.step2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)),  # 16*16 --> 8*8
            nn.BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
            nn.GELU()
        )

    def forward(self, x):
        x = self.step1(x)
        x = self.step2(x)
        return x
6, When GPU memory is not enough, try shrinking the feature map.
The stem below shrinks the original image by a factor of 8 overall (stride1 = 2, stride2 = 4; total stride = stride1 * stride2).
class Stem(nn.Module):
    def __init__(self):
        super(Stem, self).__init__()
        self.step1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)),   # downsample by 2
            nn.BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
            nn.GELU()
        )
        self.step2 = nn.Sequential(
            # nn.Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)),
            nn.Conv2d(32, 64, kernel_size=(5, 5), stride=(4, 4), padding=(2, 2)),  # downsample by 4
            nn.BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
            nn.GELU()
        )

    # example input shapes: [128, 256, 256, 3] / [64, 3, 32, 32]
    def forward(self, x):
        x = self.step1(x)
        x = self.step2(x)
        return x
7, When val_loss rises instead of falling during the first few epochs, the model is most likely overfitting.
1) Try reducing the number of layers in the model.
2) Remember that transformer-style attention is very parameter-hungry and compute-hungry.
3) Shrinking the feature map and increasing batch_size can effectively improve val_acc.
4) Before training starts, normalize the data to zero mean and unit variance; this makes training easier.
5) When splitting the data with train_test_split, fix a random seed, e.g. random_state=123 (see the sketch after this list).
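A minimal sketch of points 4) and 5), assuming images is a float NumPy array in NCHW layout and labels is an integer array (both names are placeholders):

import numpy as np
from sklearn.model_selection import train_test_split

def normalize_and_split(images, labels):
    # 4) per-channel normalization to zero mean / unit variance
    mean = images.mean(axis=(0, 2, 3), keepdims=True)
    std = images.std(axis=(0, 2, 3), keepdims=True)
    images = (images - mean) / (std + 1e-8)
    # 5) a fixed random_state makes the split reproducible across runs
    return train_test_split(images, labels, test_size=0.2, random_state=123)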
8, How to attach a transform to a custom dataset.
import torch
from torch.utils.data import Dataset, TensorDataset, DataLoader
from torchvision import transforms
from sklearn.model_selection import train_test_split

class CustomDataset(Dataset):
    def __init__(self, X, Y, transform=None):
        self.X = X
        self.Y = Y
        self.transform = transform

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        img = self.X[idx]
        target = self.Y[idx]
        if self.transform:
            img = self.transform(img)
        return img, target

def load_train_test_dataset():
    # load_images_and_labels(), random_seed and batch_size are defined elsewhere in the project
    images, labels = load_images_and_labels()
    x_train, x_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=random_seed)
    # convert the NumPy arrays to PyTorch tensors
    x_train_tensor = torch.Tensor(x_train)
    y_train_tensor = torch.Tensor(y_train)
    x_test_tensor = torch.Tensor(x_test)
    y_test_tensor = torch.Tensor(y_test)
    # NHWC --> NCHW
    x_train_tensor = x_train_tensor.permute(0, 3, 1, 2)
    x_test_tensor = x_test_tensor.permute(0, 3, 1, 2)
    train_transform = transforms.Compose([
        # transforms.ToTensor(),  # not needed here: the data is already a tensor
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    # build the datasets with the transform attached
    # train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
    # test_dataset = TensorDataset(x_test_tensor, y_test_tensor)
    train_dataset = CustomDataset(x_train_tensor, y_train_tensor, transform=train_transform)
    test_dataset = CustomDataset(x_test_tensor, y_test_tensor, transform=train_transform)
    # build the DataLoaders
    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)
    return train_loader, train_dataset, test_loader, test_dataset

if __name__ == '__main__':
    train_loader, train_dataset, test_loader, test_dataset = load_train_test_dataset()
    for idx, (data, target) in enumerate(train_loader):   # iterate over batches, tracking the batch index idx
        data, target = data.cuda(), target.long().cuda()  # move the batch to the GPU; cast labels to long for the loss
        output = model(data)                               # model is assumed to be built elsewhere
9, When the last few percentage points of accuracy refuse to improve:
In this situation there is usually still some degree of overfitting; randomly dropping some connections (Dropout / DropPath) or removing some layers can squeeze out a bit more accuracy (a sketch follows).
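For reference, a minimal stochastic-depth sketch in the spirit of timm's DropPath (not a copy of it); the Block in tip 1 wraps its attention branch with exactly this kind of module:

import torch.nn as nn

class DropPath(nn.Module):
    # Stochastic depth: randomly drop the whole residual branch for some samples.
    def __init__(self, drop_prob=0.1):
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        if self.drop_prob == 0. or not self.training:
            return x
        keep_prob = 1.0 - self.drop_prob
        # one keep/drop decision per sample, broadcast over the remaining dims
        shape = (x.shape[0],) + (1,) * (x.ndim - 1)
        mask = x.new_empty(shape).bernoulli_(keep_prob)
        return x * mask / keep_prob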
10, Adding the following combination to the network can raise Acc by roughly 0.1%.
self.act_bn = nn.Sequential(
    nn.GELU(),
    nn.Conv2d(n_dims, n_dims, kernel_size=1),
    nn.BatchNorm2d(n_dims, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
11, Why does Acc oscillate up and down on the validation set?
1) Class imbalance: if one class has far more samples than another, the model may lean toward predicting the majority class, making accuracy fluctuate. Remedies include oversampling, undersampling, or class weights (see the sketch after this list).
2) Overfitting: the model does well on the training set but poorly on the validation set, possibly because it has fit noise in the training data. Remedies include more training data, regularization, or a smaller model.
3) Poor data quality: wrong, missing, or inconsistent samples can make validation performance unstable. Remedies include data cleaning and preprocessing.
4) Insufficient hyperparameter tuning: under-tuned hyperparameters can make validation performance unstable. Remedies include cross-validation and grid search.
5) Distribution mismatch: the validation set's distribution may differ from the training set's. Remedies include stratified sampling or re-splitting the data (also in the sketch below).
6) Validation set too small: if the validation set is too small to reflect the overall distribution, results on it will be noisy. Remedies include enlarging the validation set or using cross-validation.
7) Label errors: mislabeled samples make validation results unstable. Remedies include checking the labels and using multiple validation sets.
8) Wrong model choice: a model that does not suit the dataset will behave unstably on the validation set. Remedies include choosing a more suitable model or using ensembles.
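A minimal sketch of two of these remedies, class weights for point 1) and a stratified split for point 5); images and labels here are placeholder arrays, not variables defined in this document:

import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split

# 1) class weights: rare classes get a larger weight in the loss
counts = np.bincount(labels)                       # samples per class
weights = torch.tensor(counts.sum() / (len(counts) * counts), dtype=torch.float32)
criterion = nn.CrossEntropyLoss(weight=weights)    # move the weight tensor to .cuda() if the model is on GPU

# 5) stratified split: keep the class ratios identical in train and validation
x_train, x_val, y_train, y_val = train_test_split(
    images, labels, test_size=0.2, random_state=123, stratify=labels)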
12, Building your own training dataset (the most basic way).
import os
import cv2
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split

def load_train_test_dataset(args=None):
    data_folder = r'F:\dl_workspace\data'            # assume all images live under 'data_folder'
    images = []
    labels = []
    for filename in os.listdir(data_folder):         # walk through every file in the directory
        if filename.endswith('.png'):                # only handle PNG images
            file_path = os.path.join(data_folder, filename)
            img = cv2.imread(file_path)
            img = np.transpose(img, (2, 0, 1))       # HWC --> CHW
            img = img.astype('float32')
            img /= 255.0                             # scale pixel values to [0, 1]
            images.append(img)
            # build the label from the file name
            if filename.startswith('0_'):
                labels.append(0)                     # negative sample
            elif filename.startswith('1_'):
                labels.append(1)                     # positive sample
    X = np.array(images)
    y = np.array(labels)
    print('X:', len(X), '/ y:', len(y))
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    # convert the data to PyTorch tensors
    X_train_tensor = torch.tensor(X_train).to(torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.long)
    X_test_tensor = torch.tensor(X_test).to(torch.float32)
    y_test_tensor = torch.tensor(y_test, dtype=torch.long)
    # build the TensorDatasets
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
    print("________train_dataset / test_dataset________>", len(train_dataset), len(test_dataset))
    # build the DataLoaders
    train_loader = DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=args.batch_size, shuffle=False)
    return train_loader, train_dataset, test_loader, test_dataset
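A possible way to call it, assuming batch_size arrives through argparse (the flag name is an assumption, chosen to match args.batch_size above):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', type=int, default=64)
args = parser.parse_args()
train_loader, train_dataset, test_loader, test_dataset = load_train_test_dataset(args)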