def forward(self, inputs):
    """Calls extract_features to extract features, applies the final linear layer, and returns logits."""
    # Convolution layers
    x = self.extract_features(inputs)
    # Pooling and final linear layer
    x = F.adaptive_avg_pool2d(x, 1).squeeze(-1).squeeze(-1)
    if self._dropout:
        x = F.dropout(x, p=self._dropout, training=self.training)
    x = self._fc(x)
    return x
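For reference, this forward comes from efficientnet_pytorch's EfficientNet class: extract_features runs the convolutional backbone, and self._fc is the final linear classifier, which is why the recipes below swap out model._fc.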
Modifying the classification output layer, method 1: read in_features to get the layer's input size, then rewrite the layer
from efficientnet_pytorch import EfficientNet
from torch import nn
model = EfficientNet.from_pretrained('efficientnet-b5')
feature = model._fc.in_features
model._fc = nn.Linear(in_features=feature, out_features=45, bias=True)
print(model)
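A quick sanity check of the new head (a sketch; 456x456 is EfficientNet-B5's default resolution, though the adaptive pooling in forward accepts other sizes):
import torch
logits = model(torch.randn(1, 3, 456, 456))
print(logits.shape)   # expected: torch.Size([1, 45])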
Modifying the classification output layer, method 2: assign to out_features to change the number of output classes directly
from efficientnet_pytorch import EfficientNet
model = EfficientNet.from_pretrained('efficientnet-b5')
model._fc.out_features = 9
print(model)
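Caution: this second method is deceptive. Assigning to out_features only updates a Python attribute; the Linear layer's weight tensor keeps its original shape, so the model still produces 1000 logits. A minimal check (sketch; the 2048 below assumes b5's feature width):
import torch
print(model._fc.weight.shape)                     # still torch.Size([1000, 2048])
print(model(torch.randn(1, 3, 456, 456)).shape)   # still torch.Size([1, 1000])
Prefer method 1, which actually replaces the layer.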
Setting different learning rates for different layers
import torch
from torch import nn

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, 1)
        self.conv2 = nn.Conv2d(64, 64, 1)
        self.conv3 = nn.Conv2d(64, 64, 1)
        self.conv4 = nn.Conv2d(64, 64, 1)
        self.conv5 = nn.Conv2d(64, 64, 1)

    def forward(self, x):
        out = self.conv5(self.conv4(self.conv3(self.conv2(self.conv1(x)))))
        return out
————————————————
Copyright notice: this is an original article by the CSDN blogger "BinWang-cvlab", licensed under CC 4.0 BY-SA. Please include the original source link and this notice when reposting.
Original link: https://blog.csdn.net/wangbin12122224/article/details/79949824
Give one layer a larger learning rate while all other layers share a common learning rate
net = Net()
lr = 0.1
conv5_params = list(map(id, net.conv5.parameters()))  # this layer gets a different lr: lr * 100
base_params = filter(lambda p: id(p) not in conv5_params,  # filter out conv5
                     net.parameters())
optimizer = torch.optim.SGD([
    {'params': base_params},
    {'params': net.conv5.parameters(), 'lr': lr * 100}],
    lr=lr, momentum=0.9)
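To confirm the grouping, print each group's effective learning rate (sketch):
for i, g in enumerate(optimizer.param_groups):
    print(i, g['lr'])   # group 0 falls back to the default lr=0.1, group 1 uses 10.0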
Multiple layers: conv5 and conv4 here are the attribute names from the network definition; change the names to match your own architecture
conv5_params = list(map(id, net.conv5.parameters()))
conv4_params = list(map(id, net.conv4.parameters()))
base_params = filter(lambda p: id(p) not in conv5_params + conv4_params,
                     net.parameters())  # note: concatenate the id lists with +
optimizer = torch.optim.SGD([
    {'params': base_params},
    {'params': net.conv5.parameters(), 'lr': lr * 100},
    {'params': net.conv4.parameters(), 'lr': lr * 100}],
    lr=lr, momentum=0.9)
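An equivalent formulation (a sketch, not from the original post) that avoids the id() bookkeeping by grouping on parameter names; it assumes the special layers' attribute names start with conv4 or conv5:
special = ('conv4', 'conv5')
base_params = [p for n, p in net.named_parameters() if not n.startswith(special)]
special_params = [p for n, p in net.named_parameters() if n.startswith(special)]
optimizer = torch.optim.SGD(
    [{'params': base_params},
     {'params': special_params, 'lr': lr * 100}],
    lr=lr, momentum=0.9)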
Learning-rate decay after the per-layer learning rates have been set
####################
# model structure
#-------------------
model = Mymodel()
if use_gpu:
model = model.cuda()
####################
# loss
#-------------------
criterion = nn.CrossEntropyLoss()
####################
# optimizer
#-------------------
ignored_params = list(map(id, model.ViewModel.viewclassifier.parameters())) + list(map(id, model.Block.parameters()))
base_params = filter(lambda p: id(p) not in ignored_params, model.parameters())
optimizer_ft = optim.SGD([
{'params': base_params, 'lr': 0.01},
{'params': model.ViewModel.viewclassifier.parameters(), 'lr': 0.001},
{'params': model.Block.parameters(), 'lr': 0.03}],
weight_decay=1e-3, momentum=0.9, nesterov=True)
####################
#** Set lr_decay **
#-------------------
## decays each param group's learning rate separately
def adjust_lr(epoch):
    step_size = 60                            # decay every 60 epochs (matches the StepLR below)
    decay = 0.1 ** (epoch // step_size)       # integer division: 0, 1, 2, ...
    for g in optimizer_ft.param_groups:
        # 'initial_lr' holds each group's base lr (see the param_groups dump below);
        # scaling from it avoids compounding the decay across calls
        g['lr'] = g['initial_lr'] * decay
######################################
### optimizer.param_groups: type and contents
[
{ 'params': base_params, 'lr': 0.01, 'momentum': 0.9, 'dampening': 0,
'weight_decay': 0.001, 'nesterov': True, 'initial_lr': 0.01 },
{ 'params': model.ViewModel.viewclassifier.parameters(), 'lr': 0.001,
'momentum': 0.9, 'dampening': 0, 'weight_decay': 0.001, 'nesterov': True,
'initial_lr': 0.001 },
{ 'params': model.Block.parameters(), 'lr': 0.03, 'momentum': 0.9,
'dampening': 0, 'weight_decay': 0.001, 'nesterov': True, 'initial_lr':
0.03 }
]
### optimizer.param_groups: type and contents
######################################
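A quick sanity check of adjust_lr (sketch): at epoch 60, epoch // 60 is 1, so every group drops to one tenth of its initial_lr:
adjust_lr(60)
print([g['lr'] for g in optimizer_ft.param_groups])   # [0.001, 0.0001, 0.003]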
for epoch in range(start_epoch, args.epochs):
    adjust_lr(epoch)   # update once per epoch
model.train(True) # Set model to training mode
....
#### Or use the built-in StepLR scheduler:
exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_ft, step_size=60, gamma=0.1)
for epoch in range(start_epoch, args.epochs):
    exp_lr_scheduler.step()   # the original recipe steps it before model.train(True)
    model.train(True)         # set model to training mode
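Note that since PyTorch 1.1 the recommended order is optimizer.step() first, then scheduler.step() once per epoch after the inner loop (a sketch; train_loader is an assumed DataLoader):
for epoch in range(start_epoch, args.epochs):
    model.train(True)
    for inputs, targets in train_loader:
        optimizer_ft.zero_grad()
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer_ft.step()
    exp_lr_scheduler.step()   # step the schedule after the epoch's updates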
Other ways to set the learning rate
def lr_poly(base_lr, i_iter, max_iter, power):
    return base_lr * ((1 - float(i_iter) / max_iter) ** power)

def adjust_learning_rate(optimizer, learning_rate, i_iter, max_iter, power):
    """Polynomial decay of the learning rate, from learning_rate down to 0 over max_iter iterations."""
    lr = lr_poly(learning_rate, i_iter, max_iter, power)
    optimizer.param_groups[0]['lr'] = lr
    return lr
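Usage sketch: call it once per iteration; power=0.9 is a common choice for such polynomial schedules (max_iter and train_loader are assumptions here):
max_iter = args.epochs * len(train_loader)
for i_iter in range(max_iter):
    lr = adjust_learning_rate(optimizer, args.lr, i_iter, max_iter, power=0.9)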
————————————————
Copyright notice: this is an original article by the CSDN blogger "咆哮的阿杰", licensed under CC 4.0 BY-SA. Please include the original source link and this notice when reposting.
Original link: https://blog.csdn.net/qq_34914551/article/details/87699317
Freezing a layer: the optimizer then has to filter out the frozen layers
# Two ways to iterate over a network's parameters:
for name, p in net.named_parameters():   # yields (name, parameter) pairs
    print(name, p.requires_grad)
for p in net.parameters():               # yields the parameters only
    print(p.requires_grad)
## Freezing parameters:
model = EfficientNet.from_pretrained(args.arch, num_classes=29)
for param in model._bn0.parameters():   # use the attribute name of whichever layer you want to freeze
    param.requires_grad = False         # simply set requires_grad to False
optimizer = optim.SGD(                  # the optimizer must skip the frozen layers
    filter(lambda p: p.requires_grad, model.parameters()),  # remember the filter(), otherwise it raises an error
    lr=0.01,
    weight_decay=1e-5, momentum=0.9, nesterov=True)
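A sanity check (sketch) that the freeze took effect, counting trainable versus total parameters:
n_total = sum(p.numel() for p in model.parameters())
n_train = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'trainable: {n_train} / total: {n_total}')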