Setting different learning rates in PyTorch
First, define a LeNet network:
import torch
import torch.nn as nn
from torch import optim
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 6, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        self.classifier = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 10)
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(-1, 16 * 5 * 5)
        x = self.classifier(x)
        return x
net = Net()
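As a quick sanity check (assuming 3×32×32 inputs, the size the 16 * 5 * 5 flatten above implies), the network can be run on a dummy batch:

dummy = torch.randn(1, 3, 32, 32)  # one 3-channel 32x32 image
print(net(dummy).shape)            # torch.Size([1, 10])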
# Set different learning rates for different sub-networks; this is common in fine-tuning.
# If no learning rate is specified for a parameter group, the outermost default learning rate is used.
optimizer = optim.SGD([
    {'params': net.features.parameters()},  # uses the default learning rate, 1e-5
    {'params': net.classifier.parameters(), 'lr': 1e-2}
], lr=1e-5)
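Each group can be inspected afterwards through optimizer.param_groups; the group that omits 'lr' inherits the default:

for group in optimizer.param_groups:
    print(len(group['params']), group['lr'])
# features' parameters run at 1e-5 (the default), classifier's at 1e-2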
# Give only two of the fully connected layers a larger learning rate; every other layer keeps a smaller one.
# Within the classifier Sequential, the Linear layers sit at indices 0, 2 and 4
# (index 3 is a ReLU with no parameters, so classifier[2] is the second Linear layer).
special_layers = nn.ModuleList([net.classifier[0], net.classifier[2]])
special_layers_params = list(map(id, special_layers.parameters()))
base_params = filter(lambda p: id(p) not in special_layers_params,
                     net.parameters())
optimizer = optim.SGD([
    {'params': base_params},
    {'params': special_layers.parameters(), 'lr': 0.01}
], lr=0.001)
print(optimizer)  # shows both parameter groups and their learning rates
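The groups also stay accessible through optimizer.param_groups after construction, so the learning rates can be adjusted in place during training (the 0.1 decay factor below is purely illustrative):

# Decay every group's learning rate by 10x, e.g. at a scheduled epoch.
for param_group in optimizer.param_groups:
    param_group['lr'] *= 0.1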
Alternatively:
# The same pattern for the two convolutional layers, which live inside
# net.features at indices 0 and 3:
lr = 0.001  # base learning rate
conv1_params = list(map(id, net.features[0].parameters()))
conv2_params = list(map(id, net.features[3].parameters()))
base_params = filter(lambda p: id(p) not in conv1_params + conv2_params,
                     net.parameters())
params = [{'params': base_params},
          {'params': net.features[0].parameters(), 'lr': lr * 100},
          {'params': net.features[3].parameters(), 'lr': lr * 100}]
optimizer = torch.optim.SGD(params, lr=lr, momentum=0.9)
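Finally, a minimal sketch of one training step with this optimizer, using a dummy batch and dummy labels (illustrative only) to show each group being updated at its own rate:

x = torch.randn(4, 3, 32, 32)        # dummy batch of 32x32 images
target = torch.randint(0, 10, (4,))  # dummy class labels
criterion = nn.CrossEntropyLoss()

optimizer.zero_grad()
loss = criterion(net(x), target)
loss.backward()
optimizer.step()  # each parameter group is updated with its own learning rate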