net.apply:
model.apply(fn)
recursively applies the function fn to every submodule of the parent module, and also to model, the parent module itself.
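A typical use of apply is weight initialization. A minimal sketch (the helper init_weights and the layer sizes here are illustrative, not from the notes):

import torch
from torch import nn

def init_weights(m):
    # apply calls this once per module; only act on the Linear layers.
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, std=0.01)

net = nn.Sequential(nn.Linear(784, 256), nn.ReLU(), nn.Linear(256, 10))
net.apply(init_weights)  # fn runs on every submodule, then on net itself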
net.train() and net.eval() (when the network contains dropout or normalization layers, you should tell it whether it is in training mode; softmax also contains a normalization step)
Because:
dropout only takes effect during training; at inference it should not add noise to the input (dropout acts as a regularizer)
batch normalization also behaves differently: during training it normalizes with per-batch statistics, while at inference it uses the running statistics accumulated during training
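A small sketch of the difference in behavior (the module and input are illustrative):

import torch
from torch import nn

net = nn.Sequential(nn.Linear(4, 4), nn.Dropout(p=0.5))
x = torch.ones(1, 4)

net.train()    # dropout active: about half the activations are zeroed at random
print(net(x))
net.eval()     # dropout disabled: the output is deterministic
print(net(x))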
class Accumulator:  #@save
    """Accumulate sums over `n` variables."""
    def __init__(self, n):
        self.data = [0.0] * n

    def add(self, *args):
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]
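For example, it can track the number of correct predictions and the number of examples across batches (the numbers below are made up):

metric = Accumulator(2)        # [number correct, number of examples]
metric.add(8, 10)              # batch 1: 8 of 10 correct
metric.add(6, 10)              # batch 2: 6 of 10 correct
print(metric[0] / metric[1])   # running accuracy: 0.7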
enumerate(sequence, [start=0])
>>> seasons = ['Spring', 'Summer', 'Fall', 'Winter']
>>> list(enumerate(seasons))
[(0, 'Spring'), (1, 'Summer'), (2, 'Fall'), (3, 'Winter')]
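The optional start argument offsets the index:

>>> list(enumerate(seasons, start=1))
[(1, 'Spring'), (2, 'Summer'), (3, 'Fall'), (4, 'Winter')]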
# The structure of LeNet
Sequential(
  (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (1): Sigmoid()
  (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (4): Sigmoid()
  (5): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (6): Flatten(start_dim=1, end_dim=-1)
  (7): Linear(in_features=400, out_features=120, bias=True)
  (8): Sigmoid()
  (9): Linear(in_features=120, out_features=84, bias=True)
  (10): Sigmoid()
  (11): Linear(in_features=84, out_features=10, bias=True)
)
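One way to build a network that prints exactly this structure (a sketch; in_features=400 is 16 * 5 * 5, the flattened output of the last pooling layer for 28x28 inputs):

import torch
from torch import nn

net = nn.Sequential(
    nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    nn.Conv2d(6, 16, kernel_size=5), nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.Linear(16 * 5 * 5, 120), nn.Sigmoid(),
    nn.Linear(120, 84), nn.Sigmoid(),
    nn.Linear(84, 10))
print(net)  # prints the Sequential structure shown above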
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=lr)  # lr: learning rate, not specified in these notes
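Putting the pieces together, a minimal sketch of one training epoch (train_iter, a DataLoader yielding (X, y) batches, is an assumption, not from the notes):

net.train()                  # enable training-mode behavior for dropout / batch norm
metric = Accumulator(3)      # [sum of loss, number correct, number of examples]
for X, y in train_iter:
    optimizer.zero_grad()
    y_hat = net(X)
    l = loss(y_hat, y)       # CrossEntropyLoss averages over the batch
    l.backward()
    optimizer.step()
    metric.add(l.item() * y.numel(),
               (y_hat.argmax(dim=1) == y).sum().item(),
               y.numel())
print(metric[0] / metric[2], metric[1] / metric[2])  # average loss, accuracy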