PyTorch learning notes, Part 3
1. Activation functions and loss functions

import torch

a = torch.linspace(-100, 100, 10)
torch.sigmoid(a)    # squashes each element into (0, 1)
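The heading also mentions loss functions, but the snippet above only shows sigmoid. Below is a minimal sketch (my own addition, not from the original notes) of the two losses used later in these notes: F.mse_loss for regression-style targets and F.cross_entropy for classification logits.

from torch.nn import functional as F

pred = torch.randn(4, 10)                       # e.g. logits for 4 samples, 10 classes
target = torch.tensor([1, 0, 3, 9])             # class indices
F.cross_entropy(pred, target)                   # softmax + negative log-likelihood in one call
F.mse_loss(torch.sigmoid(a), torch.zeros(10))   # element-wise squared error, averaged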
2. Computing gradients

from torch.nn import functional as F

x = torch.ones(1)
w = torch.full([1], 2.)
w.requires_grad_()
mse = F.mse_loss(torch.ones(1), x * w)

# Two equivalent ways to get d(mse)/dw.
# Option 1: backward() accumulates the gradient into w.grad.
mse.backward(retain_graph=True)   # retain the graph so it can be reused below
w.grad
# Option 2: autograd.grad() returns the gradient directly.
torch.autograd.grad(mse, [w])
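A quick sanity check (my addition): with x = 1 and w = 2, mse = (w*x - 1)^2, so d(mse)/dw = 2*(w*x - 1)*x = 2, which is exactly what both calls above return.

print(w.grad.item())                   # 2.0
print((2 * (w * x - 1) * x).item())    # analytic gradient, also 2.0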
3. Single-layer perceptron

x = torch.randn(1, 10)
w = torch.randn(1, 10, requires_grad=True)
o = torch.sigmoid(x @ w.t())            # o has shape [1, 1]
loss = F.mse_loss(torch.ones(1, 1), o)
loss.backward()
w.grad                                  # shape [1, 10]: one partial derivative per weight
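For this single sigmoid unit the gradient can also be written out by hand: with loss = (o - 1)^2 and o = sigmoid(x @ w.t()), the chain rule gives d(loss)/dw = 2*(o - 1) * o * (1 - o) * x. A small check (my addition) that it matches what autograd computed:

with torch.no_grad():
    manual_grad = 2 * (o - 1) * o * (1 - o) * x
print(torch.allclose(manual_grad, w.grad))   # True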
4. Multi-output perceptron (one layer, several output units)

x = torch.randn(1, 10)
w = torch.randn(2, 10, requires_grad=True)   # two output units this time
o = torch.sigmoid(x @ w.t())                 # o has shape [1, 2]
loss = F.mse_loss(torch.ones(1, 2), o)       # target shape matches the output
loss.backward()
w.grad                                       # shape [2, 10]
5. Chain rule

from torch import autograd

x = torch.tensor(1.)
w1 = torch.tensor(2., requires_grad=True)
b1 = torch.tensor(1.)
w2 = torch.tensor(2., requires_grad=True)
b2 = torch.tensor(1.)

y1 = x * w1 + b1
y2 = y1 * w2 + b2

dy2_dy1 = autograd.grad(y2, [y1], retain_graph=True)[0]
dy1_dw1 = autograd.grad(y1, [w1], retain_graph=True)[0]
dy2_dw1 = autograd.grad(y2, [w1], retain_graph=True)[0]
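The point of the example is that the direct gradient equals the product of the two intermediate ones: dy2/dw1 = (dy2/dy1) * (dy1/dw1) = w2 * x = 2. A one-line check (my addition):

print(dy2_dy1 * dy1_dw1, dy2_dw1)   # tensor(2.) tensor(2.)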
6. An optimization example: finding extrema

def himmelblau(x):
    return (x[0] ** 2 + x[1] - 11) ** 2 + (x[0] + x[1] ** 2 - 7) ** 2

x = torch.tensor([0., 0.], requires_grad=True)
optimizer = torch.optim.Adam([x], lr=1e-3)

for step in range(20000):
    pred = himmelblau(x)
    optimizer.zero_grad()
    pred.backward()
    optimizer.step()
    if step % 2000 == 0:
        print('step {}: x = {}, f(x) = {}'.format(step, x.tolist(), pred.item()))
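Himmelblau's function has four global minima where f(x) = 0, roughly at (3, 2), (-2.805, 3.131), (-3.779, -3.283) and (3.584, -1.848); which one the loop converges to depends on the starting point chosen for x. A quick check (my addition) that one of them really is a zero of the function:

print(himmelblau(torch.tensor([3., 2.])).item())   # 0.0: (3, 2) is one of the four global minima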
7. Handwritten-digit classification

import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn import functional as F
import torchvision

# train_loader, test_loader and epochs are assumed to already exist
# (see the DataLoader sketch after this listing).

w1, b1 = torch.randn(200, 784, requires_grad=True), torch.zeros(200, requires_grad=True)
w2, b2 = torch.randn(200, 200, requires_grad=True), torch.zeros(200, requires_grad=True)
w3, b3 = torch.randn(10, 200, requires_grad=True), torch.zeros(10, requires_grad=True)

# Kaiming initialization keeps activations from vanishing/exploding through the ReLU layers.
torch.nn.init.kaiming_normal_(w1)
torch.nn.init.kaiming_normal_(w2)
torch.nn.init.kaiming_normal_(w3)

def forward(x):
    x = x @ w1.t() + b1
    x = F.relu(x)
    x = x @ w2.t() + b2
    x = F.relu(x)
    x = x @ w3.t() + b3
    x = F.relu(x)   # note: CrossEntropyLoss expects raw logits, so this last ReLU is usually omitted
    return x

learning_rate = 0.01
optimizer = optim.SGD([w1, b1, w2, b2, w3, b3], lr=learning_rate)
criteon = nn.CrossEntropyLoss()   # combines log-softmax and negative log-likelihood

for epoch in range(epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.view(-1, 28 * 28)   # flatten [B, 1, 28, 28] images into [B, 784]
        logits = forward(data)
        loss = criteon(logits, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

    test_loss = 0
    correct = 0
    for data, target in test_loader:
        data = data.view(-1, 28 * 28)
        logits = forward(data)
        test_loss += criteon(logits, target).item()

        pred = logits.data.max(1)[1]          # index of the largest logit = predicted class
        correct += pred.eq(target.data).sum()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
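The listing above uses train_loader, test_loader and epochs without defining them; they presumably come from an earlier part of these notes. A minimal sketch of a standard MNIST setup with torchvision (the names and values here are my assumptions):

batch_size = 200
epochs = 10

transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.1307,), (0.3081,))])   # MNIST mean/std

train_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST('mnist_data', train=True, download=True, transform=transform),
    batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST('mnist_data', train=False, download=True, transform=transform),
    batch_size=batch_size, shuffle=False)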
8. Other components

1) Linear layers, dropout and activation functions

class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(784, 200),
            nn.Dropout(0.5),              # randomly zeroes 50% of the activations during training
            nn.LeakyReLU(inplace=True),
            nn.Linear(200, 200),
            nn.Dropout(0.5),
            nn.LeakyReLU(inplace=True),
            nn.Linear(200, 10),
            nn.LeakyReLU(inplace=True),   # usually dropped when feeding CrossEntropyLoss, which expects raw logits
        )

    def forward(self, x):
        x = self.model(x)
        return x
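One thing worth noting about Dropout (my addition): it behaves differently in training and evaluation, so the module has to be switched explicitly, e.g.:

net = MLP()
net.train()   # dropout active while training
# ... training loop ...
net.eval()    # dropout disabled (acts as identity) when evaluating on the test set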
2) GPU acceleration, momentum and regularization

device = torch.device('cuda:0')
net = MLP().to(device)
optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=momentum,
                      weight_decay=0.01)             # weight_decay adds L2 regularization
criteon = nn.CrossEntropyLoss().to(device)

# Manual L1 regularization: add the sum of absolute parameter values to the loss.
# (momentum, logits and target come from the surrounding training code.)
regularization_loss = 0
for param in net.parameters():
    regularization_loss += torch.sum(torch.abs(param))

classify_loss = criteon(logits, target)
loss = classify_loss + 0.01 * regularization_loss

optimizer.zero_grad()
loss.backward()
optimizer.step()
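These lines assume logits and target from the current batch; in context the batch also has to be moved to the same device as the model. A rough sketch of how this fits inside the training loop (my assumption of the surrounding code, reusing train_loader from section 7):

for data, target in train_loader:
    data = data.view(-1, 28 * 28).to(device)
    target = target.to(device)
    logits = net(data)
    # ... then compute classify_loss + the L1 penalty and run the optimizer steps shown above ...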
3) Learning-rate scheduling

from torch.optim.lr_scheduler import ReduceLROnPlateau, StepLR

scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.1, patience=patience)   # shrink the LR by 10x when the metric stops improving for `patience` epochs
scheduler = StepLR(optimizer, step_size=30, gamma=0.1)                           # shrink the LR by 10x every 30 epochs
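How the two schedulers are driven differs (my addition): ReduceLROnPlateau needs to be given the monitored metric each time, while StepLR is simply stepped once per epoch. The val_loss name below is a placeholder for whatever metric is being tracked:

for epoch in range(epochs):
    # ... train for one epoch, then compute val_loss on a held-out set ...
    scheduler.step(val_loss)   # ReduceLROnPlateau: pass the monitored metric
    # scheduler.step()         # StepLR: no argument, call once per epoch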