__init__() takes 1 positional argument but 4 were given
There are 2 underscores before and after init, 4 underscores in total. It feels similar to overriding a constructor in Java: if your own __init__ is not picked up (for example because it was written as _init_ with single underscores), the original inherited __init__, which takes only self, gets called instead, and passing extra arguments to it raises this error.
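A minimal sketch of one common way to hit this error when subclassing nn.Module (the class and argument names here just mirror the example below):

import torch.nn as nn

class TwoLayerNet(nn.Module):
    # Misspelled with single underscores, so Python never registers this as the
    # constructor; the inherited nn.Module.__init__, which takes only self,
    # is called instead.
    def _init_(self, D_in, H, D_out):
        super().__init__()
        self.linear1 = nn.Linear(D_in, H)

model = TwoLayerNet(1000, 100, 10)
# TypeError: __init__() takes 1 positional argument but 4 were given
# (the exact wording varies across Python/PyTorch versions)

Renaming _init_ back to __init__ (two underscores on each side) makes the call above work.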
# A gradient-descent training loop written with torch.nn
import torch
import torch.nn as nn

N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

class TwoLayerNet(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(TwoLayerNet, self).__init__()
        self.linear1 = nn.Linear(D_in, H, bias=False)
        self.linear2 = nn.Linear(H, D_out, bias=False)

    def forward(self, x):
        # clamp(min=0) acts as the ReLU between the two linear layers
        y_pred = self.linear2(self.linear1(x).clamp(min=0))
        return y_pred

model = TwoLayerNet(D_in, H, D_out)
loss_function = nn.MSELoss(reduction='sum')
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for it in range(500):
    y_pred = model(x)                    # forward pass
    loss = loss_function(y_pred, y)      # compute loss
    print(it, loss.item())
    optimizer.zero_grad()                # clear gradients from the previous step
    loss.backward()                      # backward pass
    optimizer.step()                     # update parameters
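For comparison, the same architecture can be written without a custom class by using nn.Sequential; this is just an equivalent sketch, not part of the original notes:

import torch.nn as nn

model = nn.Sequential(
    nn.Linear(1000, 100, bias=False),
    nn.ReLU(),
    nn.Linear(100, 10, bias=False),
)
# model(x) accepts the same (64, 1000) input and can be trained with the
# identical MSELoss + Adam loop shown above.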
# The same two-layer network trained with NumPy and hand-written gradients
import numpy as np

N, D_in, H, D_out = 64, 1000, 100, 10
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)
learning_rate = 1e-6

for it in range(500):
    # forward pass
    x = (x - x.mean()) / np.sqrt(x.var())  # standardize (try running with/without this to compare the effect)
    h = x.dot(w1)
    h_relu = np.maximum(0, h)              # 64*100
    y_pred = h_relu.dot(w2)

    # compute loss
    loss = np.square(y_pred - y).sum()
    print(it, loss.item())

    # compute gradients (naming them grad_* would read better)
    y_pred_gra = 2 * (y_pred - y)          # 64*10
    w2_gra = h_relu.T.dot(y_pred_gra)
    h_relu_gra = y_pred_gra.dot(w2.T)
    h_gra = h_relu_gra.copy()
    h_gra[h < 0] = 0                       # backward through the ReLU
    w1_gra = x.T.dot(h_gra)

    w1 -= learning_rate * w1_gra
    w2 -= learning_rate * w2_gra
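A quick way to verify the hand-written gradients above is to compare one of them against a finite-difference estimate; this is a minimal self-contained sketch (the tiny sizes, seed, epsilon, and checked entry are arbitrary choices for illustration):

import numpy as np

np.random.seed(0)
N, D_in, H, D_out = 4, 5, 3, 2           # tiny sizes just for the check
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

def loss_fn(w1, w2):
    h_relu = np.maximum(0, x.dot(w1))
    return np.square(h_relu.dot(w2) - y).sum()

# analytic gradient of the loss w.r.t. w2, same formula as in the loop above
h_relu = np.maximum(0, x.dot(w1))
y_pred = h_relu.dot(w2)
w2_gra = h_relu.T.dot(2 * (y_pred - y))

# central-difference estimate for a single entry of w2
eps = 1e-6
w2_plus, w2_minus = w2.copy(), w2.copy()
w2_plus[0, 0] += eps
w2_minus[0, 0] -= eps
num_grad = (loss_fn(w1, w2_plus) - loss_fn(w1, w2_minus)) / (2 * eps)
print(w2_gra[0, 0], num_grad)            # the two values should agree closely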