# PyTorch learning notes: tensor basics, NumPy interop, autograd, torch.nn,
# optimizers, and a FizzBuzz classifier.
import torch
import numpy as np
import torch.nn as nn
# Tensor creation basics
x = torch.empty(5, 3)                       # uninitialized memory
x = torch.rand(5, 3)                        # uniform samples in [0, 1)
x = torch.zeros(5, 3, dtype=torch.long)     # int64 zeros
x = torch.tensor([5.5, 3])                  # built directly from data
x = x.new_ones(5, 3)                        # reuses x's dtype and device
x = torch.rand_like(x, dtype=torch.float)   # reuses x's shape, overrides dtype
# Reshaping with view (no copy; the result shares x's storage)
y = x.view(15)      # flatten 5x3 into 15
y = x.view(-1, 5)   # -1 is inferred from the other dimension: 3x5
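# Illustrative check (not in the original notes): because view shares
# storage, a write through y is visible through x.
y[0, 0] = 100.
print(x[0, 0])  # tensor(100.)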
# NumPy interop: .numpy() and from_numpy() share the underlying buffer
a = torch.ones(5)
b = a.numpy()
b[1] = 2             # modifying the array also changes the tensor a
a = np.ones(5)
b = torch.from_numpy(a)
np.add(a, 1, out=a)  # the in-place update of a is visible through b
print(a)
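# Illustrative check (not in the original notes): the shared buffer means
# the tensor changed too.
print(b)  # tensor([2., 2., 2., 2., 2.], dtype=torch.float64)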
# Move tensors to the GPU when one is available
if torch.cuda.is_available():
    device = torch.device("cuda")
    y = torch.ones_like(b, device=device)  # b, not a: a is a NumPy array, ones_like expects a tensor
    b = b.to(device)
    c = b + y
    print(c)
    print(c.to("cpu", torch.double))
    # Two equivalent ways to move back to the CPU and convert to NumPy
    # (.detach() is preferred over the legacy .data)
    y.to("cpu").detach().numpy()
    y.cpu().detach().numpy()
# A two-layer network trained with a hand-written NumPy forward/backward pass
N, D_in, H, D_out = 64, 1000, 100, 10
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)
learning_rate = 1e-6
for it in range(500):
    # Forward pass
    h = x.dot(w1)
    h_relu = np.maximum(h, 0)
    y_pred = h_relu.dot(w2)
    loss = np.square(y_pred - y).sum()
    print(it, loss)
    # Backward pass: chain rule by hand, from the loss back to w2, then w1
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    grad_h = grad_h_relu.copy()
    grad_h[h < 0] = 0   # ReLU passes gradient only where h > 0
    grad_w1 = x.T.dot(grad_h)
    # Gradient-descent update
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
# The same network with PyTorch tensors, gradients still computed by hand
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)
w1 = torch.randn(D_in, H)
w2 = torch.randn(H, D_out)
learning_rate = 1e-6
for it in range(500):
    # Forward pass
    h = x.mm(w1)
    h_relu = h.clamp(min=0)
    y_pred = h_relu.mm(w2)
    loss = (y_pred - y).pow(2).sum().item()
    print(it, loss)
    # Backward pass, mirroring the NumPy version
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    grad_h = grad_h_relu.clone()
    grad_h[h < 0] = 0
    grad_w1 = x.t().mm(grad_h)
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
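# Sanity check (a minimal sketch, not part of the original notes): the
# hand-derived gradients above should agree with what autograd computes
# for the same weights.
with torch.no_grad():
    h_m = x.mm(w1)
    h_relu_m = h_m.clamp(min=0)
    g_y_pred = 2.0 * (h_relu_m.mm(w2) - y)
    g_w2_manual = h_relu_m.t().mm(g_y_pred)
    g_h = g_y_pred.mm(w2.t())
    g_h[h_m < 0] = 0
    g_w1_manual = x.t().mm(g_h)
w1_c = w1.clone().requires_grad_(True)
w2_c = w2.clone().requires_grad_(True)
loss_c = (x.mm(w1_c).clamp(min=0).mm(w2_c) - y).pow(2).sum()
loss_c.backward()
print(torch.allclose(g_w1_manual, w1_c.grad))  # expected: True
print(torch.allclose(g_w2_manual, w2_c.grad))  # expected: True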
# Autograd on scalars: y = w*x + b, so dy/dx = w, dy/dw = x, dy/db = 1
x = torch.tensor(1., requires_grad=True)
w = torch.tensor(2., requires_grad=True)
b = torch.tensor(3., requires_grad=True)
y = w * x + b
y.backward()
print(x.grad)  # tensor(2.) = w
print(w.grad)  # tensor(1.) = x
print(b.grad)  # tensor(1.)
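# An illustrative aside (not in the original notes): gradients accumulate
# across backward() calls, which is why the training loops below zero them
# every iteration.
y2 = w * x + b
y2.backward()
print(x.grad)  # tensor(4.): 2 from each backward pass
x.grad.zero_()
w.grad.zero_()
b.grad.zero_()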
# The two-layer network again, with autograd computing the gradients
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)
w1 = torch.randn(D_in, H, requires_grad=True)
w2 = torch.randn(H, D_out, requires_grad=True)
learning_rate = 1e-6
for it in range(500):
    y_pred = x.mm(w1).clamp(min=0).mm(w2)
    loss = (y_pred - y).pow(2).sum()
    print(it, loss.item())
    loss.backward()
    # Update outside the graph, then reset the accumulated gradients
    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad
        w1.grad.zero_()
        w2.grad.zero_()
'''
# Model after introducing torch.nn
'''
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)
learning_rate = 1e-6
model = nn.Sequential(
    nn.Linear(D_in, H, bias=False),
    nn.ReLU(),
    nn.Linear(H, D_out, bias=False),
)
# Re-initialize with a standard normal to match the hand-rolled versions above
nn.init.normal_(model[0].weight)
nn.init.normal_(model[2].weight)
loss_func = nn.MSELoss(reduction="sum")
for it in range(500):
    y_pred = model(x)
    loss = loss_func(y_pred, y)
    print(it, loss.item())
    model.zero_grad()
    loss.backward()
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad
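# An aside (not in the original notes): nn.Linear stores its weight as
# (out_features, in_features), which is the shape normal_ re-initialized above.
print(model[0].weight.shape)  # torch.Size([100, 1000])
print(model[2].weight.shape)  # torch.Size([10, 100])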
'''
# Model after introducing an optimizer
'''
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)
learning_rate = 1e-4
model = nn.Sequential(
    nn.Linear(D_in, H, bias=False),
    nn.ReLU(),
    nn.Linear(H, D_out, bias=False),
)
loss_func = nn.MSELoss(reduction="sum")
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
for it in range(500):
    y_pred = model(x)
    loss = loss_func(y_pred, y)
    print(it, loss.item())
    optimizer.zero_grad()  # clear the old gradients
    loss.backward()        # compute fresh gradients
    optimizer.step()       # let Adam update every parameter
'''
# Model defined as a custom nn.Module subclass
'''
class TwoLayerNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out):
        super(TwoLayerNet, self).__init__()
        self.linear1 = torch.nn.Linear(D_in, H, bias=False)
        self.linear2 = torch.nn.Linear(H, D_out, bias=False)

    def forward(self, x):
        y_pred = self.linear2(self.linear1(x).clamp(min=0))
        return y_pred

model = TwoLayerNet(D_in, H, D_out)
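# A minimal training sketch for the custom module (assumes the same x, y,
# and Adam setup as in the section above; this loop is not in the original
# notes).
loss_func = nn.MSELoss(reduction="sum")
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
for it in range(500):
    y_pred = model(x)
    loss = loss_func(y_pred, y)
    print(it, loss.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()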
'''
# FizzBuzz game model
'''
def fizz_buzz_encode(i):
    # Labels: 0 -> the number itself, 1 -> "fizz", 2 -> "buzz", 3 -> "fizzbuzz"
    if i % 15 == 0:
        return 3
    elif i % 5 == 0:
        return 2
    elif i % 3 == 0:
        return 1
    else:
        return 0

def fizz_buzz_decode(i, prediction):
    return [str(i), "fizz", "buzz", "fizzbuzz"][prediction]

def helper(i):
    print(fizz_buzz_decode(i, fizz_buzz_encode(i)))
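# Quick check of the encode/decode pair (illustrative, not in the original
# notes): prints 1, 2, fizz, 4, buzz, ..., fizzbuzz.
for i in range(1, 16):
    helper(i)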
NUM_DIGITS = 10
def binary_encode(i, num_digits):
    # Most-significant bit first, e.g. binary_encode(5, 4) -> [0, 1, 0, 1]
    return np.array([i >> d & 1 for d in range(num_digits)][::-1])
# Train on 101..1023 so that 0..100 can be held out for testing
trX = torch.Tensor([binary_encode(i, NUM_DIGITS) for i in range(101, 2 ** NUM_DIGITS)])
trY = torch.LongTensor([fizz_buzz_encode(i) for i in range(101, 2 ** NUM_DIGITS)])
NUM_HIDDEN = 100
model = nn.Sequential(
    nn.Linear(NUM_DIGITS, NUM_HIDDEN),
    nn.ReLU(),
    nn.Linear(NUM_HIDDEN, 4)   # four classes: number, fizz, buzz, fizzbuzz
)
if torch.cuda.is_available():
    model = model.cuda()  # the batches below are moved to the GPU, so the model must be too
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.05)
batch_size = 128
for epoch in range(1000):
    for start in range(0, len(trX), batch_size):
        end = start + batch_size
        batchX = trX[start:end]
        batchY = trY[start:end]
        if torch.cuda.is_available():
            batchX = batchX.cuda()
            batchY = batchY.cuda()
        y_pred = model(batchX)
        loss = loss_fn(y_pred, batchY)
        print("epoch:", epoch, loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
# Evaluate on the held-out numbers 0..100
testX = torch.Tensor([binary_encode(i, NUM_DIGITS) for i in range(0, 101)])
if torch.cuda.is_available():
    testX = testX.cuda()
with torch.no_grad():
    testY = model(testX)
predictions = zip(range(0, 101), testY.max(1)[1].tolist())
print([fizz_buzz_decode(i, x) for i, x in predictions])
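# Optional accuracy check (a sketch, not in the original notes): compare the
# predictions against the true fizzbuzz labels for 0..100.
pred_labels = testY.max(1)[1].tolist()
correct = sum(int(p == fizz_buzz_encode(i)) for i, p in enumerate(pred_labels))
print("test accuracy: %d/101" % correct)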