PyTorch Notes

import torch
import numpy as np
import torch.nn as nn
#Create an uninitialized 5x3 matrix (torch.empty does not initialize the memory)
x = torch.empty(5,3)
#tensor([[7.1118e-04, 1.7444e+28, 7.3909e+22],
#[4.5828e+30, 3.2483e+33, 1.9690e-19],
#[6.8589e+22, 1.3340e+31, 1.1708e-19],
#[7.2128e+22, 9.2216e+29, 7.5546e+31],
#[1.6932e+22, 3.0728e+32, 2.9514e+29]])
x = torch.rand(5,3)
#tensor([[0.8020, 0.6358, 0.6455],
#[0.0177, 0.7960, 0.4162],
#[0.9409, 0.3082, 0.5145],
#[0.4192, 0.8756, 0.8792],
#[0.4861, 0.3633, 0.8668]])
#Create a matrix filled with zeros, with dtype long
x = torch.zeros(5,3,dtype=torch.long) # or x = torch.zeros(5,3).long()
#tensor([[0, 0, 0],
#[0, 0, 0],
#[0, 0, 0],
#[0, 0, 0],
#[0, 0, 0]])
#torch.int64
#Build a tensor directly from data
x = torch.tensor([5.5, 3])
#tensor([5.5000, 3.0000])
#Build a new tensor from an existing one; new_ones keeps the dtype and device of x (the shape is passed explicitly)
x = x.new_ones(5,3)
#tensor([[1., 1., 1.],
#[1., 1., 1.],
#[1., 1., 1.],
#[1., 1., 1.],
#[1., 1., 1.]])
#torch.float32
#Create a tensor with the same shape as x (here the dtype is overridden to float)
x = torch.rand_like(x,dtype=torch.float)
#tensor([[0.3385, 0.5659, 0.8790],
#[0.1863, 0.7166, 0.9285],
#[0.7175, 0.3495, 0.4613],
#[0.0323, 0.4767, 0.8096],
#[0.4523, 0.0591, 0.5974]])
#torch.float32
#Get the shape of a tensor: x.shape or x.size()
y = x.view(15)
#tensor([0.4672, 0.4300, 0.2636, 0.0228, 0.3003, 0.1342, 0.6775, 0.8497, 0.2474,
#0.3502, 0.5224, 0.8243, 0.5169, 0.9832, 0.1073])
y = x.view(-1,5)
#tensor([[0.5895, 0.0579, 0.8686, 0.0022, 0.3773],
#[0.5374, 0.7023, 0.8232, 0.6809, 0.0653],
#[0.0365, 0.0062, 0.7721, 0.8721, 0.7264]])
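# A minimal sketch showing that view() does not copy data; the new tensor shares
# storage with x (the name z below is just for illustration):
z = x.view(15)
z[0] = 100.0
print(x[0, 0])  # also 100.0, because z and x share the same memory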
#Converting between NumPy arrays and tensors
a = torch.ones(5)
#tensor([1., 1., 1., 1., 1.])
b = a.numpy()
#[1. 1. 1. 1. 1.]
#The tensor and the NumPy array share the same memory
b[1] = 2
#tensor([1., 2., 1., 1., 1.])
#[1. 2. 1. 1. 1.]
a = np.ones(5)
b = torch.from_numpy(a)
#a:[1. 1. 1. 1. 1.]
#b:tensor([1., 1., 1., 1., 1.], dtype=torch.float64)
np.add(a,1,out=a)
#[2. 2. 2. 2. 2.]
#tensor([2., 2., 2., 2., 2.], dtype=torch.float64)

print(a)
#print(torch.cuda.is_available())  # False on this machine
#Tensors on the GPU
if torch.cuda.is_available():
    device = torch.device("cuda")
    y = torch.ones_like(b, device=device)  # create the tensor directly on the GPU
    b = b.to(device)
    c = b+y
    print(c)
    print(c.to("cpu",torch.double))
y.to("cpu").data.numpy()  # move y back to the CPU, then convert it to a NumPy array
y.cpu().data.numpy()      # equivalent shorthand
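# A device-agnostic pattern (a minimal sketch; works with or without CUDA):
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
t = torch.randn(2, 3, device=device)  # create the tensor directly on the chosen device
t = t.to(device)                      # or move an existing tensor
print(t.device)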
#A two-layer neural network implemented with NumPy
#A fully connected ReLU network with one hidden layer and no bias, predicting y from x with an L2 loss
#h = x · w1
#a = relu(h) = max(0, h)
#Steps:
#1. forward pass
#2. compute the loss
#3. backward pass
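# Sketch of the backward-pass derivation implemented below (chain rule, no bias):
#   h      = x · w1
#   h_relu = max(0, h)
#   y_pred = h_relu · w2
#   loss   = sum((y_pred - y)^2)
#   dloss/dy_pred = 2 * (y_pred - y)
#   dloss/dw2     = h_relu^T · dloss/dy_pred
#   dloss/dh_relu = dloss/dy_pred · w2^T
#   dloss/dh      = dloss/dh_relu, zeroed where h < 0
#   dloss/dw1     = x^T · dloss/dh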
N, D_in, H, D_out = 64,1000,100,10
# Create some random training data

x = np.random.randn(N,D_in)
y = np.random.randn(N,D_out)
w1 = np.random.randn(D_in,H)
w2 = np.random.randn(H,D_out)

learning_rate = 1e-6
for it in range(500):
    # Forward pass
    h = x.dot(w1)
    h_relu = np.maximum(h,0)#N*H
    y_pred = h_relu.dot(w2)#N*D_out
    # compute loss  MSE
    loss = np.square(y_pred-y).sum()
    print(it, loss)
    # backward pass
    # compute the gradient
    grad_y_pred = 2.0*(y_pred-y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    grad_h = grad_h_relu.copy()
    grad_h[h<0] = 0
    grad_w1 = x.T.dot(grad_h)

    # update weights of w1 and w2
    w1 -= learning_rate*grad_w1
    w2 -= learning_rate*grad_w2
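
# Optional sanity check (a minimal sketch): verify the manual w2 gradient formula
# above against a central finite-difference estimate on tiny random data.
def check_grad_w2(eps=1e-6):
    xs = np.random.randn(4, 3)
    ys = np.random.randn(4, 2)
    W1 = np.random.randn(3, 5)
    W2 = np.random.randn(5, 2)
    h_relu = np.maximum(xs.dot(W1), 0)
    analytic = h_relu.T.dot(2.0 * (h_relu.dot(W2) - ys))[0, 0]
    W2p, W2m = W2.copy(), W2.copy()
    W2p[0, 0] += eps
    W2m[0, 0] -= eps
    numeric = (np.square(h_relu.dot(W2p) - ys).sum()
               - np.square(h_relu.dot(W2m) - ys).sum()) / (2 * eps)
    print("analytic:", analytic, "numeric:", numeric)  # should be nearly identical

# check_grad_w2()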

# The same model rewritten with PyTorch tensors

x = torch.randn(N,D_in)
y = torch.randn(N,D_out)
w1 = torch.randn(D_in,H)
w2 = torch.randn(H,D_out)
learning_rate = 1e-6
for it in range(500):
    # Forward pass
    h = x.mm(w1)
    h_relu = h.clamp(min=0)#N*H
    y_pred = h_relu.mm(w2)#N*D_out
    # compute loss  MSE
    loss = (y_pred-y).pow(2).sum().item()
    print(it, loss)
    # backward pass
    # compute the gradient
    grad_y_pred = 2.0*(y_pred-y)
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    grad_h = grad_h_relu.clone()
    grad_h[h<0] = 0
    grad_w1 = x.t().mm(grad_h)

    # update weights of w1 and w2
    w1 -= learning_rate*grad_w1
    w2 -= learning_rate*grad_w2


x = torch.tensor(1.,requires_grad=True)
w = torch.tensor(2.,requires_grad=True)
b = torch.tensor(3.,requires_grad=True)
y = w*x + b # y = 2*1 + 3
y.backward()
print(x.grad)
print(w.grad)
print(b.grad)
#tensor(2.)
#tensor(1.)
#tensor(1.)
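# Note: backward() accumulates gradients into .grad, which is why the training
# loops below zero the gradients each step. A minimal sketch:
x2 = torch.tensor(1., requires_grad=True)
(2 * x2).backward()
(2 * x2).backward()
print(x2.grad)   # tensor(4.) -- the two backward calls accumulated
x2.grad.zero_()  # reset before the next backward pass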
# Simplify the model above further: let autograd compute the gradients

x = torch.randn(N,D_in)
y = torch.randn(N,D_out)
w1 = torch.randn(D_in,H,requires_grad=True)
w2 = torch.randn(H,D_out,requires_grad=True)
learning_rate = 1e-6
for it in range(500):
    # Forward pass
    y_pred = x.mm(w1).clamp(min=0).mm(w2)#N*D_out
    # compute loss  MSE
    loss = (y_pred-y).pow(2).sum()#computation graph
    print(it, loss.item())
    # backward pass
    # compute the gradient
    # w1.grad.zero_()  # clear the gradients
    # w2.grad.zero_()
    loss.backward()
    with torch.no_grad():
        # update weights of w1 and w2
        w1 -= learning_rate*w1.grad
        w2 -= learning_rate*w2.grad
        w1.grad.zero_()
        w2.grad.zero_()
# The model rewritten with torch.nn
x = torch.randn(N,D_in)
y = torch.randn(N,D_out)
w1 = torch.randn(D_in,H,requires_grad=True)
w2 = torch.randn(H,D_out,requires_grad=True)
learning_rate = 1e-6
model = nn.Sequential(
    nn.Linear(D_in,H,bias=False),
    nn.ReLU(),
    nn.Linear(H,D_out,bias=False),
)
#Initializing the weights from a normal distribution works better here
nn.init.normal_(model[0].weight)  # initialize each layer's weights in turn
#model[1] is the ReLU layer and has no parameters
nn.init.normal_(model[2].weight)
#model = model.cuda()  # if running on CUDA
loss_func = nn.MSELoss(reduction="sum")
#Define the optimizer
#optimizer = torch.optim.Adam(model.parameters(),lr = learning_rate)
for it in range(500):
    # Forward pass
    y_pred = model(x)#N*D_out  model.forward()
    # compute loss  MSE
    loss = loss_func(y_pred, y)#computation graph
    print(it, loss.item())
    # backward pass
    # compute the gradient
    # w1.grad.zero_()  # clear the gradients
    model.zero_grad()  # zero all gradients in the model
    loss.backward()
    with torch.no_grad():
        # update weights of w1 and w2
        for param in model.parameters():
            param -= learning_rate*param.grad
# The model after switching to an optimizer
x = torch.randn(N,D_in)
y = torch.randn(N,D_out)
w1 = torch.randn(D_in,H,requires_grad=True)
w2 = torch.randn(H,D_out,requires_grad=True)
learning_rate = 1e-4
model = nn.Sequential(
    nn.Linear(D_in,H,bias=False),
    nn.ReLU(),
    nn.Linear(H,D_out,bias=False),
)
#Initializing the weights from a normal distribution works better here
# nn.init.normal_(model[0].weight)  # initialize each layer's weights in turn
#model[1] is the ReLU layer and has no parameters
#nn.init.normal_(model[2].weight)
#model = model.cuda()  # if running on CUDA
loss_func = nn.MSELoss(reduction="sum")
#Define the optimizer
optimizer = torch.optim.Adam(model.parameters(),lr = learning_rate)
for it in range(500):
    # Forward pass
    y_pred = model(x)#N*D_out  model.forward()
    # compute loss  MSE
    loss = loss_func(y_pred, y)#computation graph
    print(it, loss.item())
    # backward pass
    # compute the gradient
    # w1.grad.zero_()  # clear the gradients
    optimizer.zero_grad()  # zero all gradients
    loss.backward()  # compute the gradients
    # update model parameters
    # the optimizer updates all parameters in one step
    optimizer.step()
# An alternative way to define the model
class TwoLayerNet(torch.nn.Module):
    def __init__(self,D_in,H,D_out):
        super(TwoLayerNet,self).__init__()
        #define model architecture
        self.linear1 = torch.nn.Linear(D_in, H, bias=False)
        self.linear2 = torch.nn.Linear(H, D_out, bias=False)

    def forward(self, x):
        y_pred = self.linear2(self.linear1(x).clamp(min=0))
        return y_pred
# Instantiate the model
model = TwoLayerNet(D_in,H,D_out)
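
# A sketch of how this custom Module could be trained, reusing x, y and the
# optimizer-based loop from above:
loss_func = nn.MSELoss(reduction="sum")
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
for it in range(500):
    y_pred = model(x)
    loss = loss_func(y_pred, y)
    print(it, loss.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()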

# A FizzBuzz game model

def fizz_buzz_encode(i):
    if i % 15 == 0:
        return 3
    if i % 5 == 0:
        return 2
    if i % 3 == 0:
        return 1
    else:
        return 0

def fizz_buzz_decode(i,prediction):
    return [str(i),"fizz","buzz","fizzbuzz"][prediction]

def helper(i):
    print(fizz_buzz_decode(i,fizz_buzz_encode(i)))

# for i in range(10):
#     helper(i)

import numpy as np
import torch
import torch.nn as nn

NUM_DIGITS = 10

def binary_encode(i,num_digits):
    bit = np.array([i>>d & 1 for d in range(num_digits)][::-1])
    # print(bit)  # debug output; uncomment to inspect the encoding
    return bit
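# Quick check (a sketch): the most significant bit comes first because of [::-1].
# binary_encode(3, 4)  ->  array([0, 0, 1, 1])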


# binary_encode(1,NUM_DIGITS)
# Train on 101..2**NUM_DIGITS-1; numbers 0..100 are held out for testing below
trX = torch.Tensor([binary_encode(i,NUM_DIGITS) for i in range(101,2**NUM_DIGITS)])
trY = torch.LongTensor([fizz_buzz_encode(i) for i in range(101, 2 ** NUM_DIGITS)])
# Define the model with PyTorch
NUM_HIDDEN = 100
model = nn.Sequential(
    nn.Linear(NUM_DIGITS,NUM_HIDDEN),
    nn.ReLU(),
    nn.Linear(NUM_HIDDEN,4)
)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(),lr = 0.05)
# Model training code
batch_size = 128
for epoch in range(1000):
    for start in range(0,len(trX),batch_size):
        end = start+batch_size
        batchX = trX[start:end]
        batchY = trY[start:end]
        if torch.cuda.is_available():
            batchX = batchX.cuda()
            batchY = batchY.cuda()
        y_pred = model(batchX)
        loss = loss_fn(y_pred,batchY)
        print("epoch:",epoch,loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

testX = torch.Tensor([binary_encode(i,NUM_DIGITS) for i in range(0,101)])
if torch.cuda.is_available():
    testX = testX.cuda()
with torch.no_grad():
    testY = model(testX)
prediction = zip(range(0,101),testY.max(1)[1].data.tolist())
print([fizz_buzz_decode(i,x) for i,x in prediction])
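# Optional: a rough accuracy check against the true labels (a minimal sketch):
actual = torch.LongTensor([fizz_buzz_encode(i) for i in range(0, 101)])
predicted = testY.max(1)[1].cpu()
print("accuracy on 0..100:", (predicted == actual).float().mean().item())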