Building a simple linear model with PyTorch

1. Prepare the dataset
2. Design the model using a class (inherit from nn.Module)
3. Construct the loss and optimizer (using the PyTorch API)
4. Training cycle (forward, backward, and update)

1. Prepare the dataset

# Import the PyTorch package
import torch

# A tiny dataset: three samples with one feature each (shape (3, 1)),
# with labels that follow y = 2x
x_data = torch.tensor([[1.0], [2.0], [3.0]])
y_data = torch.tensor([[2.0], [4.0], [6.0]])

2. Design the model using a class

class LinearModel(torch.nn.Module):
    # Constructor
    def __init__(self):
        super(LinearModel, self).__init__()
        # Linear(in_features: int, out_features: int, bias: bool = True, device=None, dtype=None)
        # Instantiate a linear layer with one input feature and one output feature
        self.linear = torch.nn.Linear(1, 1)

    def forward(self, x):
        # self.linear is an object; following it with parentheses invokes it as a
        # callable, which runs the layer's forward computation
        y_pred = self.linear(x)
        return y_pred
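
The comments above rely on Python's callable-object protocol: `nn.Module` implements `__call__`, which is why `model(x)` and `self.linear(x)` both end up running `forward`. Here is a minimal standalone sketch of the idea (the `Doubler` class is a hypothetical illustration, not part of the model):

```python
class Doubler:
    # Implementing __call__ makes instances of this class callable
    def __call__(self, x):
        return 2 * x

d = Doubler()
print(d(3))  # prints 6: d(3) dispatches to Doubler.__call__(d, 3)
```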

3. Construct the loss and optimizer

model = LinearModel()
# size_average=False is deprecated; reduction='sum' is the current equivalent:
# criterion = torch.nn.MSELoss(size_average=False)
# Use MSE (mean squared error) as the loss
criterion = torch.nn.MSELoss(reduction='sum')
# Use SGD as the optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
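
With reduction='sum' the loss is $\sum_i (\hat{y}_i - y_i)^2$, and each call to optimizer.step() performs plain gradient descent, $w \leftarrow w - lr \cdot \frac{\partial loss}{\partial w}$. As a sketch of what step() does internally for vanilla SGD (illustration only, continuing the script above; the real optimizer handles this itself):

```python
# Manual equivalent of one optimizer.step() for plain SGD (no momentum)
with torch.no_grad():
    for p in model.parameters():
        if p.grad is not None:
            p -= 0.01 * p.grad  # learning rate times gradient
```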

4. Training cycle

for epoch in range(1000):
    y_pred = model(x_data)
    # criterion is also an object; calling an instance with arguments invokes its
    # __call__ method, which only works if the instance's class implements __call__
    loss = criterion(y_pred, y_data)
    print(epoch, loss.item())
    # Zero the gradients, because .backward() accumulates them
    optimizer.zero_grad()
    # Backpropagation
    loss.backward()
    # Update the parameters automatically from the gradients and the learning rate
    optimizer.step()

print('w = ', model.linear.weight.item())
print('b = ', model.linear.bias.item())
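
To sanity-check the fit, the trained model can be evaluated on an input it never saw during training (a minimal sketch continuing the script above; x_test = 4.0 is an assumed example value):

```python
# With w ≈ 2 and b ≈ 0, this should print a value close to 8.0
x_test = torch.tensor([[4.0]])
with torch.no_grad():
    y_test = model(x_test)
print('y_pred = ', y_test.item())
```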

Result

After 1000 training epochs the loss shrinks toward zero: w converges to roughly 2.0 and b to roughly 0.0, recovering the underlying relation y = 2x.

Extended example: a Vision Transformer Encoder

PyTorch can also be used to implement a Vision Transformer Encoder. First, import PyTorch and the other necessary libraries.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F
```

Next, define a `PatchEmbedding` class that splits the input image into patches of size `patch_size` and turns each patch into a vector. A convolution layer whose kernel size and stride both equal the patch size does exactly this.

```python
class PatchEmbedding(nn.Module):
    def __init__(self, image_size=224, patch_size=16, in_channels=3, embed_dim=768):
        super().__init__()
        self.image_size = image_size
        self.patch_size = patch_size
        self.in_channels = in_channels
        self.embed_dim = embed_dim
        self.num_patches = (image_size // patch_size) ** 2
        self.patch_embedding = nn.Conv2d(in_channels, embed_dim, kernel_size=patch_size, stride=patch_size)

    def forward(self, x):
        # Input shape: (batch_size, channels, height, width)
        batch_size, channels, height, width = x.shape
        assert height == width == self.image_size, f"Input image size must be {self.image_size}x{self.image_size}"

        # Patch embedding
        x = self.patch_embedding(x)  # (batch_size, embed_dim, num_patches_h, num_patches_w)
        x = x.flatten(2).transpose(1, 2)  # (batch_size, num_patches, embed_dim)
        return x
```

Next, define a `MultiHeadAttention` class that implements multi-head self-attention. Rather than using PyTorch's built-in `nn.MultiheadAttention` module, the computation is written out by hand here.

```python
class MultiHeadAttention(nn.Module):
    def __init__(self, embed_dim, num_heads, dropout=0.0):
        super().__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads
        self.dropout = nn.Dropout(dropout)
        self.qkv = nn.Linear(embed_dim, embed_dim * 3)
        self.fc = nn.Linear(embed_dim, embed_dim)
        self.scale = self.head_dim ** -0.5

    def forward(self, x):
        # Input shape: (batch_size, num_patches, embed_dim)
        batch_size, num_patches, embed_dim = x.shape

        # Compute queries, keys, and values in one projection
        qkv = self.qkv(x).reshape(batch_size, num_patches, 3, self.num_heads, self.head_dim).permute(2, 0, 3, 1, 4)
        q, k, v = qkv[0], qkv[1], qkv[2]

        # Compute scaled attention scores and attention weights
        attn_scores = (q @ k.transpose(-2, -1)) * self.scale
        attn_weights = F.softmax(attn_scores, dim=-1)
        attn_weights = self.dropout(attn_weights)

        # Compute the weighted sum of values and merge the heads
        attn_output = attn_weights @ v
        attn_output = attn_output.transpose(1, 2).reshape(batch_size, num_patches, embed_dim)

        # Output projection, dropout, and a residual connection with the block input
        out = self.fc(attn_output)
        out = self.dropout(out)
        return out + x
```

Next, define a `FeedForward` class implementing the position-wise feed-forward network: two linear layers with a GELU activation in between, plus a residual connection with the block input.

```python
class FeedForward(nn.Module):
    def __init__(self, embed_dim, hidden_dim, dropout=0.0):
        super().__init__()
        self.embed_dim = embed_dim
        self.hidden_dim = hidden_dim
        self.dropout = nn.Dropout(dropout)
        self.fc1 = nn.Linear(embed_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, embed_dim)

    def forward(self, x):
        # Input shape: (batch_size, num_patches, embed_dim)
        residual = x
        x = self.fc1(x)
        x = F.gelu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.dropout(x)
        return x + residual  # residual connection with the block input
```

Finally, define a `TransformerEncoder` class that combines the three modules above into a simplified encoder block (it omits the LayerNorm sublayers and class token of the full ViT).

```python
class TransformerEncoder(nn.Module):
    def __init__(self, num_patches, embed_dim, num_heads, hidden_dim, dropout=0.0, image_size=224, patch_size=16):
        super().__init__()
        self.num_patches = num_patches
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.hidden_dim = hidden_dim
        self.patch_embedding = PatchEmbedding(image_size, patch_size, in_channels=3, embed_dim=embed_dim)
        assert self.patch_embedding.num_patches == num_patches
        # Learned position embeddings, one per patch
        self.position_embedding = nn.Parameter(torch.randn(1, num_patches, embed_dim))
        self.dropout = nn.Dropout(dropout)
        self.attention = MultiHeadAttention(embed_dim, num_heads, dropout)
        self.feedforward = FeedForward(embed_dim, hidden_dim, dropout)

    def forward(self, x):
        # Input shape: (batch_size, channels, height, width)
        x = self.patch_embedding(x)      # (batch_size, num_patches, embed_dim)
        x = x + self.position_embedding  # add position information to each patch
        x = self.dropout(x)
        x = self.attention(x)
        x = self.feedforward(x)
        return x
```

The encoder can be tested with the following code:

```python
encoder = TransformerEncoder(num_patches=14 * 14, embed_dim=768, num_heads=12, hidden_dim=3072, dropout=0.1)
x = torch.randn(1, 3, 224, 224)
x = encoder(x)
print(x.shape)  # Output shape: (1, 196, 768)
```

This model turns a $224 \times 224$ RGB image into a sequence of $196$ vectors of dimension $768$.
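
The individual modules can be shape-checked in the same way (a quick sketch using the default sizes assumed above):

```python
# PatchEmbedding: 224x224 image, 16x16 patches -> 196 patch vectors of dim 768
patches = PatchEmbedding()(torch.randn(2, 3, 224, 224))
print(patches.shape)  # torch.Size([2, 196, 768])

# MultiHeadAttention preserves the (batch, patches, dim) shape
attn_out = MultiHeadAttention(embed_dim=768, num_heads=12)(patches)
print(attn_out.shape)  # torch.Size([2, 196, 768])
```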