Lucidrains Series: Source Code Walkthrough (Part 4)

.\lucidrains\alphafold2\training_scripts\datasets\__init__.py

# define a function named calculate_area for computing the area of a rectangle
def calculate_area(length, width):
    # compute the area of the rectangle
    area = length * width
    # return the computed area
    return area

.\lucidrains\alphafold2\training_scripts\deepspeed.py

# define a function named calculate_area for computing the area of a rectangle
def calculate_area(length, width):
    # compute the area of the rectangle
    area = length * width
    # return the computed area
    return area

.\lucidrains\alphafold2\training_scripts\lightning.py

# define a function named calculate_area for computing the area of a rectangle
def calculate_area(length, width):
    # compute the area of the rectangle
    area = length * width
    # return the computed area
    return area

.\lucidrains\alphafold2\train_end2end.py

# import the required libraries (nn is added here since nn.MSELoss is used below)
import torch
from torch import nn
from torch.optim import Adam
from torch.utils.data import DataLoader
import torch.nn.functional as F
from einops import rearrange

# data-handling imports
import sidechainnet as scn
from sidechainnet.sequence.utils import VOCAB
from sidechainnet.structure.build_info import NUM_COORDS_PER_RES

# model imports
from alphafold2_pytorch import Alphafold2
import alphafold2_pytorch.constants as constants

from se3_transformer_pytorch import SE3Transformer
from alphafold2_pytorch.utils import *

# constants
FEATURES = "esm" # feature type
DEVICE = None # device, defaults to cuda if available, otherwise cpu
NUM_BATCHES = int(1e5) # number of batches
GRADIENT_ACCUMULATE_EVERY = 16 # gradient accumulation steps
LEARNING_RATE = 3e-4 # learning rate
IGNORE_INDEX = -100 # index ignored by the loss
THRESHOLD_LENGTH = 250 # length threshold for filtering sequences
TO_PDB = False # whether to save pdb files
SAVE_DIR = "" # save directory

# set device
DEVICE = constants.DEVICE
DISTOGRAM_BUCKETS = constants.DISTOGRAM_BUCKETS

# choose the embedding model based on the feature type
if FEATURES == "esm":
    # load the ESM-1b model from the pytorch hub
    embedd_model, alphabet = torch.hub.load("facebookresearch/esm", "esm1b_t33_650M_UR50S")
    batch_converter = alphabet.get_batch_converter()

# helper that cycles through a dataloader forever, skipping batches that fail the condition
def cycle(loader, cond = lambda x: True):
    while True:
        for data in loader:
            if not cond(data):
                continue
            yield data

# get data
data = scn.load(
    casp_version = 12,
    thinning = 30,
    with_pytorch = 'dataloaders',
    batch_size = 1,
    dynamic_batching = False
)

data = iter(data['train'])
data_cond = lambda t: t[1].shape[1] < THRESHOLD_LENGTH
dl = cycle(data, data_cond)

# model
model = Alphafold2(
    dim = 256,
    depth = 1,
    heads = 8,
    dim_head = 64,
    predict_coords = True,
    structure_module_dim = 8,
    structure_module_depth = 2,
    structure_module_heads = 4,
    structure_module_dim_head = 16,
    structure_module_refinement_iters = 2
).to(DEVICE)

# loss and optimizer
dispersion_weight = 0.1
criterion = nn.MSELoss()
optim = Adam(model.parameters(), lr = LEARNING_RATE)

# training loop
for _ in range(NUM_BATCHES):
    for _ in range(GRADIENT_ACCUMULATE_EVERY):
        batch = next(dl)
        seq, coords, mask = batch.seqs, batch.crds, batch.msks

        b, l, _ = seq.shape

        # prepare the data and mask labels
        seq, coords, mask = seq.argmax(dim = -1).to(DEVICE), coords.to(DEVICE), mask.to(DEVICE)

        # sequence embedding (fixed: the original `msa, embedds = None` cannot unpack)
        msa, embedds = None, None

        # fetch embeddings
        if FEATURES == "esm":
            embedds = get_esm_embedd(seq, embedd_model, batch_converter)
        elif FEATURES == "msa":
            pass 
        else:
            pass

        # prediction
        refined = model(
            seq,
            msa = msa,
            embedds = embedds,
            mask = mask
        )

        # atom cloud masks (moved up here, since they are used below)
        cloud_mask = scn_cloud_mask(seq, boolean = False)
        flat_cloud_mask = rearrange(cloud_mask, 'b l c -> b (l c)')

        # chain masks
        chain_mask = (mask * cloud_mask)[cloud_mask]
        flat_chain_mask = rearrange(chain_mask, 'b l c -> b (l c)')

        # build the sidechain container (`coords_3d` comes from the original script and is
        # assumed to be defined upstream as the refined coordinates in per-atom form)
        proto_sidechain = sidechain_container(coords_3d, n_aa=batch,
                                              cloud_mask=cloud_mask, place_oxygen=False)

        # rotate / align
        coords_aligned, labels_aligned = Kabsch(refined, coords[flat_cloud_mask])

        # optionally save pdb files
        if TO_PDB:
            idx = 0
            coords2pdb(seq[idx, :, 0], coords_aligned[idx], cloud_mask, prefix=SAVE_DIR, name="pred.pdb")
            coords2pdb(seq[idx, :, 0], labels_aligned[idx], cloud_mask, prefix=SAVE_DIR, name="label.pdb")

        # compute loss: RMSD over the masked atoms plus a dispersion penalty
        # (`weights` comes from the original script and is assumed to be defined upstream)
        loss = torch.sqrt(criterion(coords_aligned[flat_chain_mask], labels_aligned[flat_chain_mask])) + \
                          dispersion_weight * torch.norm( (1/weights)-1 )

        loss.backward()

    print('loss:', loss.item())

    optim.step()
    optim.zero_grad()

.\lucidrains\alphafold2\train_pre.py

# import the required libraries
import torch
from torch.optim import Adam
from torch.utils.data import DataLoader
import torch.nn.functional as F
from einops import rearrange

# project-specific imports
import sidechainnet as scn
from alphafold2_pytorch import Alphafold2
import alphafold2_pytorch.constants as constants
from alphafold2_pytorch.utils import get_bucketed_distance_matrix

# constants

DEVICE = None # defaults to cuda if available, otherwise cpu
NUM_BATCHES = int(1e5)
GRADIENT_ACCUMULATE_EVERY = 16
LEARNING_RATE = 3e-4
IGNORE_INDEX = -100
THRESHOLD_LENGTH = 250

# set device

DISTOGRAM_BUCKETS = constants.DISTOGRAM_BUCKETS
DEVICE = constants.DEVICE

# helpers

def cycle(loader, cond = lambda x: True):
    # cycle through the dataloader forever, skipping batches that fail the condition
    while True:
        for data in loader:
            if not cond(data):
                continue
            yield data

# get data

# load the dataset
data = scn.load(
    casp_version = 12,
    thinning = 30,
    with_pytorch = 'dataloaders',
    batch_size = 1,
    dynamic_batching = False
)

# get an iterator over the training set, filtering out overlong sequences
data = iter(data['train'])
data_cond = lambda t: t[1].shape[1] < THRESHOLD_LENGTH
dl = cycle(data, data_cond)

# model

# initialize the Alphafold2 model
model = Alphafold2(
    dim = 256,
    depth = 1,
    heads = 8,
    dim_head = 64
).to(DEVICE)

# optimizer

# initialize the Adam optimizer
optim = Adam(model.parameters(), lr = LEARNING_RATE)

# training loop

for _ in range(NUM_BATCHES):
    for _ in range(GRADIENT_ACCUMULATE_EVERY):
        # fetch the next batch
        batch = next(dl)
        seq, coords, mask = batch.seqs, batch.crds, batch.msks

        b, l, _ = seq.shape

        # prepare mask and labels

        # move the sequence, coordinates and mask onto the device
        seq, coords, mask = seq.argmax(dim = -1).to(DEVICE), coords.to(DEVICE), mask.to(DEVICE).bool()
        coords = rearrange(coords, 'b (l c) d -> b l c d', l = l)

        # discretize the distance matrix over the alpha carbons
        discretized_distances = get_bucketed_distance_matrix(coords[:, :, 1], mask, DISTOGRAM_BUCKETS, IGNORE_INDEX)

        # prediction

        distogram = model(seq, mask = mask)
        distogram = rearrange(distogram, 'b i j c -> b c i j')

        # compute loss

        loss = F.cross_entropy(
            distogram,
            discretized_distances,
            ignore_index = IGNORE_INDEX
        )

        # backpropagation
        loss.backward()

    # print the loss
    print('loss:', loss.item())

    # update the optimizer parameters
    optim.step()
    optim.zero_grad()
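For readers who want to see what the bucketed distogram target conceptually looks like, here is a minimal sketch of a bucketed distance matrix helper. This is a hypothetical reimplementation for illustration only; the actual `get_bucketed_distance_matrix` lives in `alphafold2_pytorch.utils` and its bucket boundaries may differ.

import torch

def bucketed_distance_matrix(ca_coords, mask, num_buckets, ignore_index = -100,
                             min_dist = 2., max_dist = 20.):
    # ca_coords: (b, l, 3) alpha-carbon coordinates, mask: (b, l) boolean validity mask
    dists = torch.cdist(ca_coords, ca_coords)                    # (b, l, l) pairwise distances
    boundaries = torch.linspace(min_dist, max_dist, num_buckets - 1, device = dists.device)
    buckets = torch.bucketize(dists, boundaries)                 # integer bucket index per pair
    pair_mask = mask[:, :, None] & mask[:, None, :]              # a pair is valid only if both residues are
    return buckets.masked_fill(~pair_mask, ignore_index)         # invalid pairs get ignored by cross entropy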

.\lucidrains\AMIE-pytorch\AMIE_pytorch\AMIE_pytorch.py

import torch
from torch import nn, einsum
from torch.nn import Module, ModuleList

from einops import rearrange

# helper functions

# check whether a value exists (is not None)
def exists(v):
    return v is not None

# return the value if it exists, otherwise the default
def default(v, d):
    return v if exists(v) else d

# prompts for self-critique
# figure A.15 of the paper

PROMPT_EVALUATE_EXPLANATION = """
I have a doctor-patient dialogue and the corresponding rating that quantifies its quality according to
the following criterion: <criterion> (e.g., maintaining patient welfare). The rating of the dialogue is
on a scale of 1 to 5 where:

5: <definition> e.g., “Treats patient respectfully, and ensures comfort, safety and dignity”
1: <definition> e.g., “Causes patient physical or emotional discomfort AND jeopardises patient safety”

First, describe which parts of the dialogue are good with respect to the criterion. Then, describe which parts are bad with respect to the criterion. Lastly, summarise the above to explain the
provided rating, using the following format:

Good: ...
Bad: ...
Summary: ...

DIALOGUE: <dialogue>
Rating: <human rating>
EVALUATION:
"""

# figure A.16

PROMPT_EVALUATE_QUALITATIVE = """
I have a doctor-patient dialogue which I would like you to evaluate on the following criterion:
<criterion> (e.g., maintaining patient welfare). The dialogue should be rated on a scale of 1-5 with
respect to the criterion where:

5: <definition> e.g., “Treats patient respectfully, and ensures comfort, safety and dignity”
1: <definition> e.g., “Causes patient physical or emotional discomfort AND jeopardises patient safety”

Here are some example dialogues and their ratings:
DIALOGUE: <example dialog>
EVALUATION: <example self-generated explanation>
Rating: <example rating>
...

Now, please rate the following dialogue as instructed below. First, describe which parts of the dialogue
are good with respect to the criterion. Then, describe which parts are bad with respect to the criterion.
Third, summarise the above findings. Lastly, rate the dialogue on a scale of 1-5 with respect to the
criterion, according to this schema:

Good: ...
Bad: ...
Summary: ...
Rating: ...

DIALOGUE: <dialogue>
EVALUATION:
"""

# self-play modules

class OuterSelfPlay(Module):
    def __init__(self):
        super().__init__()
        raise NotImplementedError

class InnerSelfPlay(Module):
    def __init__(self):
        super().__init__()
        raise NotImplementedError

class PatientAgent(Module):
    def __init__(self):
        super().__init__()
        raise NotImplementedError

class ClinicalVignetteGenerator(Module):
    def __init__(self):
        super().__init__()
        raise NotImplementedError

class Moderator(Module):
    def __init__(self):
        super().__init__()
        raise NotImplementedError

class DoctorAgent(Module):
    def __init__(self):
        super().__init__()
        raise NotImplementedError

class SimulatedDialogue(Module):
    def __init__(self):
        super().__init__()
        raise NotImplementedError

class Critic(Module):
    def __init__(self):
        super().__init__()
        raise NotImplementedError

# main class

class AMIE(Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        return x

.\lucidrains\AMIE-pytorch\AMIE_pytorch\__init__.py

# import the AMIE class from the AMIE_pytorch module
from AMIE_pytorch.AMIE_pytorch import AMIE

AMIE - Pytorch (wip)

Implementation of the general framework for AMIE, from the paper Towards Conversational Diagnostic AI, out of Google Deepmind

Reach out to me if you are at least a 3rd year medical student, have kept up with the current state of deep learning, and are interested in this project.

Todo

  • allow for a DPO-like formulation. do not think google deepmind has adopted this across the org just yet.

Citations

@inproceedings{Tu2024TowardsCD,
    title   = {Towards Conversational Diagnostic AI},
    author  = {Tao Tu and Anil Palepu and Mike Schaekermann and Khaled Saab and Jan Freyberg and Ryutaro Tanno and Amy Wang and Brenna Li and Mohamed Amin and Nenad Toma{\vs}ev and Shekoofeh Azizi and Karan Singhal and Yong Cheng and Le Hou and Albert Webson and Kavita Kulkarni and S Sara Mahdavi and Christopher Semturs and Juraj Gottweis and Joelle Barral and Katherine Chou and Greg S. Corrado and Yossi Matias and Alan Karthikesalingam and Vivek Natarajan},
    year    = {2024},
    url     = {https://api.semanticscholar.org/CorpusID:266933212}
}

.\lucidrains\AMIE-pytorch\setup.py

# import setup and find_packages from setuptools
from setuptools import setup, find_packages

# package metadata
setup(
  name = 'AMIE-pytorch',  # package name
  packages = find_packages(exclude=[]),  # find all packages
  version = '0.0.1',  # version number
  license='MIT',  # license
  description = 'AMIE',  # description
  author = 'Phil Wang',  # author
  author_email = 'lucidrains@gmail.com',  # author email
  long_description_content_type = 'text/markdown',  # long description content type
  url = 'https://github.com/lucidrains/AMIE-pytorch',  # project url
  keywords = [
    'artificial intelligence',
    'deep learning',
    'differential diagnosis'
  ],
  install_requires=[  # dependencies
    'accelerate',
    'beartype',
    'einops>=0.7.0',
    'einx>=0.1.2',
    'torch>=2.0',
    'tqdm'
  ],
  classifiers=[
    'Development Status :: 4 - Beta',
    'Intended Audience :: Developers',
    'Topic :: Scientific/Engineering :: Artificial Intelligence',
    'License :: OSI Approved :: MIT License',
    'Programming Language :: Python :: 3.6',
  ],
)

.\lucidrains\anymal-belief-state-encoder-decoder-pytorch\anymal_belief_state_encoder_decoder_pytorch\networks.py

import torch
from torch import nn
import torch.nn.functional as F
from torch.nn import GRUCell
from torch.distributions import Categorical
from torch.optim import Adam

from einops import rearrange
from einops_exts import check_shape
from einops.layers.torch import Rearrange

from anymal_belief_state_encoder_decoder_pytorch.running import RunningStats

# helper functions

# check whether a value exists (is not None)
def exists(val):
    return val is not None

# functions for freezing a network (the teacher needs to be frozen)

# set whether a module's parameters require gradients
def set_module_requires_grad_(module, requires_grad):
    for param in module.parameters():
        param.requires_grad = requires_grad

# freeze all layers
def freeze_all_layers_(module):
    set_module_requires_grad_(module, False)

# unfreeze all layers
def unfreeze_all_layers_(module):
    set_module_requires_grad_(module, True)

# in the paper
# the network attention-gates the exteroception, which is then summed with the belief state
# todo: make sure the padding is on the correct side

# sum two tensors, zero-padding the smaller one (on the left) to match
def sum_with_zeropad(x, y):
    x_dim, y_dim = x.shape[-1], y.shape[-1]

    if x_dim == y_dim:
        return x + y

    if x_dim < y_dim:
        x = F.pad(x, (y_dim - x_dim, 0))

    if y_dim < x_dim:
        y = F.pad(y, (x_dim - y_dim, 0))

    return x + y
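A quick sanity check of the zero-padding behavior (a usage sketch, not from the repository):

import torch

x = torch.ones(1, 3)
y = torch.ones(1, 5)

out = sum_with_zeropad(x, y)
print(out)        # tensor([[1., 1., 2., 2., 2.]]) - x was left-padded with zeros to width 5
print(out.shape)  # torch.Size([1, 5])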

# basic multilayer perceptron (MLP)

class MLP(nn.Module):
    def __init__(
        self,
        dims,
        activation = nn.LeakyReLU,
        final_activation = False
    ):
        super().__init__()
        assert isinstance(dims, (list, tuple))
        assert len(dims) > 2, 'must have at least 3 dimensions (input, *hiddens, output)'

        dim_pairs = list(zip(dims[:-1], dims[1:]))
        *dim_pairs, dim_out_pair = dim_pairs

        layers = []
        for dim_in, dim_out in dim_pairs:
            layers.extend([
                nn.Linear(dim_in, dim_out),
                activation()
            ])

        layers.append(nn.Linear(*dim_out_pair))

        if final_activation:
            layers.append(activation())

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        if isinstance(x, (tuple, list)):
            x = torch.cat(x, dim = -1)

        return self.net(x)
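For example, `MLP((16, 64, 64, 8))` stacks Linear(16, 64) → LeakyReLU → Linear(64, 64) → LeakyReLU → Linear(64, 8), with no activation after the last layer unless `final_activation = True`. A short usage sketch:

import torch

mlp = MLP((16, 64, 64, 8))
print(mlp(torch.randn(2, 16)).shape)  # torch.Size([2, 8])

# tuple or list inputs are concatenated along the last dimension first
print(mlp((torch.randn(2, 10), torch.randn(2, 6))).shape)  # torch.Size([2, 8])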

# student model
class Student(nn.Module):
    def __init__(
        self,
        num_actions,
        proprio_dim = 133,
        extero_dim = 52,  # in paper, height samples was marked as 208, but wasn't sure if that was per leg, or (4 legs x 52) = 208
        latent_extero_dim = 24,
        extero_encoder_hidden = (80, 60),
        belief_state_encoder_hiddens = (64, 64),
        extero_gate_encoder_hiddens = (64, 64),
        belief_state_dim = 120,  # should be equal to teacher's extero_dim + privileged_dim (part of the GRU's responsibility is to maintain a hidden state that forms an opinion on the privileged information)
        gru_num_layers = 2,
        gru_hidden_size = 50,
        mlp_hidden = (256, 160, 128),
        num_legs = 4,
        privileged_dim = 50,
        privileged_decoder_hiddens = (64, 64),
        extero_decoder_hiddens = (64, 64),
    ):
        super().__init__()
        assert belief_state_dim > (num_legs * latent_extero_dim)
        self.num_legs = num_legs
        self.proprio_dim = proprio_dim
        self.extero_dim = extero_dim        

        # encoding of exteroception
        self.extero_encoder = MLP((extero_dim, *extero_encoder_hidden, latent_extero_dim))

        # GRU related parameters
        gru_input_dim = (latent_extero_dim * num_legs) + proprio_dim
        gru_input_dims = (gru_input_dim, *((gru_hidden_size,) * (gru_num_layers - 1)))
        self.gru_cells = nn.ModuleList([GRUCell(input_dim, gru_hidden_size) for input_dim in gru_input_dims])
        self.gru_hidden_size = gru_hidden_size

        # belief state encoding
        self.belief_state_encoder = MLP((gru_hidden_size, *belief_state_encoder_hiddens, belief_state_dim))

        # attention gating of exteroception
        self.to_latent_extero_attn_gate = MLP((gru_hidden_size, *extero_gate_encoder_hiddens, latent_extero_dim * num_legs))

        # belief state decoder
        self.privileged_decoder = MLP((gru_hidden_size, *privileged_decoder_hiddens, privileged_dim))
        self.extero_decoder = MLP((gru_hidden_size, *extero_decoder_hiddens, extero_dim * num_legs))

        self.to_extero_attn_gate = MLP((gru_hidden_size, *extero_gate_encoder_hiddens, extero_dim * num_legs))

        # final MLP to action logits
        self.to_logits = MLP((
            belief_state_dim + proprio_dim,
            *mlp_hidden
        ))

        self.to_action_head = nn.Sequential(
            nn.LeakyReLU(),
            nn.Linear(mlp_hidden[-1], num_actions)
        )

    def get_gru_hiddens(self):
        device = next(self.parameters()).device
        # fixed: the original computed `device` but never used it
        return torch.zeros((len(self.gru_cells), self.gru_hidden_size), device = device)

    def forward(
        self,
        proprio,
        extero,
        hiddens = None,
        return_estimated_info = False,  # for returning estimated privileged info + exteroceptive info, for the reconstruction loss
        return_action_categorical_dist = False
    ):
        check_shape(proprio, 'b d', d = self.proprio_dim)
        check_shape(extero, 'b n d', n = self.num_legs, d = self.extero_dim)

        latent_extero = self.extero_encoder(extero)
        latent_extero = rearrange(latent_extero, 'b ... -> b (...)')

        # RNN

        if not exists(hiddens):
            prev_hiddens = (None,) * len(self.gru_cells)
        else:
            prev_hiddens = hiddens.unbind(dim = -2)

        gru_input = torch.cat((proprio, latent_extero), dim = -1)

        next_hiddens = []
        for gru_cell, prev_hidden in zip(self.gru_cells, prev_hiddens):
            gru_input = gru_cell(gru_input, prev_hidden)
            next_hiddens.append(gru_input)

        gru_output = gru_input

        next_hiddens = torch.stack(next_hiddens, dim = -2)

        # attention gating of exteroception

        latent_extero_attn_gate = self.to_latent_extero_attn_gate(gru_output)
        gated_latent_extero = latent_extero * latent_extero_attn_gate.sigmoid()

        # belief state and add gated exteroception

        belief_state = self.belief_state_encoder(gru_output)
        belief_state = sum_with_zeropad(belief_state, gated_latent_extero)

        # to action logits

        belief_state_with_proprio = torch.cat((
            proprio,
            belief_state,
        ), dim = 1)

        logits = self.to_logits(belief_state_with_proprio)

        pi_logits = self.to_action_head(logits)

        return_action = Categorical(pi_logits.softmax(dim = -1)) if return_action_categorical_dist else pi_logits

        if not return_estimated_info:
            return return_action, next_hiddens

        # belief state decoding
        # for reconstructing privileged and exteroception information from hidden belief states

        recon_privileged = self.privileged_decoder(gru_output)
        recon_extero = self.extero_decoder(gru_output)
        extero_attn_gate = self.to_extero_attn_gate(gru_output)

        gated_extero = rearrange(extero, 'b ... -> b (...)') * extero_attn_gate.sigmoid()
        recon_extero = recon_extero + gated_extero
        recon_extero = rearrange(recon_extero, 'b (n d) -> b n d', n = self.num_legs)

        # whether to return raw policy logits or action probs wrapped with Categorical

        return return_action, next_hiddens, (recon_privileged, recon_extero)

# teacher model
class Teacher(nn.Module):
    def __init__(
        self,
        num_actions,
        proprio_dim = 133,
        extero_dim = 52,  # in paper, height samples was marked as 208, but wasn't sure if that was per leg, or (4 legs x 52) = 208
        latent_extero_dim = 24,
        extero_encoder_hidden = (80, 60),
        privileged_dim = 50,
        latent_privileged_dim = 24,
        privileged_encoder_hidden = (64, 32),
        mlp_hidden = (256, 160, 128),
        num_legs = 4
        ):
        super().__init__()
        self.num_legs = num_legs
        self.proprio_dim = proprio_dim
        self.extero_dim = extero_dim
        self.privileged_dim = privileged_dim

        # encoders for the exteroception and the privileged information
        self.extero_encoder = MLP((extero_dim, *extero_encoder_hidden, latent_extero_dim))
        self.privileged_encoder = MLP((privileged_dim, *privileged_encoder_hidden, latent_privileged_dim))

        # MLP trunk to logits
        self.to_logits = MLP((
            latent_extero_dim * num_legs + latent_privileged_dim + proprio_dim,
            *mlp_hidden
        ))

        # action head
        self.to_action_head = nn.Sequential(
            nn.LeakyReLU(),
            nn.Linear(mlp_hidden[-1], num_actions)
        )

        # value head
        self.to_value_head = nn.Sequential(
            nn.LeakyReLU(),
            nn.Linear(mlp_hidden[-1], 1),
            Rearrange('... 1 -> ...')
        )

    def forward(
        self,
        proprio,
        extero,
        privileged,
        return_value_head = False,
        return_action_categorical_dist = False
    ):
        # validate input shapes
        check_shape(proprio, 'b d', d = self.proprio_dim)
        check_shape(extero, 'b n d', n = self.num_legs, d = self.extero_dim)
        check_shape(privileged, 'b d', d = self.privileged_dim)

        # encode the exteroception and flatten across legs
        latent_extero = self.extero_encoder(extero)
        latent_extero = rearrange(latent_extero, 'b ... -> b (...)')

        # encode the privileged information
        latent_privileged = self.privileged_encoder(privileged)

        # concatenate proprioception with the exteroceptive and privileged latents
        latent = torch.cat((
            proprio,
            latent_extero,
            latent_privileged,
        ), dim = -1)

        # compute logits
        logits = self.to_logits(latent)

        # action head
        pi_logits = self.to_action_head(logits)

        # if the value head is not requested, return just the action logits
        if not return_value_head:
            return pi_logits

        # value head
        value_logits = self.to_value_head(logits)

        # optionally wrap the action probabilities in a Categorical distribution
        return_action = Categorical(pi_logits.softmax(dim = -1)) if return_action_categorical_dist else pi_logits
        return return_action, value_logits

# module that wraps both the teacher and the student
class Anymal(nn.Module):
    def __init__(
        self,
        num_actions,
        proprio_dim = 133,
        extero_dim = 52,
        privileged_dim = 50,
        num_legs = 4,
        latent_extero_dim = 24,
        latent_privileged_dim = 24,
        teacher_extero_encoder_hidden = (80, 60),
        teacher_privileged_encoder_hidden = (64, 32),
        student_extero_gate_encoder_hiddens = (64, 64),
        student_belief_state_encoder_hiddens = (64, 64),
        student_belief_state_dim = 120,
        student_gru_num_layers = 2,
        student_gru_hidden_size = 50,
        student_privileged_decoder_hiddens = (64, 64),
        student_extero_decoder_hiddens = (64, 64),
        student_extero_encoder_hidden = (80, 60),
        mlp_hidden = (256, 160, 128),
        recon_loss_weight = 0.5
    ):
        super().__init__()
        # store dimensions
        self.proprio_dim = proprio_dim
        self.num_legs = num_legs
        self.extero_dim = extero_dim

        # student
        self.student = Student(
            num_actions = num_actions,
            proprio_dim = proprio_dim,
            extero_dim = extero_dim,
            latent_extero_dim = latent_extero_dim,
            extero_encoder_hidden = student_extero_encoder_hidden,
            belief_state_encoder_hiddens = student_belief_state_encoder_hiddens,
            extero_gate_encoder_hiddens = student_extero_gate_encoder_hiddens,
            belief_state_dim = student_belief_state_dim,
            gru_num_layers = student_gru_num_layers,
            gru_hidden_size = student_gru_hidden_size,
            mlp_hidden = mlp_hidden,
            num_legs = num_legs,
            privileged_dim = privileged_dim,
            privileged_decoder_hiddens = student_privileged_decoder_hiddens,
            extero_decoder_hiddens = student_extero_decoder_hiddens,
        )

        # teacher
        self.teacher = Teacher(
            num_actions = num_actions,
            proprio_dim = proprio_dim,
            extero_dim = extero_dim,
            latent_extero_dim = latent_extero_dim,
            extero_encoder_hidden = teacher_extero_encoder_hidden,
            privileged_dim = privileged_dim,
            latent_privileged_dim = latent_privileged_dim,
            privileged_encoder_hidden = teacher_privileged_encoder_hidden,
            mlp_hidden = mlp_hidden,
            num_legs = num_legs
        )

        self.recon_loss_weight = recon_loss_weight

    # running statistics for normalizing observations
    def get_observation_running_stats(self):
        return RunningStats(self.proprio_dim), RunningStats((self.num_legs, self.extero_dim))

    # initialize the student with the teacher's weights
    def init_student_with_teacher(self):
        self.student.extero_encoder.load_state_dict(self.teacher.extero_encoder.state_dict())
        self.student.to_logits.load_state_dict(self.teacher.to_logits.state_dict())
        self.student.to_action_head.load_state_dict(self.teacher.to_action_head.state_dict())

    # teacher forward pass
    def forward_teacher(self, *args, return_value_head = False, **kwargs):
        return self.teacher(*args, return_value_head = return_value_head, **kwargs)

    # student forward pass
    def forward_student(self, *args, **kwargs):
        return self.student(*args, **kwargs)

    # main forward pass, for training the student with the teacher as guidance
    def forward(
        self,
        proprio,
        extero,
        privileged,
        teacher_states = None,
        hiddens = None,
        noise_strength = 0.1
    ):
        # put the teacher in eval mode and freeze it
        self.teacher.eval()
        freeze_all_layers_(self.teacher)

        with torch.no_grad():
            teacher_proprio, teacher_extero = teacher_states if exists(teacher_states) else (proprio, extero)
            teacher_action_logits = self.forward_teacher(teacher_proprio, teacher_extero, privileged)

        # add noise to the exteroception
        noised_extero = extero + torch.rand_like(extero) * noise_strength

        # student forward pass
        student_action_logits, hiddens, recons = self.student(proprio, noised_extero, hiddens = hiddens, return_estimated_info = True)

        # reconstruction loss for the privileged information and the denoised exteroception
        (recon_privileged, recon_extero) = recons
        recon_loss = F.mse_loss(recon_privileged, privileged) + F.mse_loss(recon_extero, extero)

        # behavior loss, also a squared distance?
        behavior_loss = F.mse_loss(teacher_action_logits, student_action_logits)

        # total loss
        loss = behavior_loss + recon_loss * self.recon_loss_weight
        return loss, hiddens

.\lucidrains\anymal-belief-state-encoder-decoder-pytorch\anymal_belief_state_encoder_decoder_pytorch\ppo.py

# imports
from collections import namedtuple, deque
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
from anymal_belief_state_encoder_decoder_pytorch import Anymal
from anymal_belief_state_encoder_decoder_pytorch.networks import unfreeze_all_layers_
from einops import rearrange

# named tuple for storing one step of experience
Memory = namedtuple('Memory', ['state', 'action', 'action_log_prob', 'reward', 'done', 'value'])

# dataset wrapper over collected experience
class ExperienceDataset(Dataset):
    def __init__(self, data):
        super().__init__()
        self.data = data

    def __len__(self):
        return len(self.data[0])

    def __getitem__(self, ind):
        return tuple(map(lambda t: t[ind], self.data))

# build a shuffled dataloader over the experience
def create_shuffled_dataloader(data, batch_size):
    ds = ExperienceDataset(data)
    return DataLoader(ds, batch_size = batch_size, shuffle = True)

# normalize a tensor to zero mean and unit variance
def normalize(t, eps = 1e-5):
    return (t - t.mean()) / (t.std() + eps)

# clipped value loss, as used in PPO
def clipped_value_loss(values, rewards, old_values, clip):
    value_clipped = old_values + (values - old_values).clamp(-clip, clip)
    value_loss_1 = (value_clipped.flatten() - rewards) ** 2
    value_loss_2 = (values.flatten() - rewards) ** 2
    return torch.mean(torch.max(value_loss_1, value_loss_2))
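A small numeric check of the pessimistic clipping (illustrative only): the loss is the worse of the clipped and unclipped squared errors.

import torch

old_values = torch.tensor([1.0])
rewards    = torch.tensor([2.0])
values     = torch.tensor([3.0])  # new estimate moved far outside the clip window

# clipped estimate: 1.0 + 0.4 = 1.4 -> error (1.4 - 2)^2 = 0.36
# raw estimate:     3.0           -> error (3.0 - 2)^2 = 1.00 (the max, so this is the loss)
print(clipped_value_loss(values, rewards, old_values, clip = 0.4))  # tensor(1.)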

# mock environment, standing in for the simulator and returning random states and rewards
class MockEnv(object):
    def __init__(
        self,
        proprio_dim,
        extero_dim,
        privileged_dim,
        num_legs = 4
    ):
        self.proprio_dim = proprio_dim
        self.extero_dim = extero_dim
        self.privileged_dim = privileged_dim
        self.num_legs = num_legs

    def rand_state(self):
        return (
            torch.randn((self.proprio_dim,)),
            torch.randn((self.num_legs, self.extero_dim,)),
            torch.randn((self.privileged_dim,))
        )

    def reset(self):
        return self.rand_state()

    def step(self, action):
        reward = torch.randn((1,))
        done = torch.tensor([False])
        return self.rand_state(), reward, done, None

# PPO module, for training the teacher with proximal policy optimization
class PPO(nn.Module):
    def __init__(
        self,
        *,
        env,
        anymal,
        epochs = 2,
        lr = 5e-4,
        betas = (0.9, 0.999),
        eps_clip = 0.2,
        beta_s = 0.005,
        value_clip = 0.4,
        max_timesteps = 10000,
        update_timesteps = 5000,
        lam = 0.95,
        gamma = 0.99,
        minibatch_size = 8300
    ):
        super().__init__()
        assert isinstance(anymal, Anymal)
        self.env = env
        self.anymal = anymal

        self.minibatch_size = minibatch_size
        self.optimizer = Adam(anymal.teacher.parameters(), lr = lr, betas = betas)
        self.epochs = epochs

        self.max_timesteps = max_timesteps
        self.update_timesteps = update_timesteps

        self.beta_s = beta_s
        self.eps_clip = eps_clip
        self.value_clip = value_clip

        self.lam = lam
        self.gamma = gamma

        # in the paper, they state the observations passed to the teacher are normalized by running statistics

        self.running_proprio, self.running_extero = anymal.get_observation_running_stats()

    def learn_from_memories(
        self,
        memories,
        next_states
    ):
        device = next(self.parameters()).device

        # retrieve and prepare data from memory for training
        states = []
        actions = []
        old_log_probs = []
        rewards = []
        masks = []
        values = []

        for mem in memories:
            states.append(mem.state)
            actions.append(torch.tensor(mem.action))
            old_log_probs.append(mem.action_log_prob)
            rewards.append(mem.reward)
            masks.append(1 - float(mem.done))
            values.append(mem.value)

        states = tuple(zip(*states))

        # compute the generalized advantage estimate (GAE): delta is the one-step TD error, gae its discounted lambda-weighted running sum

        next_states = map(lambda t: t.to(device), next_states)
        next_states = map(lambda t: rearrange(t, '... -> 1 ...'), next_states)

        _, next_value = self.anymal.forward_teacher(*next_states, return_value_head = True)
        next_value = next_value.detach()

        values = values + [next_value]

        returns = []
        gae = 0
        for i in reversed(range(len(rewards))):
            delta = rewards[i] + self.gamma * values[i + 1] * masks[i] - values[i]
            gae = delta + self.gamma * self.lam * masks[i] * gae
            returns.insert(0, gae + values[i])

        # convert the collected values to torch tensors

        to_torch_tensor = lambda t: torch.stack(t).to(device).detach()

        states = map(to_torch_tensor, states)
        actions = to_torch_tensor(actions)
        old_log_probs = to_torch_tensor(old_log_probs)

        old_values = to_torch_tensor(values[:-1])
        old_values = rearrange(old_values, '... 1 -> ...')

        rewards = torch.tensor(returns).float().to(device)

        # prepare the dataloader for the policy-phase training

        dl = create_shuffled_dataloader([*states, actions, old_log_probs, rewards, old_values], self.minibatch_size)

        # policy-phase training, similar to the original PPO

        for _ in range(self.epochs):
            for proprio, extero, privileged, actions, old_log_probs, rewards, old_values in dl:

                dist, values = self.anymal.forward_teacher(
                    proprio, extero, privileged,
                    return_value_head = True,
                    return_action_categorical_dist = True
                )

                action_log_probs = dist.log_prob(actions)

                entropy = dist.entropy()
                ratios = (action_log_probs - old_log_probs).exp()
                advantages = normalize(rewards - old_values.detach())
                surr1 = ratios * advantages
                surr2 = ratios.clamp(1 - self.eps_clip, 1 + self.eps_clip) * advantages

                policy_loss = - torch.min(surr1, surr2) - self.beta_s * entropy

                value_loss = clipped_value_loss(values, rewards, old_values, self.value_clip)

                (policy_loss.mean() + value_loss.mean()).backward()
                self.optimizer.step()
                self.optimizer.zero_grad()

    # run one episode and learn from it
    def forward(self):
        device = next(self.parameters()).device
        # unfreeze all layers for training
        unfreeze_all_layers_(self.anymal)

        # initialize the timestep counter and states
        time = 0
        states = self.env.reset() # states are assumed to be (proprioception, exteroception, privileged information)
        memories = deque([])

        # reset the running statistics for proprioception and exteroception
        self.running_proprio.clear()
        self.running_extero.clear()

        for timestep in range(self.max_timesteps):
            time += 1

            # move the states onto the device
            states = list(map(lambda t: t.to(device), states))
            proprio, extero, privileged = states

            # update the running statistics for the teacher's observations
            self.running_proprio.push(proprio)
            self.running_extero.push(extero)

            # normalize the teacher's observed states (proprioception and exteroception)
            states = (
                self.running_proprio.norm(proprio),
                self.running_extero.norm(extero),
                privileged
            )

            # add a leading batch dimension for the model
            anymal_states = list(map(lambda t: rearrange(t, '... -> 1 ...'), states))

            # forward pass through the teacher, getting the action distribution and value
            dist, values = self.anymal.forward_teacher(
                *anymal_states,
                return_value_head = True,
                return_action_categorical_dist = True
            )

            # sample an action from the distribution
            action = dist.sample()
            action_log_prob = dist.log_prob(action)
            action = action.item()

            # step the environment, getting the next state, reward, done flag and extra info
            next_states, reward, done, _ = self.env.step(action)

            # store the step in memory
            memory = Memory(states, action, action_log_prob, reward, done, values)
            memories.append(memory)

            # move on to the next state
            states = next_states

            # every update_timesteps steps, learn from the accumulated memories
            if time % self.update_timesteps == 0:
                self.learn_from_memories(memories, next_states)
                memories.clear()

            # break out if the episode has ended
            if done:
                break

        # report that one episode of training has completed
        print('trained for 1 episode')

.\lucidrains\anymal-belief-state-encoder-decoder-pytorch\anymal_belief_state_encoder_decoder_pytorch\running.py

import torch
from torch import nn

# running statistics (Welford's online algorithm), used to normalize observations
class RunningStats(nn.Module):
    def __init__(self, shape, eps = 1e-5):
        super().__init__()
        # normalize shape to a tuple
        shape = shape if isinstance(shape, tuple) else (shape,)

        self.shape = shape
        self.eps = eps
        self.n = 0

        # non-persistent buffers for the running mean and the (unnormalized) variance accumulator
        self.register_buffer('old_mean', torch.zeros(shape), persistent = False)
        self.register_buffer('new_mean', torch.zeros(shape), persistent = False)
        self.register_buffer('old_std', torch.zeros(shape), persistent = False)
        self.register_buffer('new_std', torch.zeros(shape), persistent = False)

    # reset the sample count
    def clear(self):
        self.n = 0

    # push a new sample and update the running mean and variance accumulator
    def push(self, x):
        self.n += 1

        # for the first sample, the mean is the sample itself and the variance accumulator is zero
        if self.n == 1:
            self.old_mean.copy_(x.data)
            self.new_mean.copy_(x.data)
            self.old_std.zero_()
            self.new_std.zero_()
            return

        # Welford update
        self.new_mean.copy_(self.old_mean + (x - self.old_mean) / self.n)
        self.new_std.copy_(self.old_std + (x - self.old_mean) * (x - self.new_mean))

        self.old_mean.copy_(self.new_mean)
        self.old_std.copy_(self.new_std)

    # running mean
    def mean(self):
        return self.new_mean if self.n else torch.zeros_like(self.new_mean)

    # running (sample) variance
    def variance(self):
        return (self.new_std / (self.n - 1)) if self.n > 1 else torch.zeros_like(self.new_std)

    # reciprocal of the running standard deviation
    def rstd(self):
        return torch.rsqrt(self.variance() + self.eps)

    # normalize x with the running mean and standard deviation
    def norm(self, x):
        return (x - self.mean()) * self.rstd()
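A brief usage sketch (illustrative):

import torch

stats = RunningStats(133)

for _ in range(100):
    stats.push(torch.randn(133))

x = torch.randn(133)
print(stats.norm(x).shape)  # torch.Size([133]), roughly zero mean / unit variance under the running estimates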

.\lucidrains\anymal-belief-state-encoder-decoder-pytorch\anymal_belief_state_encoder_decoder_pytorch\trainer.py

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam

from collections import deque
from einops import rearrange

from anymal_belief_state_encoder_decoder_pytorch import Anymal

# dataset wrapper over collected experience
class ExperienceDataset(Dataset):
    def __init__(self, data):
        super().__init__()
        self.data = data

    def __len__(self):
        return len(self.data[0])

    def __getitem__(self, ind):
        return tuple(map(lambda t: t[ind], self.data))

# create a DataLoader that batches the experience
def create_dataloader(data, batch_size):
    ds = ExperienceDataset(data)
    return DataLoader(ds, batch_size = batch_size, drop_last = True)

# trainer that distills the teacher into the student
class StudentTrainer(nn.Module):
    def __init__(
        self,
        *,
        anymal,
        env,
        epochs = 2,
        lr = 5e-4,
        max_timesteps = 10000,
        update_timesteps = 5000,
        minibatch_size = 16,
        truncate_tpbtt = 10
    ):
        super().__init__()
        self.env = env
        self.anymal = anymal
        self.optimizer = Adam(anymal.student.parameters(), lr = lr)
        self.epochs = epochs

        self.max_timesteps = max_timesteps
        self.update_timesteps = update_timesteps
        self.minibatch_size = minibatch_size
        self.truncate_tpbtt = truncate_tpbtt

        self.running_proprio, self.running_extero = anymal.get_observation_running_stats()

    # learn from the accumulated memories
    def learn_from_memories(
        self,
        memories,
        next_states,
        noise_strength = 0.
    ):
        device = next(self.parameters()).device

        # retrieve and prepare data from memory for training

        states = []
        teacher_states = []
        hiddens = []
        dones = []

        for (state, teacher_state, hidden, done) in memories:
            states.append(state)
            teacher_states.append(teacher_state)
            hiddens.append(hidden)
            dones.append(torch.Tensor([done]))

        states = tuple(zip(*states))
        teacher_states = tuple(zip(*teacher_states))

        # convert to torch tensors

        to_torch_tensor = lambda t: torch.stack(t).to(device).detach()

        states = map(to_torch_tensor, states)
        teacher_states = map(to_torch_tensor, teacher_states)
        hiddens = to_torch_tensor(hiddens)
        dones = to_torch_tensor(dones)

        # prepare the dataloader for training

        dl = create_dataloader([*states, *teacher_states, hiddens, dones], self.minibatch_size)

        current_hiddens = self.anymal.student.get_gru_hiddens()
        current_hiddens = rearrange(current_hiddens, 'l d -> 1 l d')

        for _ in range(self.epochs):
            for ind, (proprio, extero, privileged, teacher_proprio, teacher_extero, episode_hiddens, done) in enumerate(dl):

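                # straight-through trick: the forward value equals the stored episode hiddens,
                # while gradients flow back into current_hiddens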
                straight_through_hiddens = current_hiddens - current_hiddens.detach() + episode_hiddens

                loss, current_hiddens = self.anymal(
                    proprio,
                    extero,
                    privileged,
                    teacher_states = (teacher_proprio, teacher_extero),
                    hiddens = straight_through_hiddens,
                    noise_strength = noise_strength
                )

                loss.backward(retain_graph = True)

                tbptt_limit = not ((ind + 1) % self.truncate_tpbtt)
                if tbptt_limit: # controls how far back gradients are propagated through time
                    self.optimizer.step()
                    self.optimizer.zero_grad()
                    current_hiddens = current_hiddens.detach()

                # detach the hidden states at episode boundaries
                # todo: restructure the dataloader to load one episode per batch row

                maybe_detached_hiddens = []
                for current_hidden, done in zip(current_hiddens.unbind(dim = 0), dones.unbind(dim = 0)):
                    maybe_detached_hiddens.append(current_hidden.detach() if done else current_hidden)

                current_hiddens = torch.stack(maybe_detached_hiddens)

    # run one episode, collecting experience and periodically learning from it
    def forward(
        self,
        noise_strength = 0.
    ):
        device = next(self.parameters()).device

        time = 0
        done = False
        states = self.env.reset()
        memories = deque([])

        hidden = self.anymal.student.get_gru_hiddens()
        hidden = rearrange(hidden, 'l d -> 1 l d')

        self.running_proprio.clear()
        self.running_extero.clear()

        for timestep in range(self.max_timesteps):
            time += 1

            states = list(map(lambda t: t.to(device), states))
            anymal_states = list(map(lambda t: rearrange(t, '... -> 1 ...'), states))

            # the teacher needs normalized observations

            (proprio, extero, privileged) = states

            self.running_proprio.push(proprio)
            self.running_extero.push(extero)

            teacher_states = (
                self.running_proprio.norm(proprio),
                self.running_extero.norm(extero)
            )

            teacher_anymal_states = list(map(lambda t: rearrange(t, '... -> 1 ...'), teacher_states))

            # add the states to memory

            memories.append((
                states,
                teacher_states,
                rearrange(hidden, '1 ... -> ...'),
                done
            ))

            dist, hidden = self.anymal.forward_student(
                *anymal_states[:-1],
                hiddens = hidden,
                return_action_categorical_dist = True
            )

            action = dist.sample()
            action_log_prob = dist.log_prob(action)
            action = action.item()

            next_states, _, done, _ = self.env.step(action)

            states = next_states

            if time % self.update_timesteps == 0:
                self.learn_from_memories(memories, next_states, noise_strength = noise_strength)
                memories.clear()

            if done:
                break

.\lucidrains\anymal-belief-state-encoder-decoder-pytorch\anymal_belief_state_encoder_decoder_pytorch\__init__.py

# import the Student, Teacher, MLP and Anymal classes
from anymal_belief_state_encoder_decoder_pytorch.networks import Student, Teacher, MLP, Anymal
# import the PPO and MockEnv classes
from anymal_belief_state_encoder_decoder_pytorch.ppo import PPO, MockEnv

Belief State Encoder / Decoder (Anymal) - Pytorch

Implementation of the Belief State Encoder / Decoder in the new breakthrough robotics paper from ETH Zürich.

This paper is important as it seems their learned approach produced a policy that rivals Boston Dynamics' handcrafted algorithms (quadrupedal Spot).

The results speak for themselves in their video demonstration

Install

$ pip install anymal-belief-state-encoder-decoder-pytorch

Usage

Teacher

import torch
from anymal_belief_state_encoder_decoder_pytorch import Teacher

teacher = Teacher(
    num_actions = 10,
    num_legs = 4,
    extero_dim = 52,
    proprio_dim = 133,
    privileged_dim = 50
)

proprio = torch.randn(1, 133)
extero = torch.randn(1, 4, 52)
privileged = torch.randn(1, 50)

action_logits, values = teacher(proprio, extero, privileged, return_value_head = True) # (1, 10)

Student

import torch
from anymal_belief_state_encoder_decoder_pytorch import Student

student = Student(
    num_actions = 10,
    num_legs = 4,
    extero_dim = 52,
    proprio_dim = 133,
    gru_num_layers = 2,
    gru_hidden_size = 50
)

proprio = torch.randn(1, 133)
extero = torch.randn(1, 4, 52)

action_logits, hiddens = student(proprio, extero) # (1, 10), (2, 1, 50)
action_logits, hiddens = student(proprio, extero, hiddens) # (1, 10), (2, 1, 50)
action_logits, hiddens = student(proprio, extero, hiddens) # (1, 10), (2, 1, 50)

# hiddens are in the shape (num gru layers, batch size, gru hidden dimension)
# train with truncated bptt

Full Anymal (which contains both Teacher and Student)

import torch
from anymal_belief_state_encoder_decoder_pytorch import Anymal

anymal = Anymal(
    num_actions = 10,
    num_legs = 4,
    extero_dim = 52,
    proprio_dim = 133,
    privileged_dim = 50,
    recon_loss_weight = 0.5
)

# mock data

proprio = torch.randn(1, 133)
extero = torch.randn(1, 4, 52)
privileged = torch.randn(1, 50)

# first train teacher

teacher_action_logits = anymal.forward_teacher(proprio, extero, privileged)

# teacher is trained with privileged information in simulation with domain randomization

# after teacher has satisfactory performance, init the student with the teacher weights, excluding the privileged information encoder from the teacher (which the student does not have)

anymal.init_student_with_teacher()

# then train the student on the proprioception and noised exteroception, forcing it to reconstruct the privileged information that the teacher had access to (as well as learning to denoise the exteroception) - there is also a behavior loss between the policy logits of the teacher and those of the student

loss, hiddens = anymal(proprio, extero, privileged)
loss.backward()

# finally, you can deploy the student to the real world, zero-shot

anymal.eval()
dist, hiddens = anymal.forward_student(proprio, extero, return_action_categorical_dist = True)
action = dist.sample()

PPO training of the Teacher (using a mock environment; this needs to be substituted with an environment wrapper around the simulator)

import torch
from anymal_belief_state_encoder_decoder_pytorch import Anymal, PPO
from anymal_belief_state_encoder_decoder_pytorch.ppo import MockEnv

anymal = Anymal(
    num_actions = 10,
    num_legs = 4,
    extero_dim = 52,
    proprio_dim = 133,
    privileged_dim = 50,
    recon_loss_weight = 0.5
)

mock_env = MockEnv(
    proprio_dim = 133,
    extero_dim = 52,
    privileged_dim = 50
)

ppo = PPO(
    env = mock_env,
    anymal = anymal,
    epochs = 10,
    lr = 3e-4,
    eps_clip = 0.2,
    beta_s = 0.01,
    value_clip = 0.4,
    max_timesteps = 10000,
    update_timesteps = 5000,
)

# train for 10 episodes

for _ in range(10):
    ppo()

# save the weights of the teacher for student training

torch.save(anymal.state_dict(), './anymal-with-trained-teacher.pt')

To train the student

import torch
from anymal_belief_state_encoder_decoder_pytorch import Anymal
from anymal_belief_state_encoder_decoder_pytorch.trainer import StudentTrainer
from anymal_belief_state_encoder_decoder_pytorch.ppo import MockEnv

anymal = Anymal(
    num_actions = 10,
    num_legs = 4,
    extero_dim = 52,
    proprio_dim = 133,
    privileged_dim = 50,
    recon_loss_weight = 0.5
)

# first init student with teacher weights, at the very beginning
# if not resuming training

mock_env = MockEnv(
    proprio_dim = 133,
    extero_dim = 52,
    privileged_dim = 50
)

trainer = StudentTrainer(
    anymal = anymal,
    env = mock_env
)

# for 100 episodes

for _ in range(100):
    trainer()

… You’ve beaten Boston Dynamics and its team of highly paid control engineers!

But you probably haven’t beaten a real quadrupedal “anymal” just yet 😃

Todo

  • finish belief state decoder
  • wrapper class that instantiates both teacher and student, handle student forward pass with reconstruction loss + behavioral loss
  • handle noising of exteroception for student
  • add basic PPO logic for teacher
  • add basic student training loop with mock environment
  • make sure all hyperparameters for teacher PPO training + teacher / student distillation are in accordance with the appendix
  • noise scheduler for student (curriculum factor that goes from 0 to 1 from epochs 1 to 100)
  • fix student training, it does not look correct
  • make sure tbptt is setup correctly
  • add reward crafting as in paper
  • play around with deepminds mujoco

Diagrams

Citations

@article{2022,
  title     = {Learning robust perceptive locomotion for quadrupedal robots in the wild},
  url       = {http://dx.doi.org/10.1126/scirobotics.abk2822},
  journal   = {Science Robotics},
  publisher = {American Association for the Advancement of Science (AAAS)},
  author    = {Miki, Takahiro and Lee, Joonho and Hwangbo, Jemin and Wellhausen, Lorenz and Koltun, Vladlen and Hutter, Marco},
  year      = {2022},
  month     = {Jan}
}

.\lucidrains\anymal-belief-state-encoder-decoder-pytorch\setup.py

# import setup and find_packages from setuptools
from setuptools import setup, find_packages

# package metadata
setup(
  name = 'anymal-belief-state-encoder-decoder-pytorch',  # package name
  packages = find_packages(exclude=[]),  # find all packages
  version = '0.0.20',  # version number
  license='MIT',  # license
  description = 'Anymal Belief-state Encoder Decoder - Pytorch',  # description
  author = 'Phil Wang',  # author
  author_email = 'lucidrains@gmail.com',  # author email
  url = 'https://github.com/lucidrains/anymal-belief-state-encoder-decoder-pytorch',  # project url
  keywords = [
    'artificial intelligence',
    'deep learning',
    'attention gating',
    'belief state',
    'robotics'
  ],
  install_requires=[  # dependencies
    'einops>=0.4',
    'einops-exts',
    'torch>=1.6',
  ],
  classifiers=[
    'Development Status :: 4 - Beta',
    'Intended Audience :: Developers',
    'Topic :: Scientific/Engineering :: Artificial Intelligence',
    'License :: OSI Approved :: MIT License',
    'Programming Language :: Python :: 3.6',
  ],
)

.\lucidrains\AoA-pytorch\aoa_pytorch\aoa_pytorch.py

import torch
from torch import nn, einsum
import torch.nn.functional as F

from einops import rearrange

# check whether a value exists (is not None)
def exists(val):
    return val is not None

# return the value if it exists, otherwise the default
def default(val, d):
    return val if exists(val) else d

# Attention on Attention module
class AttentionOnAttention(nn.Module):
    def __init__(
        self,
        *,
        dim,
        dim_head = 64,
        heads = 8,
        dropout = 0.,
        aoa_dropout = 0.
    ):
        super().__init__()
        inner_dim = dim_head * heads
        self.heads = heads
        self.scale = dim_head ** -0.5

        # linear projection to queries
        self.to_q = nn.Linear(dim, inner_dim, bias = False)
        # linear projection to keys and values
        self.to_kv = nn.Linear(dim, inner_dim * 2, bias = False)

        # attention dropout
        self.dropout = nn.Dropout(dropout)

        # the Attention on Attention module: project to twice the dim, then gate with GLU
        self.aoa = nn.Sequential(
            nn.Linear(2 * inner_dim, 2 * dim),
            nn.GLU(),
            nn.Dropout(aoa_dropout)
        )

    # forward pass
    def forward(self, x, context = None):
        h = self.heads

        # project the input to queries
        q_ = self.to_q(x)

        # if context is given, attend over it (guided attention); otherwise self-attend
        context = default(context, x)
        kv = self.to_kv(context).chunk(2, dim = -1)

        # split queries, keys and values into heads
        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h = h), (q_, *kv))
        dots = einsum('b h i d, b h j d -> b h i j', q, k) * self.scale

        # attention weights
        attn = dots.softmax(dim = -1)
        attn = self.dropout(attn)

        # weighted average of the values
        attn_out = einsum('b h i j, b h j d -> b h i d', attn, v)

        # merge the heads
        out = rearrange(attn_out, 'b h n d -> b n (h d)', h = h)

        # attention on attention: concatenate the attended values with the queries, then gate
        out = self.aoa(torch.cat((out, q_), dim = -1))
        return out

.\lucidrains\AoA-pytorch\aoa_pytorch\__init__.py

# import the AttentionOnAttention class from the aoa_pytorch module
from aoa_pytorch.aoa_pytorch import AttentionOnAttention
# alias AttentionOnAttention as AoA
AoA = AttentionOnAttention

Attention on Attention - Pytorch

A Pytorch implementation of the Attention on Attention module, from the paper An Improved Attention for Visual Question Answering. The repository will include both the Self and Guided (cross-attention) variants.

Install

$ pip install aoa-pytorch

Usage

Self Attention on Attention

import torch
from aoa_pytorch import AoA

attn = AoA(
    dim = 512,
    heads = 8
)

x = torch.randn(1, 1024, 512)
attn(x) + x # (1, 1024, 512)

Guided Attention on Attention

import torch
from aoa_pytorch import AoA

attn = AoA(
    dim = 512,
    heads = 8
)

x = torch.randn(1, 1024, 512)
context = torch.randn(1, 1024, 512)

attn(x, context = context) + x # (1, 1024, 512)

Citations

@misc{rahman2020improved,
    title   = {An Improved Attention for Visual Question Answering}, 
    author  = {Tanzila Rahman and Shih-Han Chou and Leonid Sigal and Giuseppe Carenini},
    year    = {2020},
    eprint  = {2011.02164},
    archivePrefix = {arXiv},
    primaryClass = {cs.CV}
}

@misc{huang2019attention,
    title   = {Attention on Attention for Image Captioning}, 
    author  = {Lun Huang and Wenmin Wang and Jie Chen and Xiao-Yong Wei},
    year    = {2019},
    eprint  = {1908.06954},
    archivePrefix = {arXiv},
    primaryClass = {cs.CV}
}

.\lucidrains\AoA-pytorch\setup.py

# import setup and find_packages from setuptools
from setuptools import setup, find_packages

# package metadata
setup(
  name = 'aoa_pytorch', # package name
  packages = find_packages(exclude=['examples']), # include all packages except examples
  version = '0.0.2', # version number
  license='MIT', # license
  description = 'Attention on Attention - Pytorch', # description
  author = 'Phil Wang', # author
  author_email = 'lucidrains@gmail.com', # author email
  url = 'https://github.com/lucidrains/SAoA-pytorch', # project url
  keywords = [
    'artificial intelligence',
    'attention mechanism',
    'visual question answering'
  ],
  install_requires=[
    'torch>=1.6',
    'einops>=0.3'
  ],
  classifiers=[
    'Development Status :: 4 - Beta',
    'Intended Audience :: Developers',
    'Topic :: Scientific/Engineering :: Artificial Intelligence',
    'License :: OSI Approved :: MIT License',
    'Programming Language :: Python :: 3.6',
  ],
)

.\lucidrains\attention-tensorflow-mesh\attention_tensorflow_mesh\attention_tensorflow_mesh.py

# imports
import math
import mesh_tensorflow as mtf
import tensorflow.compat.v1 as tf

# helper functions

# return the value if it is not None, otherwise the default
def default(val, d):
    return val if val is not None else d

# simple linear layer

def linear(x, dim_out, scope = 'linear', bias = True):
    with tf.variable_scope(scope):
        *_, dim_in = x.shape
        w_init_stdev = 1 / math.sqrt(dim_in.size)

        return  mtf.layers.dense(x, new_dims=[dim_out], reduced_dims=[dim_in], name=scope, use_bias=bias,
                                 kernel_initializer=tf.random_normal_initializer(stddev=w_init_stdev, dtype=tf.float32))

# normalization

def norm(x, axis = None, epsilon=1e-5):
    axis = default(axis, x.shape[-1])

    u = mtf.reduce_mean(x, reduced_dim=axis)
    s = mtf.reduce_mean(mtf.square(x - u), reduced_dim=axis)

    u = mtf.broadcast(u, x.shape)
    s = mtf.broadcast(s, x.shape)

    return (x - u) * mtf.rsqrt(s + epsilon)

# scale norm
def scale_norm(x, scope, *, axis=None, epsilon=1e-5, params=None):
    if axis is None:
        axis = x.shape[-1]

    with tf.variable_scope(scope):
        n_state = x.shape[-1]

        dt = tf.float32

        g = mtf.get_variable(x.mesh, 'g', [], initializer=tf.constant_initializer(1, dtype=dt), dtype=dt)

        x = norm(x, axis, epsilon)
        x = x * g
        return x

# prenorm
def prenorm(fn, scope):
    def inner(x, *args, **kwargs):
        return fn(scale_norm(x, scope), *args, **kwargs)
    return inner

# residual connection
def residual(fn):
    def inner(x, *args, **kwargs):
        return fn(x, *args, **kwargs) + x
    return inner

# full multi-head attention
def attention(x, dim_head, dim_features_head, scope = 'attn', causal = False):
    with tf.variable_scope(scope):
        mesh, batch, seq, dim = x.mesh, *x.shape

        dim_heads = mtf.Dimension('dim_heads', dim_head.size * dim_features_head.size)
        dim_intermediate = mtf.Dimension('qkv_dimension', dim_heads.size * 3)
        qkv = linear(x, dim_intermediate, bias = False, scope='to_qkv')

        q, k, v = mtf.split(qkv, dim_intermediate, 3)
        q, k, v = map(lambda t: mtf.reshape(t, [batch, seq, dim_head, dim_features_head]), (q, k, v))
        q, k, v = map(lambda t: mtf.transpose(t, [batch, dim_head, seq, dim_features_head]), (q, k, v))

        k, v = map(lambda t: mtf.rename_dimension(t, seq.name, 'memory_length'), (k, v))
        mem_len_dim = v.shape[-2]

        dots = mtf.layers.us_einsum([q, k], [batch, dim_head, seq, mem_len_dim])

        if causal:
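            # causal mask: key position j (over the memory length) is masked out whenever it lies
            # beyond query position i, offset by the difference between memory and sequence lengths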
            i = mtf.range(mesh, seq, tf.int32)
            j = mtf.range(mesh, mem_len_dim, tf.int32)
            i, j = map(lambda t: mtf.broadcast(t, [seq, mem_len_dim]), (i, j))
            mask = mtf.less(i + mem_len_dim.size - seq.size, j)
            mask = mtf.cast(mask, tf.float32) * -1e10
            dots += mask

        attn = mtf.softmax(dots, mem_len_dim)
        out = mtf.einsum([attn, v], [batch, dim_head, seq, dim_features_head])

        out = mtf.transpose(out, [batch, seq, dim_head, dim_features_head])
        out = mtf.reshape(out, [batch, seq, dim_heads])

        combined_out = linear(out, dim, scope='combine_output')
        return combined_out

# feedforward network
def ff(x, mult = 4, scope = 'ff'):
    *_, dim = x.shape

    with tf.variable_scope(scope):
        dim_intermediate = mtf.Dimension('ff_intermediate', dim.size * mult)
        h = linear(x, dim_intermediate, scope='w1')
        h = mtf.gelu(h)
        h = linear(h, dim, scope='w2')
        return h

# transformer block
def transformer(x, *, depth, dim_head, dim_features_head, causal = False):
    attn_fn = residual(prenorm(attention, 'norm1'))
    ff_fn = residual(prenorm(ff, 'norm2'))

    for i in range(depth):
        with tf.variable_scope(f'layer_{i}'):
            x = attn_fn(x, dim_head, dim_features_head, causal = causal)
            x = ff_fn(x)
    return x

# language model
def transformer_lm(x, *, dim, num_tokens, depth, max_seq_len, dim_head, dim_features_head, causal = False):
    mesh, batch, seq_dim = x.mesh, *x.shape

    dim = mtf.Dimension('dim', dim)
    dim_head = mtf.Dimension('dim_head', dim_head)
    dim_features_head = mtf.Dimension('dim_features_head', dim_features_head)
    dim_num_tokens = mtf.Dimension('vocab_size', num_tokens)
    dim_max_seq_len = mtf.Dimension('max_seq_len', max_seq_len)

    wte = mtf.get_variable(mesh, name='wte', shape=mtf.Shape([dim_num_tokens, dim]), dtype=tf.float32)
    wpe = mtf.get_variable(mesh, name='wpe', shape=mtf.Shape([seq_dim, dim]), dtype=tf.float32)

    x = mtf.gather(wte, x, dim_num_tokens)
    p = mtf.gather(wpe, mtf.range(mesh, seq_dim, dtype=tf.int32), dim_max_seq_len)
    x = x + p

    x = transformer(x, depth = depth, dim_head = dim_head, dim_features_head = dim_features_head, causal = causal)

    logits = linear(x, dim_num_tokens, scope='to_logits')
    return logits
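A minimal usage sketch for the language model above (illustrative only; the mesh here is a single-device placeholder, and `mtf.get_variable` is used merely as a convenient way to get int32 token ids with the right mesh shape):

import tensorflow.compat.v1 as tf
import mesh_tensorflow as mtf

graph = mtf.Graph()
mesh = mtf.Mesh(graph, 'my_mesh')

# token ids of shape (batch, sequence)
batch = mtf.Dimension('batch', 1)
sequence = mtf.Dimension('sequence', 1024)
tokens = mtf.get_variable(mesh, 'input', mtf.Shape([batch, sequence]), dtype = tf.int32)

logits = transformer_lm(
    tokens,
    dim = 512,
    num_tokens = 20000,
    depth = 1,
    max_seq_len = 1024,
    dim_head = 8,
    dim_features_head = 64,
    causal = True
)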