Isaac Lab Robot Learning, Part 3

My understanding is limited; please treat this as a rough reference.

1 The Unitree G1 as an example

The previous posts used the cartpole (inverted pendulum) as the example.

Now let's take the Unitree G1 and see what is different.

source\extensions\omni.isaac.lab_tasks\omni\isaac\lab_tasks\manager_based\locomotion\velocity\config\g1

Under this path:

agents\rsl_rl_ppo_cfg.py:

Compared with the cartpole, the main differences when training the G1 with PPO are:

  • the number of iterations
  • the hidden-layer sizes of the actor and critic networks
  • a slightly larger entropy (exploration) coefficient
# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

from omni.isaac.lab.utils import configclass

from omni.isaac.lab_tasks.utils.wrappers.rsl_rl import (
    RslRlOnPolicyRunnerCfg,
    RslRlPpoActorCriticCfg,
    RslRlPpoAlgorithmCfg,
)


@configclass
class G1RoughPPORunnerCfg(RslRlOnPolicyRunnerCfg):
    num_steps_per_env = 24  # number of env steps collected before each gradient update (rollout length per environment)
    max_iterations = 3000  # maximum number of training iterations
    save_interval = 50  # save a checkpoint every 50 iterations, so training can be resumed or analyzed later
    experiment_name = "g1_rough"  # experiment name, used to identify and distinguish different runs
    empirical_normalization = False
    policy = RslRlPpoActorCriticCfg(
        init_noise_std=1.0,
        actor_hidden_dims=[512, 256, 128],
        critic_hidden_dims=[512, 256, 128],
        activation="elu",
    )
    # PPO algorithm configuration:
    # value_loss_coef=1.0: weight of the value loss in the total loss.
    # use_clipped_value_loss=True: use the clipped value loss to stabilize training.
    # clip_param=0.2: PPO clipping parameter, limits the size of policy updates.
    # entropy_coef=0.008: entropy coefficient, encourages the policy to keep exploring.
    # num_learning_epochs=5: number of learning epochs (network updates) per rollout.
    # num_mini_batches=4: the data is split into 4 mini-batches to make updates more stable.
    # learning_rate=1.0e-3: learning rate, controls the step size of network updates.
    # schedule="adaptive": learning-rate schedule, adjusted as training progresses.
    # gamma=0.99: discount factor, how strongly future rewards are valued.
    # lam=0.95: GAE (generalized advantage estimation) parameter, trades off bias and variance.
    # desired_kl=0.01: target KL divergence, used to monitor/limit the size of policy updates.
    # max_grad_norm=1.0: maximum gradient norm for clipping, prevents exploding gradients.
    algorithm = RslRlPpoAlgorithmCfg(
        value_loss_coef=1.0,
        use_clipped_value_loss=True,
        clip_param=0.2,
        entropy_coef=0.008,
        num_learning_epochs=5,
        num_mini_batches=4,
        learning_rate=1.0e-3,
        schedule="adaptive",
        gamma=0.99,
        lam=0.95,
        desired_kl=0.01,
        max_grad_norm=1.0,
    )


@configclass
class G1FlatPPORunnerCfg(G1RoughPPORunnerCfg):
    def __post_init__(self):
        super().__post_init__()

        self.max_iterations = 1500
        self.experiment_name = "g1_flat"
        self.policy.actor_hidden_dims = [256, 128, 128]
        self.policy.critic_hidden_dims = [256, 128, 128]
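For reference: with the 4096 parallel environments configured later in the scene (MySceneCfg, unless overridden at launch), num_steps_per_env = 24 means each iteration collects 24 × 4096 = 98,304 transitions, which are then reused for 5 learning epochs split into 4 mini-batches.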

In the package's __init__.py, the gym environment is registered; training is later launched by this task name.

gym.register(
    id="Isaac-Velocity-Rough-G1-v0",
    entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv",
    disable_env_checker=True,
    kwargs={
        "env_cfg_entry_point": rough_env_cfg.G1RoughEnvCfg,
        "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:G1RoughPPORunnerCfg",
    },
)
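As a rough sketch of how this registration is consumed (my assumptions: the simulation app has already been started via AppLauncher, and the helper load_cfg_from_registry is available as in the Isaac Lab repo), the training script resolves both entry points from the task id and then builds the environment with gym.make:

import gymnasium as gym

import omni.isaac.lab_tasks  # noqa: F401  -- importing the package runs the gym.register(...) calls
from omni.isaac.lab_tasks.utils import load_cfg_from_registry

task = "Isaac-Velocity-Rough-G1-v0"
# resolve the two kwargs registered above
env_cfg = load_cfg_from_registry(task, "env_cfg_entry_point")        # -> a G1RoughEnvCfg instance
agent_cfg = load_cfg_from_registry(task, "rsl_rl_cfg_entry_point")   # -> a G1RoughPPORunnerCfg instance
# entry_point is ManagerBasedRLEnv, so gym.make constructs the manager-based RL environment
env = gym.make(task, cfg=env_cfg)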

As for the environment configurations:

there are two, flat and rough, and flat inherits from rough.

The rough config in turn first imports velocity_env_cfg.py, the velocity-command environment configuration shared by quadruped and humanoid robots.
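The inheritance chain is therefore:

LocomotionVelocityRoughEnvCfg   (velocity_env_cfg.py, shared across robots)
  └─ G1RoughEnvCfg              (rough_env_cfg.py, G1-specific rewards and terminations)
       └─ G1FlatEnvCfg          (flat_env_cfg.py, flat terrain, covered in section 5)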

2 velocity_env_cfg.py

source\extensions\omni.isaac.lab_tasks\omni\isaac\lab_tasks\manager_based\locomotion\velocity\velocity_env_cfg.py

# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

import math
from dataclasses import MISSING

import omni.isaac.lab.sim as sim_utils
from omni.isaac.lab.assets import ArticulationCfg, AssetBaseCfg
from omni.isaac.lab.envs import ManagerBasedRLEnvCfg
from omni.isaac.lab.managers import CurriculumTermCfg as CurrTerm
from omni.isaac.lab.managers import EventTermCfg as EventTerm
from omni.isaac.lab.managers import ObservationGroupCfg as ObsGroup
from omni.isaac.lab.managers import ObservationTermCfg as ObsTerm
from omni.isaac.lab.managers import RewardTermCfg as RewTerm
from omni.isaac.lab.managers import SceneEntityCfg
from omni.isaac.lab.managers import TerminationTermCfg as DoneTerm
from omni.isaac.lab.scene import InteractiveSceneCfg
from omni.isaac.lab.sensors import ContactSensorCfg, RayCasterCfg, patterns
from omni.isaac.lab.terrains import TerrainImporterCfg
from omni.isaac.lab.utils import configclass
from omni.isaac.lab.utils.assets import ISAAC_NUCLEUS_DIR, ISAACLAB_NUCLEUS_DIR
from omni.isaac.lab.utils.noise import AdditiveUniformNoiseCfg as Unoise

import omni.isaac.lab_tasks.manager_based.locomotion.velocity.mdp as mdp

##
# Pre-defined configs
##
from omni.isaac.lab.terrains.config.rough import ROUGH_TERRAINS_CFG  # isort: skip


##
# Scene definition
##


@configclass
class MySceneCfg(InteractiveSceneCfg):
    """Configuration for the terrain scene with a legged robot."""
     
    # ground terrain
    # terrain configuration, defined with TerrainImporterCfg
    terrain = TerrainImporterCfg(
        prim_path="/World/ground",
        # terrain type "generator": the terrain is created procedurally instead of being a predefined static mesh
        terrain_type="generator",
        # use the predefined rough-terrain configuration (ROUGH_TERRAINS_CFG) for the generator
        terrain_generator=ROUGH_TERRAINS_CFG,
        # maximum initial terrain (difficulty) level is 5
        max_init_terrain_level=5,
        # collision group id; -1 means the terrain does not belong to any specific collision group
        collision_group=-1,
        # physics material properties of the terrain
        physics_material=sim_utils.RigidBodyMaterialCfg(
            # how friction coefficients are combined on contact
            friction_combine_mode="multiply",
            # how restitution (bounciness) is combined on contact
            restitution_combine_mode="multiply",
            # static friction
            static_friction=1.0,
            # dynamic friction
            dynamic_friction=1.0,
        ),
        # visual material configuration (textures, etc.)
        visual_material=sim_utils.MdlFileCfg(
            mdl_path=f"{ISAACLAB_NUCLEUS_DIR}/Materials/TilesMarbleSpiderWhiteBrickBondHoned/TilesMarbleSpiderWhiteBrickBondHoned.mdl",
            project_uvw=True,
            texture_scale=(0.25, 0.25),
        ),
        debug_vis=False,
    )
    # robots
    # robot configuration (left as MISSING here; filled in by each robot-specific config)
    robot: ArticulationCfg = MISSING
    # sensors
    # height scanner: a ray-caster sensor configured with RayCasterCfg
    height_scanner = RayCasterCfg(
        # attached to the robot base
        prim_path="{ENV_REGEX_NS}/Robot/base",
        offset=RayCasterCfg.OffsetCfg(pos=(0.0, 0.0, 20.0)),
        # only follow the base's yaw rotation
        attach_yaw_only=True,
        # grid scan pattern, 0.1 m resolution
        pattern_cfg=patterns.GridPatternCfg(resolution=0.1, size=[1.6, 1.0]),
        # debug visualization
        debug_vis=False,
        # meshes to scan against
        mesh_prim_paths=["/World/ground"],
    )
    # contact sensor on all robot bodies (regex ".*"),
    # keeping 3 time steps of contact history and tracking air time (used for the feet-air-time reward)
    contact_forces = ContactSensorCfg(prim_path="{ENV_REGEX_NS}/Robot/.*", history_length=3, track_air_time=True)
    # lights
    sky_light = AssetBaseCfg(
        prim_path="/World/skyLight",
        spawn=sim_utils.DomeLightCfg(
            intensity=750.0,
            texture_file=f"{ISAAC_NUCLEUS_DIR}/Materials/Textures/Skies/PolyHaven/kloofendal_43d_clear_puresky_4k.hdr",
        ),
    )


##
# MDP settings
##


@configclass
class CommandsCfg:
    """Command specifications for the MDP."""

    base_velocity = mdp.UniformVelocityCommandCfg(
        # applied to the "robot" entity
        asset_name="robot",
        # time range (in seconds) after which the command is resampled (here: every 10 s)
        resampling_time_range=(10.0, 10.0),
        # fraction of environments whose command is "stand still" (zero velocity)
        rel_standing_envs=0.02,
        # fraction of environments that use heading-based commands
        rel_heading_envs=1.0,
        # control the robot's heading through the command
        heading_command=True,
        # heading-control stiffness; the larger it is, the more aggressively the commanded heading is tracked
        heading_control_stiffness=0.5,
        # enable debug visualization
        debug_vis=True,
        # UniformVelocityCommandCfg.Ranges defines the sampling ranges for the
        # commanded x/y linear velocity, yaw angular velocity, and heading
        ranges=mdp.UniformVelocityCommandCfg.Ranges(
            lin_vel_x=(-1.0, 1.0), lin_vel_y=(-1.0, 1.0), ang_vel_z=(-1.0, 1.0), heading=(-math.pi, math.pi)
        ),
    )


@configclass
class ActionsCfg:
    """Action specifications for the MDP."""
    # all joints are matched; the policy output is scaled by 0.5 and applied on top of the default joint offsets
    joint_pos = mdp.JointPositionActionCfg(asset_name="robot", joint_names=[".*"], scale=0.5, use_default_offset=True)
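    # Note: with scale=0.5 and use_default_offset=True, the applied joint target is roughly
    #   target = default_joint_pos + 0.5 * action
    # (my reading of JointPositionActionCfg; treat this as an assumption)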


@configclass
class ObservationsCfg:
    """Observation specifications for the MDP."""
    
    # Notes:
    # Observations describe the robot state (the "state" in the RL sense). They must be quantities that both the
    # simulation and the real robot can provide (IMU, joint encoders, commands, ...).
    # 1. They usually include the base/joint positions, orientations and velocities, expressed in generalized coordinates.
    # 2. To track commands, the current (and sometimes past) command is included so the policy can see the target and its trend.
    # 3. Noise is usually added to the observations so they better match real sensor conditions.
    # 4. The previous action and the input command are typically included as well.
    # 5. Other information such as terrain height, contact state, etc. can also be added.

    # PolicyCfg is the name of this observation group; ObsGroup is the base class for observation groups
    @configclass
    class PolicyCfg(ObsGroup):
        """Observations for policy group."""

        # observation terms (order preserved)
        # base linear velocity, provided by mdp.base_lin_vel,
        # with additive uniform noise in [-0.1, 0.1]
        base_lin_vel = ObsTerm(func=mdp.base_lin_vel, noise=Unoise(n_min=-0.1, n_max=0.1))
        # base angular velocity; during normal walking this is around 0.5 rad/s (roughly 20-40 deg/s), noise range 0.2
        base_ang_vel = ObsTerm(func=mdp.base_ang_vel, noise=Unoise(n_min=-0.2, n_max=0.2))
        # gravity vector projected into the base frame (removes the effect of the current orientation); magnitude 1 g at rest
        projected_gravity = ObsTerm(
            func=mdp.projected_gravity,
            noise=Unoise(n_min=-0.05, n_max=0.05),
        )
        # the generated velocity command, i.e. the command the robot base is actually asked to follow
        velocity_commands = ObsTerm(func=mdp.generated_commands, params={"command_name": "base_velocity"})
        # joint positions, noise 0.01 rad (about 0.57 deg)
        joint_pos = ObsTerm(func=mdp.joint_pos_rel, noise=Unoise(n_min=-0.01, n_max=0.01))
        # joint velocities, noise 1.5 rad/s (about 85 deg/s);
        # for a walking task with peak joint speeds of roughly 300 deg/s, this noise feels fairly large
        joint_vel = ObsTerm(func=mdp.joint_vel_rel, noise=Unoise(n_min=-1.5, n_max=1.5))
        # previous action
        actions = ObsTerm(func=mdp.last_action)
        # height-scan readings, noise 0.1 m (also quite large)
        height_scan = ObsTerm(
            func=mdp.height_scan,
            params={"sensor_cfg": SceneEntityCfg("height_scanner")},
            noise=Unoise(n_min=-0.1, n_max=0.1),
            # clipped to +/- 1 m
            clip=(-1.0, 1.0),
        )
        # called automatically after this config class is instantiated:
        # enable_corruption turns on the observation noise defined above (mimicking sensor noise/faults),
        # concatenate_terms stacks all observation terms into a single observation vector
        def __post_init__(self):
            self.enable_corruption = True
            self.concatenate_terms = True

    # observation groups
    policy: PolicyCfg = PolicyCfg()


@configclass
class EventCfg:
    """Configuration for events."""

    # startup
    # at startup, randomize the rigid-body physics material properties
    physics_material = EventTerm(
        func=mdp.randomize_rigid_body_material,
        mode="startup",
        params={
            "asset_cfg": SceneEntityCfg("robot", body_names=".*"),
            # static friction range, fixed at 0.8
            "static_friction_range": (0.8, 0.8),
            # dynamic friction, fixed at 0.6
            "dynamic_friction_range": (0.6, 0.6),
            # restitution: zero, i.e. no bounciness, purely rigid contact
            "restitution_range": (0.0, 0.0),
            # number of buckets used to discretize the material property distribution
            "num_buckets": 64,
        },
    )
    # randomize the base mass: a value in [-5, 5] kg is added to the original mass
    add_base_mass = EventTerm(
        func=mdp.randomize_rigid_body_mass,
        mode="startup",
        params={
            "asset_cfg": SceneEntityCfg("robot", body_names="base"),
            "mass_distribution_params": (-5.0, 5.0),
            "operation": "add",
        },
    )

    # reset
    # triggered on every reset (after a fall or a timeout):
    # apply an external force/torque to the base
    base_external_force_torque = EventTerm(
        func=mdp.apply_external_force_torque,
        mode="reset",
        params={
            "asset_cfg": SceneEntityCfg("robot", body_names="base"),
            # no external force is actually applied here
            "force_range": (0.0, 0.0),
            # and no external torque either
            "torque_range": (-0.0, 0.0),
        },
    )
    # on reset, sample the root state uniformly within the given ranges,
    # so that episodes start from different initial conditions
    reset_base = EventTerm(
        func=mdp.reset_root_state_uniform,
        mode="reset",
        params={
            # base position and yaw
            "pose_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5), "yaw": (-3.14, 3.14)},
            # base linear and angular velocities are randomized as well
            "velocity_range": {
                "x": (-0.5, 0.5),
                "y": (-0.5, 0.5),
                "z": (-0.5, 0.5),
                "roll": (-0.5, 0.5),
                "pitch": (-0.5, 0.5),
                "yaw": (-0.5, 0.5),
            },
        },
    )
    # reset the robot joints by scaling their default positions
    reset_robot_joints = EventTerm(
        func=mdp.reset_joints_by_scale,
        mode="reset",
        params={
            # scale range for the joint positions (0.5x to 1.5x of the defaults) -- fairly wide
            "position_range": (0.5, 1.5),
            # zero joint velocities
            "velocity_range": (0.0, 0.0),
        },
    )

    # interval
    # interval events are triggered periodically:
    # push the robot by directly setting its linear velocity (emulating an external push)
    push_robot = EventTerm(
        func=mdp.push_by_setting_velocity,
        mode="interval",
        # roughly every 10-15 seconds
        interval_range_s=(10.0, 15.0),
        # a push of random magnitude in the X and Y directions
        params={"velocity_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5)}},
    )


# rewards
@configclass
class RewardsCfg:
    """Reward terms for the MDP."""

    # -- task
    # task reward: track the commanded base linear velocity
    # command_name is "base_velocity", as named in CommandsCfg above
    # "std": math.sqrt(0.25) sets the tolerance of the exponential kernel (std = 0.5, which is quite generous)
    track_lin_vel_xy_exp = RewTerm(
        func=mdp.track_lin_vel_xy_exp, weight=1.0, params={"command_name": "base_velocity", "std": math.sqrt(0.25)}
    )
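    # The built-in exponential kernel is roughly exp(-||v_cmd_xy - v_xy||^2 / std^2), so with std = 0.5
    # a tracking error of 0.5 m/s still yields a reward of about e^-1 ≈ 0.37
    # (based on my reading of omni.isaac.lab.envs.mdp.rewards; treat this as an assumption).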
    # track the commanded yaw angular velocity
    track_ang_vel_z_exp = RewTerm(
        func=mdp.track_ang_vel_z_exp, weight=0.5, params={"command_name": "base_velocity", "std": math.sqrt(0.25)}
    )


    # -- penalties
    # penalty terms
    # penalize vertical (z-axis) base velocity to keep the base height steady; weight -2.0
    lin_vel_z_l2 = RewTerm(func=mdp.lin_vel_z_l2, weight=-2.0)
    # orientation penalty on the base roll and pitch angular velocities
    ang_vel_xy_l2 = RewTerm(func=mdp.ang_vel_xy_l2, weight=-0.05)
    # penalize joint torques, with a very small weight
    dof_torques_l2 = RewTerm(func=mdp.joint_torques_l2, weight=-1.0e-5)
    # penalize joint accelerations
    dof_acc_l2 = RewTerm(func=mdp.joint_acc_l2, weight=-2.5e-7)
    # penalize changes in the action output (encourages small, smooth actions)
    action_rate_l2 = RewTerm(func=mdp.action_rate_l2, weight=-0.01)
    # reward keeping the feet off the ground, based on the tracked air time;
    # this one is task-specific, defined in the velocity task's mdp/rewards.py
    feet_air_time = RewTerm(
        func=mdp.feet_air_time,
        weight=0.125,
        params={
            # use the contact_forces sensor on the bodies matching ".*FOOT"
            "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*FOOT"),
            # tied to the base velocity command: if the command is near zero, rewarding air time would not make sense
            "command_name": "base_velocity",
            # reward only above this 0.5 s air-time threshold; shorter swings are penalized.
            # 0.5 s is a good empirical swing time: long enough to avoid stepping in place, short enough to stay stable
            "threshold": 0.5,
        },
    )
    # penalize undesired contacts with the environment, e.g. the thighs touching the ground
    undesired_contacts = RewTerm(
        func=mdp.undesired_contacts,
        weight=-1.0,
        params={"sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*THIGH"), "threshold": 1.0},
    )
    # -- optional penalties
    # penalty for a non-flat base orientation (disabled here, weight 0)
    flat_orientation_l2 = RewTerm(func=mdp.flat_orientation_l2, weight=0.0)
    # penalty for exceeding joint position limits (disabled here, weight 0)
    dof_pos_limits = RewTerm(func=mdp.joint_pos_limits, weight=0.0)


@configclass
class TerminationsCfg:
    """Termination terms for the MDP."""
    # terminate on timeout
    time_out = DoneTerm(func=mdp.time_out, time_out=True)
    # terminate on illegal contact (the base touching the ground, i.e. a fall)
    base_contact = DoneTerm(
        func=mdp.illegal_contact,
        params={"sensor_cfg": SceneEntityCfg("contact_forces", body_names="base"), "threshold": 1.0},
    )


@configclass
class CurriculumCfg:
    """Curriculum terms for the MDP."""
    # curriculum learning:
    # gradually increase the terrain difficulty
    terrain_levels = CurrTerm(func=mdp.terrain_levels_vel)


##
# Environment configuration
##


@configclass
class LocomotionVelocityRoughEnvCfg(ManagerBasedRLEnvCfg):
    """Configuration for the locomotion velocity-tracking environment."""

    # Scene settings
    scene: MySceneCfg = MySceneCfg(num_envs=4096, env_spacing=2.5)
    # Basic settings
    observations: ObservationsCfg = ObservationsCfg()
    actions: ActionsCfg = ActionsCfg()
    commands: CommandsCfg = CommandsCfg()
    # MDP settings
    rewards: RewardsCfg = RewardsCfg()
    terminations: TerminationsCfg = TerminationsCfg()
    events: EventCfg = EventCfg()
    curriculum: CurriculumCfg = CurriculumCfg()

    def __post_init__(self):
        """Post initialization."""
        # general settings
        # the policy acts once every 4 simulation steps
        self.decimation = 4
        # episode length: 20 seconds
        self.episode_length_s = 20.0
        # simulation settings
        self.sim.dt = 0.005  # physics time step (200 Hz)
        self.sim.render_interval = self.decimation
        self.sim.disable_contact_processing = True  # disable the extra contact processing in the physics engine
        self.sim.physics_material = self.scene.terrain.physics_material  # use the terrain's physics material as the default
        # update sensor update periods
        # we tick all the sensors based on the smallest update period (physics update period)
        # height scanner: updated once every `decimation` physics steps
        if self.scene.height_scanner is not None:
            self.scene.height_scanner.update_period = self.decimation * self.sim.dt
        # contact sensor: updated every physics step
        if self.scene.contact_forces is not None:
            self.scene.contact_forces.update_period = self.sim.dt

        # check if terrain levels curriculum is enabled - if so, enable curriculum for terrain generator
        # this generates terrains with increasing difficulty and is useful for training
        # whether the terrain-level curriculum is enabled
        if getattr(self.curriculum, "terrain_levels", None) is not None:
            if self.scene.terrain.terrain_generator is not None:
                self.scene.terrain.terrain_generator.curriculum = True
        else:
            if self.scene.terrain.terrain_generator is not None:
                self.scene.terrain.terrain_generator.curriculum = False
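A quick sanity check on the timing: sim.dt = 0.005 s gives a 200 Hz physics step; with decimation = 4 the policy acts at 50 Hz, so a 20-second episode corresponds to 1000 policy steps.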

3 A few notes on the file above

1 What exactly is func=mdp.base_lin_vel (taking it as an example), and how does it obtain the robot's velocity?

  base_lin_vel = ObsTerm(func=mdp.base_lin_vel, noise=Unoise(n_min=-0.1, n_max=0.1))

source\extensions\omni.isaac.lab\omni\isaac\lab\envs\mdp\observations.py

In that file, the docstring shows this is the root linear velocity expressed in the asset's root frame, i.e. relative to the body itself.

The return annotation torch.Tensor indicates the function returns a tensor containing the linear velocity in the root frame.

def base_lin_vel(env: ManagerBasedEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
    """Root linear velocity in the asset's root frame."""
    # extract the used quantities (to enable type-hinting)
    asset: RigidObject = env.scene[asset_cfg.name]
    return asset.data.root_lin_vel_b

The base-frame linear velocity is in turn computed from the current orientation quaternion and the base velocity in the world frame:

    @property
    def root_lin_vel_b(self) -> torch.Tensor:
        """Root linear velocity in base frame. Shape is (num_instances, 3).

        This quantity is the linear velocity of the articulation root's center of mass frame with
        respect to the articulation root's actor frame.
        """
        return math_utils.quat_rotate_inverse(self.root_quat_w, self.root_lin_vel_w)
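A quick sanity check of quat_rotate_inverse with hypothetical values (assuming Isaac Lab's (w, x, y, z) quaternion convention and an Isaac Lab Python environment):

import torch
import omni.isaac.lab.utils.math as math_utils

# base yawed +90 degrees about z; a world-frame velocity of +x
# should appear as -y in the base frame
quat_w = torch.tensor([[0.7071, 0.0, 0.0, 0.7071]])  # (w, x, y, z)
vel_w = torch.tensor([[1.0, 0.0, 0.0]])
vel_b = math_utils.quat_rotate_inverse(quat_w, vel_w)
# vel_b ≈ [[0.0, -1.0, 0.0]]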

These quantities ultimately come from the root state. We will not dig further into how they are computed here, but for sim-to-real transfer the exact same state representation, expressed in the same coordinate frame, must be provided on the real robot. This requires a good understanding of coordinate transforms, IMUs, generalized coordinates, and world versus body frames.

Isaac Lab provides a very complete set of such state quantities.

    @property
    def root_state_w(self):
        """Root state ``[pos, quat, lin_vel, ang_vel]`` in simulation world frame. Shape is (num_instances, 13).

        The position and quaternion are of the articulation root's actor frame. Meanwhile, the linear and angular
        velocities are of the articulation root's center of mass frame.
        """
        if self._root_state_w.timestamp < self._sim_timestamp:
            # read data from simulation
            pose = self._root_physx_view.get_root_transforms().clone()
            pose[:, 3:7] = math_utils.convert_quat(pose[:, 3:7], to="wxyz")
            velocity = self._root_physx_view.get_root_velocities()
            # set the buffer data and timestamp
            self._root_state_w.data = torch.cat((pose, velocity), dim=-1)
            self._root_state_w.timestamp = self._sim_timestamp
        return self._root_state_w.data

2 Where does func=mdp.ang_vel_xy_l2 (again as an example) come from? Clearly, it is a reward-computation function.

In fact, Isaac Lab ships with many built-in reward functions. This example,

    # orientation penalty on the base roll and pitch angular velocities
    ang_vel_xy_l2 = RewTerm(func=mdp.ang_vel_xy_l2, weight=-0.05)

is defined under source\extensions\omni.isaac.lab\omni\isaac\lab\envs\mdp\rewards.py:

def ang_vel_xy_l2(env: ManagerBasedRLEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
    """Penalize xy-axis base angular velocity using L2 squared kernel."""
    # extract the used quantities (to enable type-hinting)
    asset: RigidObject = env.scene[asset_cfg.name]
    return torch.sum(torch.square(asset.data.root_ang_vel_b[:, :2]), dim=1)

It returns the sum of the squared angular velocities about the x and y axes:

  • asset.data.root_ang_vel_b[:, :2]: selects the first two components (the angular velocities about the x and y axes).
  • torch.square(...): squares the x and y angular velocities element-wise.
  • torch.sum(..., dim=1): sums the squares along dimension 1, giving the squared L2 norm of the roll/pitch rates.
  • square() squares every element of the tensor.
  • sum() with dim=1 sums along the rows, so the (num_envs, 2) tensor becomes a 1-D tensor of length num_envs.
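A tiny numerical example (hypothetical values) showing that only the roll and pitch rates are penalized:

import torch

# roll, pitch, yaw rates (rad/s) for two environments
root_ang_vel_b = torch.tensor([[0.1, -0.2, 1.0],
                               [0.0,  0.0, 0.5]])
penalty = torch.sum(torch.square(root_ang_vel_b[:, :2]), dim=1)
# penalty = tensor([0.0500, 0.0000]); the yaw rate (last column) does not contribute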

3 Now let's look at this reward:

    feet_air_time = RewTerm(func=mdp.feet_air_time,

Path: source\extensions\omni.isaac.lab_tasks\omni\isaac\lab_tasks\manager_based\locomotion\velocity\mdp\rewards.py

Clearly, this is a custom, task-specific reward function, living in the task's own mdp folder.

def feet_air_time(
    env: ManagerBasedRLEnv, command_name: str, sensor_cfg: SceneEntityCfg, threshold: float
) -> torch.Tensor:
    """Reward long steps taken by the feet using L2-kernel.

    This function rewards the agent for taking steps that are longer than a threshold. This helps ensure
    that the robot lifts its feet off the ground and takes steps. The reward is computed as the sum of
    the time for which the feet are in the air.

    If the commands are small (i.e. the agent is not supposed to take a step), then the reward is zero.
    """
    # extract the used quantities (to enable type-hinting)
    # get the foot contact sensor from the environment object
    contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
    # compute the reward
    # which feet made first contact with the ground during this step
    first_contact = contact_sensor.compute_first_contact(env.step_dt)[:, sensor_cfg.body_ids]
    # how long each foot had been in the air before that contact
    last_air_time = contact_sensor.data.last_air_time[:, sensor_cfg.body_ids]
    # first_contact is 0 while a foot is still airborne, so the reward is only granted at touchdown;
    # air times above the threshold are rewarded, shorter ones are penalized

    reward = torch.sum((last_air_time - threshold) * first_contact, dim=1)
    # no reward for zero command
    # if the commanded velocity is small, the robot is expected to stand still on both feet,
    # so the reward is zeroed when the L2 norm of the commanded x/y velocity is below 0.1
    reward *= torch.norm(env.command_manager.get_command(command_name)[:, :2], dim=1) > 0.1
    return reward
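A small worked example with hypothetical numbers (one environment, two feet, threshold = 0.5 s):

import torch

# both feet touch down this step after being airborne for 0.6 s and 0.3 s
first_contact = torch.tensor([[1.0, 1.0]])
last_air_time = torch.tensor([[0.6, 0.3]])
reward = torch.sum((last_air_time - 0.5) * first_contact, dim=1)
# reward = tensor([-0.1000]): +0.1 for the long step, -0.2 for the short one;
# the whole term would then be zeroed if the commanded planar speed were below 0.1 m/s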

4 rough_env_cfg.py: the G1 rough-terrain configuration

source\extensions\omni.isaac.lab_tasks\omni\isaac\lab_tasks\manager_based\locomotion\velocity\config\g1\rough_env_cfg.py

It mainly modifies some of the reward terms,

and defines a dedicated configuration for play (i.e. evaluation after training).

from omni.isaac.lab.managers import RewardTermCfg as RewTerm
from omni.isaac.lab.managers import SceneEntityCfg
from omni.isaac.lab.managers import TerminationTermCfg as DoneTerm
from omni.isaac.lab.utils import configclass

import omni.isaac.lab_tasks.manager_based.locomotion.velocity.mdp as mdp
from omni.isaac.lab_tasks.manager_based.locomotion.velocity.velocity_env_cfg import (
    # see velocity_env_cfg.py above first; it is shared by all velocity tasks
    LocomotionVelocityRoughEnvCfg,
    RewardsCfg,
)

##
# Pre-defined configs
##
# import the robot's base configuration
from omni.isaac.lab_assets import G1_MINIMAL_CFG  # isort: skip


@configclass
# as you can see, it directly inherits the shared rewards from velocity_env_cfg
class G1Rewards(RewardsCfg):
    """Reward terms for the MDP."""
    # termination penalty, very large
    termination_penalty = RewTerm(func=mdp.is_terminated, weight=-200.0)
    # x/y linear velocity command tracking
    # this term is overridden with a new function, track_lin_vel_xy_yaw_frame_exp
    # (the parent used track_lin_vel_xy_exp)
    track_lin_vel_xy_exp = RewTerm(
        func=mdp.track_lin_vel_xy_yaw_frame_exp,
        weight=1.0,
        params={"command_name": "base_velocity", "std": 0.5},
    )
    # yaw (z) angular velocity tracking
    track_ang_vel_z_exp = RewTerm(
        func=mdp.track_ang_vel_z_world_exp, weight=2.0, params={"command_name": "base_velocity", "std": 0.5}
    )
    # this also overrides the reward defined in the parent class
    feet_air_time = RewTerm(
        func=mdp.feet_air_time_positive_biped,
        weight=0.25,
        params={
            "command_name": "base_velocity",
            "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_ankle_roll_link"),
            "threshold": 0.4,
        },
    )
    # foot-slide penalty, acting as a stand-in for a friction-cone constraint,
    # computed from the contact sensor on the ankle links
    feet_slide = RewTerm(
        func=mdp.feet_slide,
        weight=-0.1,
        params={
            "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_ankle_roll_link"),
            "asset_cfg": SceneEntityCfg("robot", body_names=".*_ankle_roll_link"),
        },
    )
    # Penalize ankle joint limits
    # joint-limit penalty, applied only to the ankle joints here
    dof_pos_limits = RewTerm(
        func=mdp.joint_pos_limits,
        weight=-1.0,
        params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*_ankle_pitch_joint", ".*_ankle_roll_joint"])},
    )
    # Penalize deviation from default of the joints that are not essential for locomotion
    # penalty for the hip yaw/roll joints deviating from their default positions
    joint_deviation_hip = RewTerm(
        func=mdp.joint_deviation_l1,
        weight=-0.1,
        params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*_hip_yaw_joint", ".*_hip_roll_joint"])},
    )
    # penalize the arm joints deviating from their default positions
    joint_deviation_arms = RewTerm(
        func=mdp.joint_deviation_l1,
        weight=-0.1,
        params={
            "asset_cfg": SceneEntityCfg(
                "robot",
                joint_names=[
                    ".*_shoulder_pitch_joint",
                    ".*_shoulder_roll_joint",
                    ".*_shoulder_yaw_joint",
                    ".*_elbow_pitch_joint",
                    ".*_elbow_roll_joint",
                ],
            )
        },
    )
    # penalize the finger joints deviating from their default positions
    joint_deviation_fingers = RewTerm(
        func=mdp.joint_deviation_l1,
        weight=-0.05,
        params={
            "asset_cfg": SceneEntityCfg(
                "robot",
                joint_names=[
                    ".*_five_joint",
                    ".*_three_joint",
                    ".*_six_joint",
                    ".*_four_joint",
                    ".*_zero_joint",
                    ".*_one_joint",
                    ".*_two_joint",
                ],
            )
        },
    )
    # penalize the torso joint deviating from its default position
    joint_deviation_torso = RewTerm(
        func=mdp.joint_deviation_l1,
        weight=-0.1,
        params={"asset_cfg": SceneEntityCfg("robot", joint_names="torso_joint")},
    )
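    # Note: joint_deviation_l1 penalizes sum(|q - q_default|) over the selected joints (an L1 kernel),
    # so the terms above keep the hips, arms, fingers and torso close to their default pose
    # (based on my reading of the built-in implementation; treat this as an assumption)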


@configclass
class TerminationsCfg:
    """Termination terms for the MDP."""
    # terminate on timeout, or when the torso touches the ground (a fall)
    time_out = DoneTerm(func=mdp.time_out, time_out=True)
    base_contact = DoneTerm(
        func=mdp.illegal_contact,
        params={"sensor_cfg": SceneEntityCfg("contact_forces", body_names="torso_link"), "threshold": 1.0},
    )

# inherits the shared rough-terrain environment config
@configclass
class G1RoughEnvCfg(LocomotionVelocityRoughEnvCfg):
    # override with the G1-specific rewards and terminations
    rewards: G1Rewards = G1Rewards()
    terminations: TerminationsCfg = TerminationsCfg()
    # further customization
    def __post_init__(self):
        # post init of parent
        # run the parent's post-init first
        super().__post_init__()
        # Scene
        # prim path of the robot entity
        self.scene.robot = G1_MINIMAL_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
        # attach the terrain height scanner to the torso
        self.scene.height_scanner.prim_path = "{ENV_REGEX_NS}/Robot/torso_link"

        # Randomization
        # domain randomization:
        # no more pushing the robot
        self.events.push_robot = None
        # no base-mass randomization either
        self.events.add_base_mass = None
        # on reset, keep the joints at exactly their default positions (a relative scale of 1.0 to 1.0, not radians)
        self.events.reset_robot_joints.params["position_range"] = (1.0, 1.0)
        # apply the external force/torque event to the robot's torso_link
        self.events.base_external_force_torque.params["asset_cfg"].body_names = ["torso_link"]
        # randomization ranges for the initial pose and velocity on reset
        self.events.reset_base.params = {
            # base position and yaw are randomized
            "pose_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5), "yaw": (-3.14, 3.14)},
            # base velocity is zero
            "velocity_range": {
                "x": (0.0, 0.0),
                "y": (0.0, 0.0),
                "z": (0.0, 0.0),
                "roll": (0.0, 0.0),
                "pitch": (0.0, 0.0),
                "yaw": (0.0, 0.0),
            },
        }

        # Rewards
        # drop the penalty on vertical velocity (rough terrain); note these terms are usually the squared L2 value rather than the L2 norm
        self.rewards.lin_vel_z_l2.weight = 0.0
        # no undesired-contact penalty
        self.rewards.undesired_contacts = None
        # penalize a non-flat base orientation
        self.rewards.flat_orientation_l2.weight = -1.0
        # small penalty on action-rate changes
        self.rewards.action_rate_l2.weight = -0.005
        # joint-acceleration penalty, excluding the ankle joints
        self.rewards.dof_acc_l2.weight = -1.25e-7
        self.rewards.dof_acc_l2.params["asset_cfg"] = SceneEntityCfg(
            "robot", joint_names=[".*_hip_.*", ".*_knee_joint"]
        )
        # joint-torque penalty, including the ankle joints
        self.rewards.dof_torques_l2.weight = -1.5e-7
        self.rewards.dof_torques_l2.params["asset_cfg"] = SceneEntityCfg(
            "robot", joint_names=[".*_hip_.*", ".*_knee_joint", ".*_ankle_.*"]
        )

        # Commands
        self.commands.base_velocity.ranges.lin_vel_x = (0.0, 1.0)
        # no lateral velocity commands for now
        self.commands.base_velocity.ranges.lin_vel_y = (-0.0, 0.0)
        # turning: up to 1 rad/s (about 57.3 deg/s), a reasonable range
        self.commands.base_velocity.ranges.ang_vel_z = (-1.0, 1.0)


# this configuration is used at inference time after training (i.e. when visualizing the trained policy)
@configclass
class G1RoughEnvCfg_PLAY(G1RoughEnvCfg):
    def __post_init__(self):
        # post init of parent
        # this runs the whole parent chain, including the shared velocity config that defines dt, decimation, etc.
        super().__post_init__()

        # make a smaller scene for play
        # 50 robots
        self.scene.num_envs = 50
        self.scene.env_spacing = 2.5
        # 40-second episodes
        self.episode_length_s = 40.0
        # spawn the robot randomly in the grid (instead of their terrain levels)
        self.scene.terrain.max_init_terrain_level = None
        # reduce the number of terrains to save memory
        # if a terrain generator is in use
        if self.scene.terrain.terrain_generator is not None:
            self.scene.terrain.terrain_generator.num_rows = 5
            self.scene.terrain.terrain_generator.num_cols = 5
            # disable the progressive (curriculum) terrain generation
            self.scene.terrain.terrain_generator.curriculum = False

        # commanded forward velocity of 1 m/s; with a 0.4 s swing phase this means roughly a 0.4 m stride per step
        self.commands.base_velocity.ranges.lin_vel_x = (1.0, 1.0)
        # no lateral velocity
        self.commands.base_velocity.ranges.lin_vel_y = (0.0, 0.0)
        # yaw rate up to 1 rad/s, roughly one full turn in about 6 seconds
        self.commands.base_velocity.ranges.ang_vel_z = (-1.0, 1.0)
        # heading command fixed at 0
        self.commands.base_velocity.ranges.heading = (0.0, 0.0)
        # disable randomization for play
        # disable observation corruption, i.e. the artificial noise added to the observation terms above
        self.observations.policy.enable_corruption = False
        # remove random pushing
        # remove the external force/torque disturbance
        self.events.base_external_force_torque = None
        # and stop pushing the robot
        self.events.push_robot = None

5 flat_env_cfg.py

source\extensions\omni.isaac.lab_tasks\omni\isaac\lab_tasks\manager_based\locomotion\velocity\config\g1\flat_env_cfg.py

It inherits from the rough config.

from omni.isaac.lab.managers import SceneEntityCfg
from omni.isaac.lab.utils import configclass

from .rough_env_cfg import G1RoughEnvCfg

# the flat-terrain config inherits from the rough-terrain config
@configclass
class G1FlatEnvCfg(G1RoughEnvCfg):
    def __post_init__(self):
        # post init of parent
        super().__post_init__()

        # change terrain to flat
        # the terrain type is set to a simple plane
        self.scene.terrain.terrain_type = "plane"
        # no terrain generator, i.e. no randomly generated terrain features
        self.scene.terrain.terrain_generator = None
        # no height scan
        # the height scanner is not needed on flat ground
        self.scene.height_scanner = None
        # and it is removed from the observations
        self.observations.policy.height_scan = None
        # no terrain curriculum
        # the terrain curriculum is not used either
        self.curriculum.terrain_levels = None

        # Rewards
        # angular-velocity tracking reward weight
        self.rewards.track_ang_vel_z_exp.weight = 1.0
        # penalize vertical velocity again
        self.rewards.lin_vel_z_l2.weight = -0.2
        # unchanged
        self.rewards.action_rate_l2.weight = -0.005
        self.rewards.dof_acc_l2.weight = -1.0e-7
        # stronger feet-air-time reward
        self.rewards.feet_air_time.weight = 0.75
        # with a 0.4 s air-time threshold
        self.rewards.feet_air_time.params["threshold"] = 0.4
        # a somewhat larger torque penalty
        self.rewards.dof_torques_l2.weight = -2.0e-6
        self.rewards.dof_torques_l2.params["asset_cfg"] = SceneEntityCfg(
            "robot", joint_names=[".*_hip_.*", ".*_knee_joint"]
        )
        # Commands
        self.commands.base_velocity.ranges.lin_vel_x = (0.0, 1.0)
        # lateral velocity commands are now enabled
        self.commands.base_velocity.ranges.lin_vel_y = (-0.5, 0.5)
        self.commands.base_velocity.ranges.ang_vel_z = (-1.0, 1.0)


class G1FlatEnvCfg_PLAY(G1FlatEnvCfg):
    def __post_init__(self) -> None:
        # post init of parent
        super().__post_init__()

        # make a smaller scene for play
        self.scene.num_envs = 50
        self.scene.env_spacing = 2.5
        # disable randomization for play
        self.observations.policy.enable_corruption = False
        # remove random pushing
        self.events.base_external_force_torque = None
        self.events.push_robot = None
