Personal notes only; refer to them with caution.
On training robots with Isaac Lab.
1 Running and invocation
.vscode\tools\launch.template.json
Run the following launch configurations; the command-line arguments are specified in their "args" fields.
{
    "name": "Python: Train Environment",
    "type": "python",
    "request": "launch",
    "args": ["--task", "Isaac-Reach-Franka-v0", "--headless"],
    "program": "${workspaceFolder}/source/standalone/workflows/rsl_rl/train.py",
    "console": "integratedTerminal"
},
{
    "name": "Python: Play Environment",
    "type": "python",
    "request": "launch",
    "args": ["--task", "Isaac-Reach-Franka-v0", "--num_envs", "32"],
    "program": "${workspaceFolder}/source/standalone/workflows/rsl_rl/play.py",
    "console": "integratedTerminal"
}
The environments are registered in source\extensions\omni.isaac.lab_tasks\omni\isaac\lab_tasks\manager_based\locomotion\velocity\config\g1\__init__.py:
gym.register(
    id="Isaac-Velocity-Rough-G1-v0",
    entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv",
    disable_env_checker=True,
    kwargs={
        "env_cfg_entry_point": rough_env_cfg.G1RoughEnvCfg,
        "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:G1RoughPPORunnerCfg",
    },
)
gym.register(
    id="Isaac-Velocity-Rough-G1-Play-v0",
    # the entry point imports the environment class that implements this interface
    entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv",
    disable_env_checker=True,
    kwargs={
        # used to create the environment instance
        "env_cfg_entry_point": rough_env_cfg.G1RoughEnvCfg_PLAY,
        "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:G1RoughPPORunnerCfg",
    },
)
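Following the same pattern, a new task can be registered with its own env cfg and PPO runner cfg. A minimal sketch (the task id, the flat_env_cfg module, and both cfg class names are placeholders, not from the repo):

gym.register(
    id="Isaac-Velocity-Flat-MyRobot-v0",  # hypothetical task id
    entry_point="omni.isaac.lab.envs:ManagerBasedRLEnv",
    disable_env_checker=True,
    kwargs={
        "env_cfg_entry_point": flat_env_cfg.MyRobotFlatEnvCfg,  # hypothetical env cfg class
        "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:MyRobotFlatPPORunnerCfg",  # hypothetical runner cfg
    },
)

Once registered, the same train.py can be pointed at the new task via --task Isaac-Velocity-Flat-MyRobot-v0.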
Below is train.py, which is built on the rsl_rl library.
source\standalone\workflows\rsl_rl\train.py
# add argparse arguments
parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.")
# video flag, boolean, no recording by default
parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
# record 200 steps of video by default
parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
# record a video every 2000 steps by default
parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).")
# number of environments to simulate; no default here, so it must come from elsewhere
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
# task name
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
# random seed for the environment, for reproducible training:
# with the same seed, the generated random sequences are identical
parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
# maximum number of training iterations
parser.add_argument("--max_iterations", type=int, default=None, help="RL Policy training iterations.")
# append RSL-RL cli arguments
# adds the RSL-RL-specific command-line arguments to the ArgumentParser
cli_args.add_rsl_rl_args(parser)
# append AppLauncher cli args
# app launcher arguments
AppLauncher.add_app_launcher_args(parser)
# the parsed standard arguments and the remaining arguments
args_cli, hydra_args = parser.parse_known_args()
# always enable cameras to record video
# if video recording was requested, enable the cameras
if args_cli.video:
    args_cli.enable_cameras = True
# clear out sys.argv for Hydra
# sys.argv[0] is usually the script name (e.g. train.py); hydra_args are the Hydra-specific
# arguments returned by parse_known_args(); this avoids conflicts
sys.argv = [sys.argv[0]] + hydra_args
# launch omniverse app
# configured with the parsed command-line arguments
app_launcher = AppLauncher(args_cli)
simulation_app = app_launcher.app
"""Rest everything follows."""
import gymnasium as gym  # gym toolkit
import os  # OS interaction: current working directory, creating folders, path handling, etc.
import torch
from datetime import datetime  # date and time
from rsl_rl.runners import OnPolicyRunner  # the RL training runner
from omni.isaac.lab.envs import DirectRLEnvCfg, ManagerBasedRLEnvCfg  # Isaac Lab direct and manager-based env cfgs
from omni.isaac.lab.utils.dict import print_dict  # pretty-print a dict
from omni.isaac.lab.utils.io import dump_pickle, dump_yaml  # serialize objects to pickle / YAML
import omni.isaac.lab_tasks  # noqa: F401
from omni.isaac.lab_tasks.utils import get_checkpoint_path  # checkpoint lookup
from omni.isaac.lab_tasks.utils.hydra import hydra_task_config  # Hydra configuration framework
# wrappers that adapt the (vectorized) environment for RSL-RL
from omni.isaac.lab_tasks.utils.wrappers.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper
torch.backends.cuda.matmul.allow_tf32 = True  # allow TF32 for faster matmul
torch.backends.cudnn.allow_tf32 = True
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = False
# the task is passed in through the decorator
@hydra_task_config(args_cli.task, "rsl_rl_cfg_entry_point")
# environment and agent configurations
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg, agent_cfg: RslRlOnPolicyRunnerCfg):
    """Train with RSL-RL agent."""
    # override configurations with non-hydra CLI arguments
    # update the agent config with the command-line arguments
    agent_cfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
    # number of environments: keep the default if not specified on the command line
    env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
    # maximum iterations
    agent_cfg.max_iterations = (
        args_cli.max_iterations if args_cli.max_iterations is not None else agent_cfg.max_iterations
    )
    # set the environment seed
    # note: certain randomizations occur in the environment initialization so we set the seed here
    # for reproducible results
    env_cfg.seed = agent_cfg.seed
    #---------------------------------------------------------------
    # specify directory for logging experiments
    # root directory for log files
    log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
    # absolute path
    log_root_path = os.path.abspath(log_root_path)
    print(f"[INFO] Logging experiment in directory: {log_root_path}")
    # specify directory for logging runs: {time-stamp}_{run_name}
    log_dir = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    # if run_name is defined in the agent config, it is appended to the log directory name
    if agent_cfg.run_name:
        log_dir += f"_{agent_cfg.run_name}"
    log_dir = os.path.join(log_root_path, log_dir)
    #---------------------------------------------------------------
    # create isaac environment
    # key step: create the environment from the registered task and the env cfg;
    # switch the render mode when recording video
    env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
    #----------------------------------------------------------------
    # wrap for video recording
    if args_cli.video:
        video_kwargs = {
            # video output folder
            "video_folder": os.path.join(log_dir, "videos", "train"),
            # a function that decides when to record; here a video is triggered every args_cli.video_interval steps
            "step_trigger": lambda step: step % args_cli.video_interval == 0,
            "video_length": args_cli.video_length,
            # disable the wrapper's logger
            "disable_logger": True,
        }
        print("[INFO] Recording videos during training.")
        print_dict(video_kwargs, nesting=4)
        env = gym.wrappers.RecordVideo(env, **video_kwargs)
    #---------------------------------------------------------------------
    # wrap around environment for rsl-rl
    # a custom wrapper that adapts the environment to the format expected by rsl-rl
    env = RslRlVecEnvWrapper(env)
    #---------------------------------------------------------------------
    # create runner from rsl-rl
    # key step: the rsl-rl training runner
    # arguments: environment, agent config as a dict, log path, training device
    runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
    # write git state to logs
    runner.add_git_repo_to_log(__file__)
    # save resume path before creating a new log_dir
    # keep the resume path so training can continue after an interruption
    if agent_cfg.resume:
        # get path to previous checkpoint
        resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
        print(f"[INFO]: Loading model checkpoint from: {resume_path}")
        # load previously trained model
        runner.load(resume_path)
    #---------------------------------------------------------------------
    # dump the configuration into log-directory
    # serialize the objects and save them to files
    # environment config
    dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg)
    # agent config
    dump_yaml(os.path.join(log_dir, "params", "agent.yaml"), agent_cfg)
    dump_pickle(os.path.join(log_dir, "params", "env.pkl"), env_cfg)
    dump_pickle(os.path.join(log_dir, "params", "agent.pkl"), agent_cfg)
    #---------------------------------------------------------------------
    # run training
    # arguments: maximum number of iterations, and start episodes at random lengths
    runner.learn(num_learning_iterations=agent_cfg.max_iterations, init_at_random_ep_len=True)
    #--------------------------------------------------------------------
    # close the simulator
    env.close()

if __name__ == "__main__":
    # run the main function
    main()
    # close sim app
    simulation_app.close()
The play script is source\standalone\workflows\rsl_rl\play.py:
def main():
    """Play with RSL-RL agent."""
    # parse configuration
    env_cfg = parse_env_cfg(
        args_cli.task, device=args_cli.device, num_envs=args_cli.num_envs, use_fabric=not args_cli.disable_fabric
    )
    agent_cfg: RslRlOnPolicyRunnerCfg = cli_args.parse_rsl_rl_cfg(args_cli.task, args_cli)
    # specify directory for logging experiments
    log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
    log_root_path = os.path.abspath(log_root_path)
    print(f"[INFO] Loading experiment from directory: {log_root_path}")
    # resolve the checkpoint path
    resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
    log_dir = os.path.dirname(resume_path)
    # create isaac environment
    env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
    # wrap for video recording
    if args_cli.video:
        video_kwargs = {
            "video_folder": os.path.join(log_dir, "videos", "play"),
            "step_trigger": lambda step: step == 0,
            "video_length": args_cli.video_length,
            "disable_logger": True,
        }
        print("[INFO] Recording videos during training.")
        print_dict(video_kwargs, nesting=4)
        env = gym.wrappers.RecordVideo(env, **video_kwargs)
    # wrap around environment for rsl-rl
    env = RslRlVecEnvWrapper(env)
    print(f"[INFO]: Loading model checkpoint from: {resume_path}")
    # load previously trained model
    ppo_runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
    # load the model
    ppo_runner.load(resume_path)
    # obtain the trained policy for inference
    # get the trained policy, running on the specified device
    policy = ppo_runner.get_inference_policy(device=env.unwrapped.device)
    # export policy to onnx/jit
    # export the model (next to the loaded checkpoint)
    export_model_dir = os.path.join(os.path.dirname(resume_path), "exported")
    export_policy_as_jit(
        ppo_runner.alg.actor_critic, ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.pt"
    )
    # PPO actor-critic network, observation normalizer, export path, export file name
    export_policy_as_onnx(
        ppo_runner.alg.actor_critic, normalizer=ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.onnx"
    )
    #---------------------------------------------------------
    # reset environment
    # get the environment observations
    obs, _ = env.get_observations()
    timestep = 0
    # simulate environment
    while simulation_app.is_running():
        # run everything in inference mode
        with torch.inference_mode():
            # agent stepping
            # policy network: observation in, action out
            actions = policy(obs)
            # env stepping
            # step the environment with the action and get the next observation;
            # the discarded values are usually the rewards, terminations, etc.
            obs, _, _, _ = env.step(actions)
        if args_cli.video:
            timestep += 1
            # Exit the play loop after recording one video
            # stop recording once the video length is reached
            if timestep == args_cli.video_length:
                break
    # close the simulator
    env.close()

if __name__ == "__main__":
    # run the main function
    main()
    # close sim app
    simulation_app.close()
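Since play.py exports the policy as policy.pt (TorchScript) and policy.onnx, the TorchScript file can later be loaded outside Isaac Lab for inference. A minimal sketch, with the checkpoint path and observation size as placeholders:

import torch

# placeholder path; the actual file is written to <run log dir>/exported/policy.pt by play.py
policy = torch.jit.load("logs/rsl_rl/<experiment_name>/<run>/exported/policy.pt")
policy.eval()
num_obs = 123  # placeholder: use the task's actual observation dimension
obs = torch.zeros(1, num_obs)
with torch.inference_mode():
    actions = policy(obs)  # raw policy output; it still needs the scale/offset processing described in 2.1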
2 How the calls work
2.1 Points to note
In the 星动纪元 (Robot Era) code, the sequence of operations is fairly easy to follow.
Isaac Lab, by contrast, largely hides the original steps and logic and exposes only the MDP configuration for modification. This has a clear downside: without knowing the internal execution steps it is hard to make changes, and the handling of inputs and outputs is not very transparent.
The code below traces how the action output by the policy is processed before being handed to the simulation environment, which is the important part, and how the environment then runs and returns feedback.
It then becomes clear that in the manager-based workflow (taking G1 as the example), after the policy outputs an action:
# 1 the raw action is recorded (kept as the action history);
# 2 the processed action is: raw action * scale + the configured default joint positions.
There is no clipping, no added noise, no delay, and no joint-limit handling.
When running the policy outside of training (e.g. in your own simulator), the action must go through the same scaling and clipping as during training (noise can be omitted), so that the same action is sent to the robot.
Observation noise, on the other hand, is added in the example.
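As a concrete illustration of that point, a minimal sketch of reproducing the training-time action processing at deployment time (the scale and default joint positions are placeholders; take them from the training configuration):

import torch

def process_policy_action(raw_action: torch.Tensor, action_scale: float,
                          default_joint_pos: torch.Tensor) -> torch.Tensor:
    # same affine transform as the manager-based JointAction term (see 2.3):
    # processed = raw action * scale + default joint positions
    return raw_action * action_scale + default_joint_pos

# usage with placeholder values: scale 0.5 as in the locomotion ActionsCfg,
# default_joint_pos taken from the robot's default pose in the training config
policy_output = torch.zeros(1, 12)        # placeholder for the policy's raw output
default_joint_pos = torch.zeros(1, 12)    # placeholder default joint positions
joint_pos_target = process_policy_action(policy_output, 0.5, default_joint_pos)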
2.2 What happens in step()
The action output by the policy is passed into this step() function.
ManagerBasedEnv and ManagerBasedRLEnv are the core of Isaac Lab; all the code follows this framework.
source\extensions\omni.isaac.lab\omni\isaac\lab\envs\manager_based_rl_env.py
def step(self, action: torch.Tensor) -> VecEnvStepReturn:
    """Execute one time-step of the environment's dynamics and reset terminated environments.

    Unlike the :class:`ManagerBasedEnv.step` class, the function performs the following operations:

    1. Process the actions.
    2. Perform physics stepping.
    3. Perform rendering if gui is enabled.
    4. Update the environment counters and compute the rewards and terminations.
    5. Reset the environments that terminated.
    6. Compute the observations.
    7. Return the observations, rewards, resets and extras.

    Args:
        action: The actions to apply on the environment. Shape is (num_envs, action_dim).

    Returns:
        A tuple containing the observations, rewards, resets (terminated and truncated) and extras.
    """
    # process actions
    # validate and process the actions
    self.action_manager.process_action(action.to(self.device))
    # check if we need to do rendering within the physics loop
    # note: checked here once to avoid multiple checks within the loop
    is_rendering = self.sim.has_gui() or self.sim.has_rtx_sensors()
    # perform physics stepping
    for _ in range(self.cfg.decimation):
        self._sim_step_counter += 1
        # set actions into buffers
        self.action_manager.apply_action()
        # set actions into simulator
        # write the actions into the simulation
        self.scene.write_data_to_sim()
        # simulate
        self.sim.step(render=False)
        # render between steps only if the GUI or an RTX sensor needs it
        # note: we assume the render interval to be the shortest accepted rendering interval.
        # If a camera needs rendering at a faster frequency, this will lead to unexpected behavior.
        if self._sim_step_counter % self.cfg.sim.render_interval == 0 and is_rendering:
            self.sim.render()
        # update buffers at sim dt
        self.scene.update(dt=self.physics_dt)
    # post-step:
    # -- update env counters (used for curriculum generation)
    self.episode_length_buf += 1  # step in current episode (per env)
    self.common_step_counter += 1  # total step (common for all envs)
    # -- check terminations
    self.reset_buf = self.termination_manager.compute()
    self.reset_terminated = self.termination_manager.terminated
    self.reset_time_outs = self.termination_manager.time_outs
    # -- reward computation
    self.reward_buf = self.reward_manager.compute(dt=self.step_dt)
    # -- reset envs that terminated/timed-out and log the episode information
    reset_env_ids = self.reset_buf.nonzero(as_tuple=False).squeeze(-1)
    if len(reset_env_ids) > 0:
        self._reset_idx(reset_env_ids)
    # -- update command
    self.command_manager.compute(dt=self.step_dt)
    # -- step interval events
    if "interval" in self.event_manager.available_modes:
        self.event_manager.apply(mode="interval", dt=self.step_dt)
    # -- compute observations
    # note: done after reset to get the correct observations for reset envs
    self.obs_buf = self.observation_manager.compute()
    # return observations, rewards, resets and extras
    return self.obs_buf, self.reward_buf, self.reset_terminated, self.reset_time_outs, self.extras
As soon as step() is entered, process_action() is called: the action output by the policy network goes straight into this operation.
In source\extensions\omni.isaac.lab\omni\isaac\lab\managers\action_manager.py:
def process_action(self, action: torch.Tensor):
    """Processes the actions sent to the environment.

    Note:
        This function should be called once per environment step.

    Args:
        action: The actions to process.
    """
    # check if action dimension is valid
    # check that the action dimension is valid; self.total_action_dim is the expected action dimension
    if self.total_action_dim != action.shape[1]:
        raise ValueError(f"Invalid action shape, expected: {self.total_action_dim}, received: {action.shape[1]}.")
    # store the input actions
    # keep the previous action and update the current one
    self._prev_action[:] = self._action
    self._action[:] = action.to(self.device)
    # split the actions and apply to each tensor
    # slice out each term's portion of the action using the running index idx
    idx = 0
    for term in self._terms.values():
        term_actions = action[:, idx : idx + term.action_dim]
        term.process_actions(term_actions)  # process the actions sent to the environment
        idx += term.action_dim
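To make the slicing in that loop concrete, a toy illustration (the term names and dimensions are made up, not from the repo):

import torch

# suppose two action terms with action_dim 12 (legs) and 7 (arm), and 4 environments
action = torch.randn(4, 19)
leg_actions = action[:, 0:12]   # first term receives columns 0..11
arm_actions = action[:, 12:19]  # second term receives columns 12..18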
2.3 What is done to the action
Now into the processing of a single action term:
source\extensions\omni.isaac.lab\omni\isaac\lab\managers\action_manager.py
process_actions here is an abstract method of ActionTerm, so we need to find which class implements it.
@abstractmethod
def process_actions(self, actions: torch.Tensor):
    """Processes the actions sent to the environment.

    Note:
        This function is called once per environment step by the manager.

    Args:
        actions: The actions to process.
    """
    raise NotImplementedError
The answer is class JointAction(ActionTerm), which implements the abstract method above and does two things:
1 record the raw action;
2 compute the processed action: raw action * scale + the configured default positions.
source\extensions\omni.isaac.lab\omni\isaac\lab\envs\mdp\actions\joint_actions.py
class JointAction(ActionTerm):
    def process_actions(self, actions: torch.Tensor):
        # store the raw actions
        self._raw_actions[:] = actions
        # apply the affine transformations
        self._processed_actions = self._raw_actions * self._scale + self._offset
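A quick numeric check of that affine transform (the numbers are made up for illustration):

import torch

raw = torch.tensor([[0.2, -0.4]])        # policy output for two joints
scale = 0.5                              # e.g. the scale used in the locomotion ActionsCfg
offset = torch.tensor([[0.0, 0.8]])      # default joint positions used as offset
processed = raw * scale + offset         # -> tensor([[0.1000, 0.6000]])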
Now let's see who uses the JointAction class:
source\extensions\omni.isaac.lab\omni\isaac\lab\envs\mdp\actions\actions_cfg.py:
@configclass
class JointPositionActionCfg(JointActionCfg):
    """Configuration for the joint position action term.

    See :class:`JointPositionAction` for more details.
    """

    class_type: type[ActionTerm] = joint_actions.JointPositionAction

    use_default_offset: bool = True
    """Whether to use default joint positions configured in the articulation asset as offset.
    Defaults to True.

    If True, this flag results in overwriting the values of :attr:`offset` to the default joint positions
    from the articulation asset.
    """
Above we can see class_type: type[ActionTerm] = joint_actions.JointPositionAction.
joint_actions is a module (a file), and JointPositionAction is a class derived from ActionTerm: its parent's parent class is ActionTerm.
source\extensions\omni.isaac.lab\omni\isaac\lab\envs\mdp\actions\joint_actions.py
class JointPositionAction(JointAction):
    """Joint action term that applies the processed actions to the articulation's joints as position commands."""

    cfg: actions_cfg.JointPositionActionCfg
    """The configuration of the action term."""

    def __init__(self, cfg: actions_cfg.JointPositionActionCfg, env: ManagerBasedEnv):
        # initialize the action term
        super().__init__(cfg, env)
        # use default joint positions as offset
        if cfg.use_default_offset:
            self._offset = self._asset.data.default_joint_pos[:, self._joint_ids].clone()

    def apply_actions(self):
        # set position targets
        self._asset.set_joint_position_target(self.processed_actions, joint_ids=self._joint_ids)
From the above we can see that process_action() does nothing beyond the two steps mentioned earlier.
Next, look at apply_actions, called during the physics stepping in step().
It is also implemented through the abstract-method pattern, and concretely it executes:
self._asset.set_joint_position_target(self.processed_actions, joint_ids=self._joint_ids)
which applies the following operations to the just-processed action:
def set_joint_position_target(
    self, target: torch.Tensor, joint_ids: Sequence[int] | slice | None = None, env_ids: Sequence[int] | None = None
):
    """Set joint position targets into internal buffers.

    .. note::
        This function does not apply the joint targets to the simulation. It only fills the buffers with
        the desired values. To apply the joint targets, call the :meth:`write_data_to_sim` function.

    Args:
        target: Joint position targets. Shape is (len(env_ids), len(joint_ids)).
        joint_ids: The joint indices to set the targets for. Defaults to None (all joints).
        env_ids: The environment indices to set the targets for. Defaults to None (all environments).
    """
    # resolve indices
    # default to all environments and all joints
    if env_ids is None:
        env_ids = slice(None)
    if joint_ids is None:
        joint_ids = slice(None)
    # broadcast env_ids if needed to allow double indexing
    # if the user provided specific environment and joint indices
    if env_ids != slice(None) and joint_ids != slice(None):
        env_ids = env_ids[:, None]
    # set targets
    # shape is (len(env_ids), len(joint_ids))
    self._data.joint_pos_target[env_ids, joint_ids] = target
In the class Articulation(AssetBase) that contains the method above:
self._data = ArticulationData(self.root_physx_view, self.device)
joint_pos_target holds the position targets of all joints, with shape (len(env_ids), len(joint_ids)).
So the action ends up written into the joint-level position targets.
Next, self.scene.write_data_to_sim() pushes the data into the simulation; let's see what it executes:
source\extensions\omni.isaac.lab\omni\isaac\lab\scene\interactive_scene.py
def write_data_to_sim(self):
    """Writes the data of the scene entities to the simulation."""
    # -- assets
    # push articulation (joint-level) data down to the simulation
    for articulation in self._articulations.values():
        articulation.write_data_to_sim()
    # deformable object states, forces, positions, etc.
    for deformable_object in self._deformable_objects.values():
        deformable_object.write_data_to_sim()
    # rigid body states
    for rigid_object in self._rigid_objects.values():
        rigid_object.write_data_to_sim()
Then self.scene.update(dt=self.physics_dt) updates the scene data after the time step, including the data of all objects and joints.
2.4 Differences in action handling between the manager-based and direct workflows
Now for what is different. In the direct workflow, action noise is added (when an action noise model is configured):
action = action.to(self.device)
# add action noise
if self.cfg.action_noise_model:
    action = self._action_noise_model.apply(action)
# process actions
self._pre_physics_step(action)
source\extensions\omni.isaac.lab\omni\isaac\lab\utils\noise\noise_model.py defines the available noise types.
source\extensions\omni.isaac.lab\omni\isaac\lab\envs\direct_rl_env.py allows noise to be added, if noise models are configured:
# reset noise models
if self.cfg.action_noise_model:
    self._action_noise_model.reset(env_ids)
if self.cfg.observation_noise_model:
    self._observation_noise_model.reset(env_ids)
Example: source\extensions\omni.isaac.lab_tasks\omni\isaac\lab_tasks\direct\shadow_hand\shadow_hand_env_cfg.py configures noise this way. The noise model is a subclass of NoiseModel, its configuration uses the GaussianNoiseCfg data type, and it calls the gaussian_noise function.
Three operation types are defined (additive, proportional scaling, and an absolute value drawn around a custom mean): add, scale, abs, all tuned via the standard deviation; a sketch of the three operations follows the configuration below.
@configclass
class GaussianNoiseCfg(NoiseCfg):
    """Configuration for an additive gaussian noise term."""

    func = noise_model.gaussian_noise

    mean: torch.Tensor | float = 0.0
    """The mean of the noise. Defaults to 0.0."""

    std: torch.Tensor | float = 1.0
    """The standard deviation of the noise. Defaults to 1.0."""

# at every time-step add gaussian noise + bias. The bias is a gaussian sampled at reset
action_noise_model: NoiseModelWithAdditiveBiasCfg = NoiseModelWithAdditiveBiasCfg(
    noise_cfg=GaussianNoiseCfg(mean=0.0, std=0.05, operation="add"),
    bias_noise_cfg=GaussianNoiseCfg(mean=0.0, std=0.015, operation="abs"),
)
# at every time-step add gaussian noise + bias. The bias is a gaussian sampled at reset
observation_noise_model: NoiseModelWithAdditiveBiasCfg = NoiseModelWithAdditiveBiasCfg(
    noise_cfg=GaussianNoiseCfg(mean=0.0, std=0.002, operation="add"),
    bias_noise_cfg=GaussianNoiseCfg(mean=0.0, std=0.0001, operation="abs"),
)
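Based on the description of the three operation types above, a minimal sketch of how such a gaussian noise function could behave (an illustrative re-implementation, not the library code; check noise_model.gaussian_noise for the authoritative version):

import torch

def gaussian_noise_sketch(data: torch.Tensor, mean: float, std: float, operation: str) -> torch.Tensor:
    noise = mean + std * torch.randn_like(data)
    if operation == "add":      # additive: data + noise
        return data + noise
    elif operation == "scale":  # proportional: data scaled by the noise
        return data * noise
    elif operation == "abs":    # absolute: data replaced by noise drawn around the given mean
        return noise
    else:
        raise ValueError(f"Unknown noise operation: {operation}")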
Next, source\extensions\omni.isaac.lab_tasks\omni\isaac\lab_tasks\direct\franka_cabinet\franka_cabinet_env.py
uses the following action processing:
def _pre_physics_step(self, actions: torch.Tensor):
    # clip to [-1, 1]
    self.actions = actions.clone().clamp(-1.0, 1.0)
    # current robot target position + speed scale * dt * action * action scale
    targets = self.robot_dof_targets + self.robot_dof_speed_scales * self.dt * self.actions * self.cfg.action_scale
    # clamp between the robot's lower and upper joint limits
    self.robot_dof_targets[:] = torch.clamp(targets, self.robot_dof_lower_limits, self.robot_dof_upper_limits)
Because the action is clipped to [-1, 1] first, action_scale has to be chosen according to the actual motion range; for example, the Franka uses action_scale = 7.5.
So other examples do clip the action and then apply suitable custom processing or scale factors; none of this is present in the G1 demo.
The original code feels somewhat messy and incomplete here: the manager-based workflow provides no noise configuration or execution at all.
Following the direct workflow, you have to define the noise models in your own cfg file and add the corresponding runtime code to ManagerBasedRLEnv, along these lines (this is how DirectRLEnv sets it up):
# setup noise cfg for adding action and observation noise
if self.cfg.action_noise_model:
    self._action_noise_model: NoiseModel = self.cfg.action_noise_model.class_type(
        self.cfg.action_noise_model, num_envs=self.num_envs, device=self.device
    )
if self.cfg.observation_noise_model:
    self._observation_noise_model: NoiseModel = self.cfg.observation_noise_model.class_type(
        self.cfg.observation_noise_model, num_envs=self.num_envs, device=self.device
    )
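The cfg side would then need matching fields. A minimal sketch of such an addition to a manager-based env cfg, mirroring the DirectRLEnvCfg fields (the class name MyG1RoughEnvCfg is a placeholder, and the import paths are assumed to match the direct-workflow examples):

from omni.isaac.lab.utils import configclass
from omni.isaac.lab.utils.noise import GaussianNoiseCfg, NoiseModelWithAdditiveBiasCfg  # assumed import path
from omni.isaac.lab_tasks.manager_based.locomotion.velocity.config.g1.rough_env_cfg import G1RoughEnvCfg

@configclass
class MyG1RoughEnvCfg(G1RoughEnvCfg):  # placeholder subclass of the existing G1 cfg
    # same field names as in DirectRLEnvCfg, so the runtime code above could be reused as-is
    action_noise_model: NoiseModelWithAdditiveBiasCfg | None = NoiseModelWithAdditiveBiasCfg(
        noise_cfg=GaussianNoiseCfg(mean=0.0, std=0.05, operation="add"),
        bias_noise_cfg=GaussianNoiseCfg(mean=0.0, std=0.015, operation="abs"),
    )
    observation_noise_model: NoiseModelWithAdditiveBiasCfg | None = None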
2.5 What executes after the action is sent to the simulation
# post-step:
# -- update env counters (used for curriculum generation)
# episode length of each environment +1
self.episode_length_buf += 1  # step in current episode (per env)
# global step counter, counting the total steps across all environments
self.common_step_counter += 1  # total step (common for all envs)
# -- check terminations
# check which environments need a reset
self.reset_buf = self.termination_manager.compute()
self.reset_terminated = self.termination_manager.terminated
self.reset_time_outs = self.termination_manager.time_outs
# -- reward computation
# compute the per-environment reward for this MDP step;
# it also accumulates each reward term's value until the next reset
self.reward_buf = self.reward_manager.compute(dt=self.step_dt)
# -- reset envs that terminated/timed-out and log the episode information
# find the environments to be reset and get their (non-zero) indices
reset_env_ids = self.reset_buf.nonzero(as_tuple=False).squeeze(-1)
# reset those environments
if len(reset_env_ids) > 0:
    self._reset_idx(reset_env_ids)
# -- update command
# update the commands
self.command_manager.compute(dt=self.step_dt)
# -- step interval events
# if interval events are defined
if "interval" in self.event_manager.available_modes:
    self.event_manager.apply(mode="interval", dt=self.step_dt)
# -- compute observations
# compute the observations, after the resets;
# this is where observation noise, clipping and scaling are applied
# note: done after reset to get the correct observations for reset envs
self.obs_buf = self.observation_manager.compute()
# return observations, rewards, resets and extras
return self.obs_buf, self.reward_buf, self.reset_terminated, self.reset_time_outs, self.extras
2.6 The observation noise code path
source\extensions\omni.isaac.lab\omni\isaac\lab\managers\observation_manager.py
for name, term_cfg in obs_terms:
    # compute term's value
    obs: torch.Tensor = term_cfg.func(self._env, **term_cfg.params).clone()
    # apply post-processing
    if term_cfg.modifiers is not None:
        for modifier in term_cfg.modifiers:
            obs = modifier.func(obs, **modifier.params)
    if term_cfg.noise:
        obs = term_cfg.noise.func(obs, term_cfg.noise)
    if term_cfg.clip:
        obs = obs.clip_(min=term_cfg.clip[0], max=term_cfg.clip[1])
    if term_cfg.scale:
        obs = obs.mul_(term_cfg.scale)
    # add value to list
    group_obs[name] = obs
If a term's config has a noise entry, that function is called (its arguments are the observation tensor and the NoiseCfg-subclass configuration).
The code above runs at every MDP step.
Configuration:
@configclass
class ObservationTermCfg(ManagerTermBaseCfg):
    """Configuration for an observation term."""

    func: Callable[..., torch.Tensor] = MISSING
    """The name of the function to be called.

    This function should take the environment object and any other parameters
    as input and return the observation signal as torch float tensors of
    shape (num_envs, obs_term_dim).
    """

    modifiers: list[ModifierCfg] | None = None
    """The list of data modifiers to apply to the observation in order. Defaults to None,
    in which case no modifications will be applied.

    Modifiers are applied in the order they are specified in the list. They can be stateless
    or stateful, and can be used to apply transformations to the observation data. For example,
    a modifier can be used to normalize the observation data or to apply a rolling average.

    For more information on modifiers, see the :class:`~omni.isaac.lab.utils.modifiers.ModifierCfg` class.
    """

    noise: NoiseCfg | None = None
    """The noise to add to the observation. Defaults to None, in which case no noise is added."""

    clip: tuple[float, float] | None = None
    """The clipping range for the observation after adding noise. Defaults to None,
    in which case no clipping is applied."""

    scale: float | None = None
    """The scale to apply to the observation after clipping. Defaults to None,
    in which case no scaling is applied (same as setting scale to :obj:`1`)."""
The configuration is added in
source\extensions\omni.isaac.lab_tasks\omni\isaac\lab_tasks\manager_based\locomotion\velocity\velocity_env_cfg.py:
base_lin_vel = ObsTerm(func=mdp.base_lin_vel, noise=Unoise(n_min=-0.1, n_max=0.1))
We can see that ObservationTermCfg exposes noise: NoiseCfg | None = None, and the noise config in turn carries a function:
@configclass
class UniformNoiseCfg(NoiseCfg):
    """Configuration for a additive uniform noise term."""

    func = noise_model.uniform_noise

    n_min: torch.Tensor | float = -1.0
    """The minimum value of the noise. Defaults to -1.0."""

    n_max: torch.Tensor | float = 1.0
    """The maximum value of the noise. Defaults to 1.0."""
During the computation, whenever a term's config has a noise entry, the configured function is called with the configured parameters; a sketch of what the uniform noise does is given below.
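For intuition, a rough sketch of what an additive uniform noise term like Unoise(n_min=-0.1, n_max=0.1) does (an illustrative re-implementation; see noise_model.uniform_noise for the real one):

import torch

def uniform_noise_sketch(data: torch.Tensor, n_min: float, n_max: float) -> torch.Tensor:
    # add noise sampled uniformly from [n_min, n_max) to every element
    return data + torch.rand_like(data) * (n_max - n_min) + n_min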
For ActionTermCfg, however, no such field exists. We can of course follow the observation approach and modify the code by hand:
@configclass
class ActionTermCfg:
    """Configuration for an action term."""

    class_type: type[ActionTerm] = MISSING
    """The associated action term class.

    The class should inherit from :class:`omni.isaac.lab.managers.action_manager.ActionTerm`.
    """

    asset_name: str = MISSING
    """The name of the scene entity.

    This is the name defined in the scene configuration file. See the :class:`InteractiveSceneCfg`
    class for more details.
    """

    debug_vis: bool = False
    """Whether to visualize debug information. Defaults to False."""
Example of adding action noise: in the configuration below, simply add an extra noise parameter, e.g. noise=Unoise(n_min=-0.1, n_max=0.1).
@configclass
class ActionsCfg:
    """Action specifications for the MDP."""

    # match all joints; the output joint positions are scaled by 0.5 and applied on top of the default joint offsets
    joint_pos = mdp.JointPositionActionCfg(asset_name="robot", joint_names=[".*"], scale=0.5, use_default_offset=True)
In class JointPositionActionCfg(JointActionCfg), add the field:
noise: NoiseCfg | None = None
Then in JointAction(ActionTerm), in process_actions() (self.cfg is of type actions_cfg.JointActionCfg), apply it:
def process_actions(self, actions: torch.Tensor):
    # store the raw actions
    self._raw_actions[:] = actions
    # added: optionally apply the configured noise to the raw actions
    if self.cfg.noise:
        noisy_actions = self.cfg.noise.func(self._raw_actions.clone(), self.cfg.noise)
    else:
        noisy_actions = self._raw_actions
    # end of addition
    # apply the affine transformations (to the possibly noisy actions)
    self._processed_actions = noisy_actions * self._scale + self._offset
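With those two changes in place, the action noise could then be configured the same way as the observation noise. A hypothetical usage (it assumes the noise field added above, and the same mdp and Unoise imports used in velocity_env_cfg.py):

from omni.isaac.lab.utils import configclass

@configclass
class ActionsCfg:
    """Action specifications for the MDP, with the hypothetical noise field enabled."""

    joint_pos = mdp.JointPositionActionCfg(
        asset_name="robot",
        joint_names=[".*"],
        scale=0.5,
        use_default_offset=True,
        noise=Unoise(n_min=-0.1, n_max=0.1),  # only works after adding the noise field to JointPositionActionCfg
    )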