In the last blog post, we created an XML file that could be verified in the MuJoCo simulator. But when I tested it with mujoco-py and tried to train it using HER from OpenAI Baselines, it raised an error: every body must have at least one "site", which is used in the subsequent environment-training stage. The corrected XML file is shown below (under the path "~/gym/gym/envs/robotics/assets/fetch"). P.s. 'target0' in the "load floor" part is used to visualize the target during the training stage!
<?xml version="1.0" encoding="utf-8"?>
<mujoco>
  <!-- Meshes and textures are resolved relative to these directories;
       place your STL and texture PNG files there. -->
  <compiler angle="radian" coordinate="local" meshdir="../stls/fetch" texturedir="../textures"/>
  <option timestep="0.002">
    <flag warmstart="enable"/>
  </option>
  <!-- Shared Fetch assets (materials, cameras, etc.). -->
  <include file="shared.xml"/>
  <!-- Assembly-specific assets: chair part meshes and a wood material. -->
  <asset>
    <texture file="light-wood.png" type="cube" name="light-wood"/>
    <mesh file="chair_seat.STL" scale="0.001 0.001 0.001" name="chair_seat_mesh"/>
    <mesh file="chair_leg.STL" scale="0.001 0.001 0.001" name="chair_leg_mesh"/>
    <material name="MatLightWood" texture="light-wood" texrepeat="3 3" specular="0.4" shininess="0.1"/>
  </asset>
  <worldbody>
    <!-- Visualize the world x, y and z axes. -->
    <site name="x_axes" pos="0.5 0 0" size="0.5 0.01 0.01" rgba="1 0 0 1" type="box"/>
    <site name="y_axes" pos="0 0.5 0" size="0.01 0.5 0.01" rgba="0 1 0 1" type="box"/>
    <site name="z_axes" pos="0 0 0.5" size="0.01 0.01 0.5" rgba="0 0 1 1" type="box"/>
    <!-- Floor. The 'target0' site inside the floor body visualizes the
         goal position during training. -->
    <geom name="floor0" pos="0.8 0.2 0" size="1.2 1.4 1" type="plane" condim="3" material="floor_mat"/>
    <body name="floor0" pos="0.8 0.2 0">
      <site name="floor0x" pos="0.04 0 0" size="0.04 0.01 0.01" rgba="1 0 0 1" type="box"/>
      <site name="floor0y" pos="0 0.04 0" size="0.01 0.04 0.01" rgba="0 1 0 1" type="box"/>
      <site name="floor0z" pos="0 0 0.04" size="0.01 0.01 0.04" rgba="0 0 1 1" type="box"/>
      <site name="target0" pos="0 0 0.5" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"/>
    </body>
    <!-- Chair parts: one seat and four identical legs. Every body carries
         at least one site, as required by the training code. -->
    <body pos="0 0.8 0" name="chair_seat">
      <joint name="chair_seat:joint" type="free" damping="0.01"/>
      <geom mesh="chair_seat_mesh" type="mesh" solimp="0.998 0.998 0.001" solref="0.001 1" density="50" friction="0.95 0.3 0.1" material="MatLightWood"/>
      <site name="chair_seat_x_axes" pos="0.1 0 0" size="0.1 0.01 0.01" rgba="1 0 0 1" type="box"/>
      <site name="chair_seat_y_axes" pos="0 0.1 0" size="0.01 0.1 0.01" rgba="0 1 0 1" type="box"/>
      <site name="chair_seat_z_axes" pos="0 0 0.1" size="0.01 0.01 0.1" rgba="0 0 1 1" type="box"/>
      <site name="chair_seat" pos="0 0 0" size="0.02 0.02 0.02"/>
    </body>
    <body pos="0 1.45 0" name="chair_leg0">
      <joint name="chair_leg0:joint" type="free" damping="0.01"/>
      <geom mesh="chair_leg_mesh" type="mesh" solimp="0.998 0.998 0.001" solref="0.001 1" density="50" friction="0.95 0.3 0.1" material="MatLightWood"/>
      <site name="chair_leg0_x_axes" pos="0.1 0 0" size="0.1 0.01 0.01" rgba="1 0 0 1" type="box"/>
      <site name="chair_leg0_y_axes" pos="0 0.1 0" size="0.01 0.1 0.01" rgba="0 1 0 1" type="box"/>
      <site name="chair_leg0_z_axes" pos="0 0 0.1" size="0.01 0.01 0.1" rgba="0 0 1 1" type="box"/>
      <site name="chair_leg0" pos="0 0 0" size="0.02 0.02 0.02"/>
    </body>
    <body pos="0.2 1.45 0" name="chair_leg1">
      <joint name="chair_leg1:joint" type="free" damping="0.01"/>
      <geom mesh="chair_leg_mesh" type="mesh" solimp="0.998 0.998 0.001" solref="0.001 1" density="50" friction="0.95 0.3 0.1" material="MatLightWood"/>
      <site name="chair_leg1_x_axes" pos="0.1 0 0" size="0.1 0.01 0.01" rgba="1 0 0 1" type="box"/>
      <site name="chair_leg1_y_axes" pos="0 0.1 0" size="0.01 0.1 0.01" rgba="0 1 0 1" type="box"/>
      <site name="chair_leg1_z_axes" pos="0 0 0.1" size="0.01 0.01 0.1" rgba="0 0 1 1" type="box"/>
      <site name="chair_leg1" pos="0 0 0" size="0.02 0.02 0.02"/>
    </body>
    <body pos="0.4 1.45 0" name="chair_leg2">
      <joint name="chair_leg2:joint" type="free" damping="0.01"/>
      <geom mesh="chair_leg_mesh" type="mesh" solimp="0.998 0.998 0.001" solref="0.001 1" density="50" friction="0.95 0.3 0.1" material="MatLightWood"/>
      <site name="chair_leg2_x_axes" pos="0.1 0 0" size="0.1 0.01 0.01" rgba="1 0 0 1" type="box"/>
      <site name="chair_leg2_y_axes" pos="0 0.1 0" size="0.01 0.1 0.01" rgba="0 1 0 1" type="box"/>
      <site name="chair_leg2_z_axes" pos="0 0 0.1" size="0.01 0.01 0.1" rgba="0 0 1 1" type="box"/>
      <site name="chair_leg2" pos="0 0 0" size="0.02 0.02 0.02"/>
    </body>
    <body pos="0.6 1.45 0" name="chair_leg3">
      <joint name="chair_leg3:joint" type="free" damping="0.01"/>
      <geom mesh="chair_leg_mesh" type="mesh" solimp="0.998 0.998 0.001" solref="0.001 1" density="50" friction="0.95 0.3 0.1" material="MatLightWood"/>
      <site name="chair_leg3_x_axes" pos="0.1 0 0" size="0.1 0.01 0.01" rgba="1 0 0 1" type="box"/>
      <site name="chair_leg3_y_axes" pos="0 0.1 0" size="0.01 0.1 0.01" rgba="0 1 0 1" type="box"/>
      <site name="chair_leg3_z_axes" pos="0 0 0.1" size="0.01 0.01 0.1" rgba="0 0 1 1" type="box"/>
      <site name="chair_leg3" pos="0 0 0" size="0.02 0.02 0.02"/>
    </body>
    <!-- Fetch robot. -->
    <include file="robot.xml"/>
    <!-- Table. -->
    <body pos="1.3 0.2 0.2" name="table0">
      <geom size="0.5 0.7 0.2" type="box" mass="2000" material="table_mat" rgba="1 1 1 0.5"/>
      <site name="table0_x_axes" pos="0.1 0 0" size="0.1 0.01 0.01" rgba="1 0 0 1" type="box"/>
      <site name="table0_y_axes" pos="0 0.1 0" size="0.01 0.1 0.01" rgba="0 1 0 1" type="box"/>
      <site name="table0_z_axes" pos="0 0 0.1" size="0.01 0.01 0.1" rgba="0 0 1 1" type="box"/>
    </body>
    <!-- Test box object. -->
    <body name="object0" pos="0.025 -0.025 0.025">
      <joint name="object0:joint" type="free" damping="0.01"/>
      <geom size="0.025 0.025 0.025" type="box" condim="3" name="object0" material="block_mat" mass="2"/>
      <site name="object0" pos="0 0 0" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"/>
    </body>
    <light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 0 4" dir="0 0 -1" name="light0"/>
  </worldbody>
  <actuator>
    <position ctrllimited="true" ctrlrange="0 0.2" joint="robot0:l_gripper_finger_joint" kp="30000" name="robot0:l_gripper_finger_joint" user="1"/>
    <position ctrllimited="true" ctrlrange="0 0.2" joint="robot0:r_gripper_finger_joint" kp="30000" name="robot0:r_gripper_finger_joint" user="1"/>
  </actuator>
</mujoco>
Then you need to create a Python file named "/home/kai/GraphGuidedAssembly/gym/gym/envs/robotics/assembly_env.py". It is the core file that defines the reward, refresh, and reset of the assembly environment. Here, as a demo, I adapted the grasp demo. Its content is shown below:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 16 22:33:05 2020
@author: kai
"""
import numpy as np
from gym.envs.robotics import rotations, robot_env, utils
def goal_distance(goal_a, goal_b):
    """Euclidean distance between two goals, taken along the last axis."""
    assert goal_a.shape == goal_b.shape
    difference = goal_a - goal_b
    return np.linalg.norm(difference, axis=-1)
class AssemblyEnv(robot_env.RobotEnv):
    """Fetch-based assembly environment (adapted from ``fetch_env.FetchEnv``).

    The manipulated part is identified by ``target_obj`` (previously the
    name 'chair_leg0' was hard-coded in several methods); its free joint
    must be named '<target_obj>:joint' in the model XML, and a site with
    the same name must exist on the body.
    """

    def __init__(
        self, model_path, n_substeps, gripper_extra_height, block_gripper,
        has_object, target_in_the_air, target_offset, obj_range, target_range,
        distance_threshold, initial_qpos, reward_type, target_obj='chair_leg0',
    ):
        """Initializes a new Assembly environment.

        Args:
            model_path (string): path to the environments XML file
            n_substeps (int): number of substeps the simulation runs on every call to step
            gripper_extra_height (float): additional height above the table when positioning the gripper
            block_gripper (boolean): whether or not the gripper is blocked (i.e. not movable) or not
            has_object (boolean): whether or not the environment has an object
            target_in_the_air (boolean): whether or not the target should be in the air above the table or on the table surface
            target_offset (float or array with 3 elements): offset of the target
            obj_range (float): range of a uniform distribution for sampling initial object positions
            target_range (float): range of a uniform distribution for sampling a target
            distance_threshold (float): the threshold after which a goal is considered achieved
            initial_qpos (dict): a dictionary of joint names and values that define the initial configuration
            reward_type ('sparse' or 'dense'): the reward type, i.e. sparse or dense
            target_obj (string): name of the site/body of the part to manipulate;
                defaults to 'chair_leg0' for backward compatibility
        """
        self.gripper_extra_height = gripper_extra_height
        self.block_gripper = block_gripper
        self.has_object = has_object
        self.target_in_the_air = target_in_the_air
        self.target_offset = target_offset
        self.obj_range = obj_range
        self.target_range = target_range
        self.distance_threshold = distance_threshold
        self.reward_type = reward_type
        # Generalizes the previously hard-coded 'chair_leg0' object name.
        self.target_obj = target_obj
        super(AssemblyEnv, self).__init__(
            model_path=model_path, n_substeps=n_substeps, n_actions=4,
            initial_qpos=initial_qpos)

    # GoalEnv methods
    # ----------------------------

    def compute_reward(self, achieved_goal, goal, info):
        """Sparse: -1 if farther than the threshold else 0; dense: -distance."""
        # Compute distance between goal and the achieved goal.
        d = goal_distance(achieved_goal, goal)
        if self.reward_type == 'sparse':
            return -(d > self.distance_threshold).astype(np.float32)
        else:
            return -d

    # RobotEnv methods
    # ----------------------------

    def _step_callback(self):
        # Keep the fingers pinned shut when the gripper is blocked.
        if self.block_gripper:
            self.sim.data.set_joint_qpos('robot0:l_gripper_finger_joint', 0.)
            self.sim.data.set_joint_qpos('robot0:r_gripper_finger_joint', 0.)
            self.sim.forward()

    def _set_action(self, action):
        """Map a 4-D action (dx, dy, dz, gripper) onto mocap + gripper controls."""
        assert action.shape == (4,)
        action = action.copy()  # ensure that we don't change the action outside of this scope
        pos_ctrl, gripper_ctrl = action[:3], action[3]
        pos_ctrl *= 0.05  # limit maximum change in position
        rot_ctrl = [1., 0., 1., 0.]  # fixed rotation of the end effector, expressed as a quaternion
        gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
        assert gripper_ctrl.shape == (2,)
        if self.block_gripper:
            gripper_ctrl = np.zeros_like(gripper_ctrl)
        action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
        # Apply action to simulation.
        utils.ctrl_set_action(self.sim, action)
        utils.mocap_set_action(self.sim, action)

    def _get_obs(self):
        """Build the goal-env observation dict for the target object."""
        # positions
        grip_pos = self.sim.data.get_site_xpos('robot0:grip')
        dt = self.sim.nsubsteps * self.sim.model.opt.timestep
        grip_velp = self.sim.data.get_site_xvelp('robot0:grip') * dt
        robot_qpos, robot_qvel = utils.robot_get_obs(self.sim)
        if self.has_object:
            object_pos = self.sim.data.get_site_xpos(self.target_obj)
            # rotations
            object_rot = rotations.mat2euler(self.sim.data.get_site_xmat(self.target_obj))
            # velocities
            object_velp = self.sim.data.get_site_xvelp(self.target_obj) * dt
            object_velr = self.sim.data.get_site_xvelr(self.target_obj) * dt
            # gripper state
            object_rel_pos = object_pos - grip_pos
            object_velp -= grip_velp
        else:
            object_pos = object_rot = object_velp = object_velr = object_rel_pos = np.zeros(0)
        gripper_state = robot_qpos[-2:]
        gripper_vel = robot_qvel[-2:] * dt  # change to a scalar if the gripper is made symmetric
        if not self.has_object:
            achieved_goal = grip_pos.copy()
        else:
            achieved_goal = np.squeeze(object_pos.copy())
        obs = np.concatenate([
            grip_pos, object_pos.ravel(), object_rel_pos.ravel(), gripper_state, object_rot.ravel(),
            object_velp.ravel(), object_velr.ravel(), grip_velp, gripper_vel,
        ])
        return {
            'observation': obs.copy(),
            'achieved_goal': achieved_goal.copy(),
            'desired_goal': self.goal.copy(),
        }

    def _viewer_setup(self):
        # Track the gripper with a fixed camera offset.
        body_id = self.sim.model.body_name2id('robot0:gripper_link')
        lookat = self.sim.data.body_xpos[body_id]
        for idx, value in enumerate(lookat):
            self.viewer.cam.lookat[idx] = value
        self.viewer.cam.distance = 2.5
        self.viewer.cam.azimuth = 132.
        self.viewer.cam.elevation = -14.

    def _render_callback(self):
        # Visualize target by moving the 'target0' site onto the goal.
        sites_offset = (self.sim.data.site_xpos - self.sim.model.site_pos).copy()
        site_id = self.sim.model.site_name2id('target0')
        self.sim.model.site_pos[site_id] = self.goal - sites_offset[0]
        self.sim.forward()

    def _reset_sim(self):
        """Restore the initial state and randomize the target object's XY position."""
        self.sim.set_state(self.initial_state)
        # Randomize start position of object.
        if self.has_object:
            object_xpos = self.initial_gripper_xpos[:2]
            # Resample until the object is not directly under the gripper.
            while np.linalg.norm(object_xpos - self.initial_gripper_xpos[:2]) < 0.1:
                object_xpos = self.initial_gripper_xpos[:2] + self.np_random.uniform(-self.obj_range, self.obj_range, size=2)
            object_qpos = self.sim.data.get_joint_qpos(self.target_obj + ':joint')
            assert object_qpos.shape == (7,)  # free joint: xyz + quaternion
            object_qpos[:2] = object_xpos
            self.sim.data.set_joint_qpos(self.target_obj + ':joint', object_qpos)
        self.sim.forward()
        return True

    def _sample_goal(self):
        """Sample a goal near the gripper; optionally lifted into the air."""
        if self.has_object:
            goal = self.initial_gripper_xpos[:3] + self.np_random.uniform(-self.target_range, self.target_range, size=3)
            goal += self.target_offset
            goal[2] = self.height_offset
            if self.target_in_the_air and self.np_random.uniform() < 0.5:
                goal[2] += self.np_random.uniform(0, 0.45)
        else:
            goal = self.initial_gripper_xpos[:3] + self.np_random.uniform(-self.target_range, self.target_range, size=3)
        return goal.copy()

    def _is_success(self, achieved_goal, desired_goal):
        d = goal_distance(achieved_goal, desired_goal)
        return (d < self.distance_threshold).astype(np.float32)

    def _env_setup(self, initial_qpos):
        """Apply the initial joint configuration and move the gripper into place."""
        for name, value in initial_qpos.items():
            self.sim.data.set_joint_qpos(name, value)
        utils.reset_mocap_welds(self.sim)
        self.sim.forward()
        # Move end effector into position.
        gripper_target = np.array([-0.498, 0.005, -0.431 + self.gripper_extra_height]) + self.sim.data.get_site_xpos('robot0:grip')
        gripper_rotation = np.array([1., 0., 1., 0.])
        self.sim.data.set_mocap_pos('robot0:mocap', gripper_target)
        self.sim.data.set_mocap_quat('robot0:mocap', gripper_rotation)
        for _ in range(10):
            self.sim.step()
        # Extract information for sampling goals.
        self.initial_gripper_xpos = self.sim.data.get_site_xpos('robot0:grip').copy()
        if self.has_object:
            self.height_offset = self.sim.data.get_site_xpos(self.target_obj)[2]

    def render(self, mode='human', width=500, height=500):
        return super(AssemblyEnv, self).render(mode, width, height)
In this file, "chair_leg0" and the class name "AssemblyEnv" are what we changed compared with "fetch_env.py". The 'target0' site in the "_render_callback" function is used for visualizing the target goal!
Next, we need a Python file to initialize "AssemblyEnv". This file is created at "~/gym/gym/envs/robotics/fetch/pick_assembly.py" and is shown below:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 16 22:55:09 2020
@author: kai
"""
import os
from gym import utils
from gym.envs.robotics import assembly_env
# Path to the MuJoCo model, relative to gym/envs/robotics/assets/.
# os.path.join ensures the path separator is correct on Windows.
MODEL_XML_PATH = os.path.join('fetch', 'pick_assembly.xml')
class FetchAssemblyEnv(assembly_env.AssemblyEnv, utils.EzPickle):
    """Fetch assembly demo task: grasp a chair leg and move it to a goal."""

    def __init__(self, reward_type='sparse'):
        # Starting configuration: the robot base slide joints plus the free
        # joint (x, y, z, quaternion) of the chair leg.
        initial_qpos = {
            'robot0:slide0': 0.405,
            'robot0:slide1': 0.48,
            'robot0:slide2': 0.0,
            'chair_leg0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.],
        }
        env_kwargs = dict(
            has_object=True,
            block_gripper=False,
            n_substeps=20,
            gripper_extra_height=0.2,
            target_in_the_air=True,
            target_offset=0.0,
            obj_range=0.15,
            target_range=0.15,
            distance_threshold=0.05,
            initial_qpos=initial_qpos,
            reward_type=reward_type,
        )
        assembly_env.AssemblyEnv.__init__(self, MODEL_XML_PATH, **env_kwargs)
        utils.EzPickle.__init__(self)
Lastly, the demo gym environment is finished, but we still need to register it in gym so that we can use gym.make() to build it.
First, modify the file "~/gym/gym/envs/robotics/__init__.py" so that it loads our environment. The line "from gym.envs.robotics.fetch.pick_assembly import FetchAssemblyEnv" is what we added.
from gym.envs.robotics.fetch_env import FetchEnv
from gym.envs.robotics.fetch.slide import FetchSlideEnv
from gym.envs.robotics.fetch.pick_and_place import FetchPickAndPlaceEnv
from gym.envs.robotics.fetch.push import FetchPushEnv
from gym.envs.robotics.fetch.reach import FetchReachEnv
#added by someone
from gym.envs.robotics.fetch.pick_assembly import FetchAssemblyEnv
from gym.envs.robotics.hand.reach import HandReachEnv
from gym.envs.robotics.hand.manipulate import HandBlockEnv
from gym.envs.robotics.hand.manipulate import HandEggEnv
from gym.envs.robotics.hand.manipulate import HandPenEnv
from gym.envs.robotics.hand.manipulate_touch_sensors import HandBlockTouchSensorsEnv
from gym.envs.robotics.hand.manipulate_touch_sensors import HandEggTouchSensorsEnv
from gym.envs.robotics.hand.manipulate_touch_sensors import HandPenTouchSensorsEnv
Second, modify the file "~/gym/gym/envs/__init__.py" to register our environment. Just add the following code after the "# Fetch" segment (around line 348); we added our registration code at lines 377-383.
# Registers the custom assembly task; `suffix` and `kwargs` come from the
# surrounding `for reward_type in ['sparse', 'dense']` loop in
# gym/envs/__init__.py (see the full file below).
register(
    id='FetchAssembly{}-v1'.format(suffix),
    entry_point='gym.envs.robotics:FetchAssemblyEnv',
    kwargs=kwargs,
    max_episode_steps=50,
)
The whole file "~/gym/gym/envs/__init__.py" is also shown below:
from gym.envs.registration import registry, register, make, spec

# Algorithmic
# ----------------------------------------
register(
    id='Copy-v0',
    entry_point='gym.envs.algorithmic:CopyEnv',
    max_episode_steps=200,
    reward_threshold=25.0,
)
register(
    id='RepeatCopy-v0',
    entry_point='gym.envs.algorithmic:RepeatCopyEnv',
    max_episode_steps=200,
    reward_threshold=75.0,
)
register(
    id='ReversedAddition-v0',
    entry_point='gym.envs.algorithmic:ReversedAdditionEnv',
    kwargs={'rows' : 2},
    max_episode_steps=200,
    reward_threshold=25.0,
)
register(
    id='ReversedAddition3-v0',
    entry_point='gym.envs.algorithmic:ReversedAdditionEnv',
    kwargs={'rows' : 3},
    max_episode_steps=200,
    reward_threshold=25.0,
)
register(
    id='DuplicatedInput-v0',
    entry_point='gym.envs.algorithmic:DuplicatedInputEnv',
    max_episode_steps=200,
    reward_threshold=9.0,
)
register(
    id='Reverse-v0',
    entry_point='gym.envs.algorithmic:ReverseEnv',
    max_episode_steps=200,
    reward_threshold=25.0,
)

# Classic
# ----------------------------------------
register(
    id='CartPole-v0',
    entry_point='gym.envs.classic_control:CartPoleEnv',
    max_episode_steps=200,
    reward_threshold=195.0,
)
register(
    id='CartPole-v1',
    entry_point='gym.envs.classic_control:CartPoleEnv',
    max_episode_steps=500,
    reward_threshold=475.0,
)
register(
    id='MountainCar-v0',
    entry_point='gym.envs.classic_control:MountainCarEnv',
    max_episode_steps=200,
    reward_threshold=-110.0,
)
register(
    id='MountainCarContinuous-v0',
    entry_point='gym.envs.classic_control:Continuous_MountainCarEnv',
    max_episode_steps=999,
    reward_threshold=90.0,
)
register(
    id='Pendulum-v0',
    entry_point='gym.envs.classic_control:PendulumEnv',
    max_episode_steps=200,
)
register(
    id='Acrobot-v1',
    entry_point='gym.envs.classic_control:AcrobotEnv',
    reward_threshold=-100.0,
    max_episode_steps=500,
)

# Box2d
# ----------------------------------------
register(
    id='LunarLander-v2',
    entry_point='gym.envs.box2d:LunarLander',
    max_episode_steps=1000,
    reward_threshold=200,
)
register(
    id='LunarLanderContinuous-v2',
    entry_point='gym.envs.box2d:LunarLanderContinuous',
    max_episode_steps=1000,
    reward_threshold=200,
)
register(
    id='BipedalWalker-v3',
    entry_point='gym.envs.box2d:BipedalWalker',
    max_episode_steps=1600,
    reward_threshold=300,
)
register(
    id='BipedalWalkerHardcore-v3',
    entry_point='gym.envs.box2d:BipedalWalkerHardcore',
    max_episode_steps=2000,
    reward_threshold=300,
)
register(
    id='CarRacing-v0',
    entry_point='gym.envs.box2d:CarRacing',
    max_episode_steps=1000,
    reward_threshold=900,
)

# Toy Text
# ----------------------------------------
register(
    id='Blackjack-v0',
    entry_point='gym.envs.toy_text:BlackjackEnv',
)
register(
    id='KellyCoinflip-v0',
    entry_point='gym.envs.toy_text:KellyCoinflipEnv',
    reward_threshold=246.61,
)
register(
    id='KellyCoinflipGeneralized-v0',
    entry_point='gym.envs.toy_text:KellyCoinflipGeneralizedEnv',
)
register(
    id='FrozenLake-v0',
    entry_point='gym.envs.toy_text:FrozenLakeEnv',
    kwargs={'map_name' : '4x4'},
    max_episode_steps=100,
    reward_threshold=0.78, # optimum = .8196
)
register(
    id='FrozenLake8x8-v0',
    entry_point='gym.envs.toy_text:FrozenLakeEnv',
    kwargs={'map_name' : '8x8'},
    max_episode_steps=200,
    reward_threshold=0.99, # optimum = 1
)
register(
    id='CliffWalking-v0',
    entry_point='gym.envs.toy_text:CliffWalkingEnv',
)
register(
    id='NChain-v0',
    entry_point='gym.envs.toy_text:NChainEnv',
    max_episode_steps=1000,
)
register(
    id='Roulette-v0',
    entry_point='gym.envs.toy_text:RouletteEnv',
    max_episode_steps=100,
)
register(
    id='Taxi-v3',
    entry_point='gym.envs.toy_text:TaxiEnv',
    reward_threshold=8, # optimum = 8.46
    max_episode_steps=200,
)
register(
    id='GuessingGame-v0',
    entry_point='gym.envs.toy_text:GuessingGame',
    max_episode_steps=200,
)
register(
    id='HotterColder-v0',
    entry_point='gym.envs.toy_text:HotterColder',
    max_episode_steps=200,
)

# Mujoco
# ----------------------------------------
# 2D
register(
    id='Reacher-v2',
    entry_point='gym.envs.mujoco:ReacherEnv',
    max_episode_steps=50,
    reward_threshold=-3.75,
)
register(
    id='Pusher-v2',
    entry_point='gym.envs.mujoco:PusherEnv',
    max_episode_steps=100,
    reward_threshold=0.0,
)
register(
    id='Thrower-v2',
    entry_point='gym.envs.mujoco:ThrowerEnv',
    max_episode_steps=100,
    reward_threshold=0.0,
)
register(
    id='Striker-v2',
    entry_point='gym.envs.mujoco:StrikerEnv',
    max_episode_steps=100,
    reward_threshold=0.0,
)
register(
    id='InvertedPendulum-v2',
    entry_point='gym.envs.mujoco:InvertedPendulumEnv',
    max_episode_steps=1000,
    reward_threshold=950.0,
)
register(
    id='InvertedDoublePendulum-v2',
    entry_point='gym.envs.mujoco:InvertedDoublePendulumEnv',
    max_episode_steps=1000,
    reward_threshold=9100.0,
)
register(
    id='HalfCheetah-v2',
    entry_point='gym.envs.mujoco:HalfCheetahEnv',
    max_episode_steps=1000,
    reward_threshold=4800.0,
)
register(
    id='HalfCheetah-v3',
    entry_point='gym.envs.mujoco.half_cheetah_v3:HalfCheetahEnv',
    max_episode_steps=1000,
    reward_threshold=4800.0,
)
register(
    id='Hopper-v2',
    entry_point='gym.envs.mujoco:HopperEnv',
    max_episode_steps=1000,
    reward_threshold=3800.0,
)
register(
    id='Hopper-v3',
    entry_point='gym.envs.mujoco.hopper_v3:HopperEnv',
    max_episode_steps=1000,
    reward_threshold=3800.0,
)
register(
    id='Swimmer-v2',
    entry_point='gym.envs.mujoco:SwimmerEnv',
    max_episode_steps=1000,
    reward_threshold=360.0,
)
register(
    id='Swimmer-v3',
    entry_point='gym.envs.mujoco.swimmer_v3:SwimmerEnv',
    max_episode_steps=1000,
    reward_threshold=360.0,
)
register(
    id='Walker2d-v2',
    max_episode_steps=1000,
    entry_point='gym.envs.mujoco:Walker2dEnv',
)
register(
    id='Walker2d-v3',
    max_episode_steps=1000,
    entry_point='gym.envs.mujoco.walker2d_v3:Walker2dEnv',
)
register(
    id='Ant-v2',
    entry_point='gym.envs.mujoco:AntEnv',
    max_episode_steps=1000,
    reward_threshold=6000.0,
)
register(
    id='Ant-v3',
    entry_point='gym.envs.mujoco.ant_v3:AntEnv',
    max_episode_steps=1000,
    reward_threshold=6000.0,
)
register(
    id='Humanoid-v2',
    entry_point='gym.envs.mujoco:HumanoidEnv',
    max_episode_steps=1000,
)
register(
    id='Humanoid-v3',
    entry_point='gym.envs.mujoco.humanoid_v3:HumanoidEnv',
    max_episode_steps=1000,
)
register(
    id='HumanoidStandup-v2',
    entry_point='gym.envs.mujoco:HumanoidStandupEnv',
    max_episode_steps=1000,
)

# Robotics
# ----------------------------------------
def _merge(a, b):
a.update(b)
return a
# Each robotics task is registered twice: with sparse rewards (no suffix)
# and with dense rewards ('Dense' suffix).
for reward_type in ['sparse', 'dense']:
    suffix = 'Dense' if reward_type == 'dense' else ''
    kwargs = {
        'reward_type': reward_type,
    }
    # Fetch
    register(
        id='FetchSlide{}-v1'.format(suffix),
        entry_point='gym.envs.robotics:FetchSlideEnv',
        kwargs=kwargs,
        max_episode_steps=50,
    )
    register(
        id='FetchPickAndPlace{}-v1'.format(suffix),
        entry_point='gym.envs.robotics:FetchPickAndPlaceEnv',
        kwargs=kwargs,
        max_episode_steps=50,
    )
    register(
        id='FetchReach{}-v1'.format(suffix),
        entry_point='gym.envs.robotics:FetchReachEnv',
        kwargs=kwargs,
        max_episode_steps=50,
    )
    register(
        id='FetchPush{}-v1'.format(suffix),
        entry_point='gym.envs.robotics:FetchPushEnv',
        kwargs=kwargs,
        max_episode_steps=50,
    )
    # added by someone: the custom assembly environment from this tutorial
    register(
        id='FetchAssembly{}-v1'.format(suffix),
        entry_point='gym.envs.robotics:FetchAssemblyEnv',
        kwargs=kwargs,
        max_episode_steps=50,
    )
    # Hand
    register(
        id='HandReach{}-v0'.format(suffix),
        entry_point='gym.envs.robotics:HandReachEnv',
        kwargs=kwargs,
        max_episode_steps=50,
    )
    register(
        id='HandManipulateBlockRotateZ{}-v0'.format(suffix),
        entry_point='gym.envs.robotics:HandBlockEnv',
        kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'z'}, kwargs),
        max_episode_steps=100,
    )
    register(
        id='HandManipulateBlockRotateZTouchSensors{}-v0'.format(suffix),
        entry_point='gym.envs.robotics:HandBlockTouchSensorsEnv',
        kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'z', 'touch_get_obs': 'boolean'}, kwargs),
        max_episode_steps=100,
    )
    register(
        id='HandManipulateBlockRotateZTouchSensors{}-v1'.format(suffix),
        entry_point='gym.envs.robotics:HandBlockTouchSensorsEnv',
        kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'z', 'touch_get_obs': 'sensordata'}, kwargs),
        max_episode_steps=100,
    )
    register(
        id='HandManipulateBlockRotateParallel{}-v0'.format(suffix),
        entry_point='gym.envs.robotics:HandBlockEnv',
        kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'parallel'}, kwargs),
        max_episode_steps=100,
    )
    register(
        id='HandManipulateBlockRotateParallelTouchSensors{}-v0'.format(suffix),
        entry_point='gym.envs.robotics:HandBlockTouchSensorsEnv',
        kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'parallel', 'touch_get_obs': 'boolean'}, kwargs),
        max_episode_steps=100,
    )
    register(
        id='HandManipulateBlockRotateParallelTouchSensors{}-v1'.format(suffix),
        entry_point='gym.envs.robotics:HandBlockTouchSensorsEnv',
        kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'parallel', 'touch_get_obs': 'sensordata'}, kwargs),
        max_episode_steps=100,
    )
    register(
        id='HandManipulateBlockRotateXYZ{}-v0'.format(suffix),
        entry_point='gym.envs.robotics:HandBlockEnv',
        kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'xyz'}, kwargs),
        max_episode_steps=100,
    )
    register(
        id='HandManipulateBlockRotateXYZTouchSensors{}-v0'.format(suffix),
        entry_point='gym.envs.robotics:HandBlockTouchSensorsEnv',
        kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'xyz', 'touch_get_obs': 'boolean'}, kwargs),
        max_episode_steps=100,
    )
    register(
        id='HandManipulateBlockRotateXYZTouchSensors{}-v1'.format(suffix),
        entry_point='gym.envs.robotics:HandBlockTouchSensorsEnv',
        kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'xyz', 'touch_get_obs': 'sensordata'}, kwargs),
        max_episode_steps=100,
    )
    register(
        id='HandManipulateBlockFull{}-v0'.format(suffix),
        entry_point='gym.envs.robotics:HandBlockEnv',
        kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
        max_episode_steps=100,
    )
    # Alias for "Full"
    register(
        id='HandManipulateBlock{}-v0'.format(suffix),
        entry_point='gym.envs.robotics:HandBlockEnv',
        kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
        max_episode_steps=100,
    )
    register(
        id='HandManipulateBlockTouchSensors{}-v0'.format(suffix),
        entry_point='gym.envs.robotics:HandBlockTouchSensorsEnv',
        kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz', 'touch_get_obs': 'boolean'}, kwargs),
        max_episode_steps=100,
    )
    register(
        id='HandManipulateBlockTouchSensors{}-v1'.format(suffix),
        entry_point='gym.envs.robotics:HandBlockTouchSensorsEnv',
        kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz', 'touch_get_obs': 'sensordata'}, kwargs),
        max_episode_steps=100,
    )
    register(
        id='HandManipulateEggRotate{}-v0'.format(suffix),
        entry_point='gym.envs.robotics:HandEggEnv',
        kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'xyz'}, kwargs),
        max_episode_steps=100,
    )
    register(
        id='HandManipulateEggRotateTouchSensors{}-v0'.format(suffix),
        entry_point='gym.envs.robotics:HandEggTouchSensorsEnv',
        kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'xyz', 'touch_get_obs': 'boolean'}, kwargs),
        max_episode_steps=100,
    )
    register(
        id='HandManipulateEggRotateTouchSensors{}-v1'.format(suffix),
        entry_point='gym.envs.robotics:HandEggTouchSensorsEnv',
        kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'xyz', 'touch_get_obs': 'sensordata'}, kwargs),
        max_episode_steps=100,
    )
    register(
        id='HandManipulateEggFull{}-v0'.format(suffix),
        entry_point='gym.envs.robotics:HandEggEnv',
        kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
        max_episode_steps=100,
    )
    # Alias for "Full"
    register(
        id='HandManipulateEgg{}-v0'.format(suffix),
        entry_point='gym.envs.robotics:HandEggEnv',
        kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
        max_episode_steps=100,
    )
    register(
        id='HandManipulateEggTouchSensors{}-v0'.format(suffix),
        entry_point='gym.envs.robotics:HandEggTouchSensorsEnv',
        kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz', 'touch_get_obs': 'boolean'}, kwargs),
        max_episode_steps=100,
    )
    register(
        id='HandManipulateEggTouchSensors{}-v1'.format(suffix),
        entry_point='gym.envs.robotics:HandEggTouchSensorsEnv',
        kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz', 'touch_get_obs': 'sensordata'}, kwargs),
        max_episode_steps=100,
    )
    register(
        id='HandManipulatePenRotate{}-v0'.format(suffix),
        entry_point='gym.envs.robotics:HandPenEnv',
        kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'xyz'}, kwargs),
        max_episode_steps=100,
    )
    register(
        id='HandManipulatePenRotateTouchSensors{}-v0'.format(suffix),
        entry_point='gym.envs.robotics:HandPenTouchSensorsEnv',
        kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'xyz', 'touch_get_obs': 'boolean'}, kwargs),
        max_episode_steps=100,
    )
    register(
        id='HandManipulatePenRotateTouchSensors{}-v1'.format(suffix),
        entry_point='gym.envs.robotics:HandPenTouchSensorsEnv',
        kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'xyz', 'touch_get_obs': 'sensordata'}, kwargs),
        max_episode_steps=100,
    )
    register(
        id='HandManipulatePenFull{}-v0'.format(suffix),
        entry_point='gym.envs.robotics:HandPenEnv',
        kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
        max_episode_steps=100,
    )
    # Alias for "Full"
    register(
        id='HandManipulatePen{}-v0'.format(suffix),
        entry_point='gym.envs.robotics:HandPenEnv',
        kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
        max_episode_steps=100,
    )
    register(
        id='HandManipulatePenTouchSensors{}-v0'.format(suffix),
        entry_point='gym.envs.robotics:HandPenTouchSensorsEnv',
        kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz', 'touch_get_obs': 'boolean'}, kwargs),
        max_episode_steps=100,
    )
    register(
        id='HandManipulatePenTouchSensors{}-v1'.format(suffix),
        entry_point='gym.envs.robotics:HandPenTouchSensorsEnv',
        kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz', 'touch_get_obs': 'sensordata'}, kwargs),
        max_episode_steps=100,
    )
# Atari
# ----------------------------------------
# # print ', '.join(["'{}'".format(name.split('.')[0]) for name in atari_py.list_games()])
for game in ['adventure', 'air_raid', 'alien', 'amidar', 'assault', 'asterix', 'asteroids', 'atlantis',
             'bank_heist', 'battle_zone', 'beam_rider', 'berzerk', 'bowling', 'boxing', 'breakout', 'carnival',
             'centipede', 'chopper_command', 'crazy_climber', 'defender', 'demon_attack', 'double_dunk',
             'elevator_action', 'enduro', 'fishing_derby', 'freeway', 'frostbite', 'gopher', 'gravitar',
             'hero', 'ice_hockey', 'jamesbond', 'journey_escape', 'kangaroo', 'krull', 'kung_fu_master',
             'montezuma_revenge', 'ms_pacman', 'name_this_game', 'phoenix', 'pitfall', 'pong', 'pooyan',
             'private_eye', 'qbert', 'riverraid', 'road_runner', 'robotank', 'seaquest', 'skiing',
             'solaris', 'space_invaders', 'star_gunner', 'tennis', 'time_pilot', 'tutankham', 'up_n_down',
             'venture', 'video_pinball', 'wizard_of_wor', 'yars_revenge', 'zaxxon']:
    for obs_type in ['image', 'ram']:
        # space_invaders should yield SpaceInvaders-v0 and SpaceInvaders-ram-v0
        name = ''.join([g.capitalize() for g in game.split('_')])
        if obs_type == 'ram':
            name = '{}-ram'.format(name)
        nondeterministic = False
        if game == 'elevator_action' and obs_type == 'ram':
            # ElevatorAction-ram-v0 seems to yield slightly
            # non-deterministic observations about 10% of the time. We
            # should track this down eventually, but for now we just
            # mark it as nondeterministic.
            nondeterministic = True
        register(
            id='{}-v0'.format(name),
            entry_point='gym.envs.atari:AtariEnv',
            kwargs={'game': game, 'obs_type': obs_type, 'repeat_action_probability': 0.25},
            max_episode_steps=10000,
            nondeterministic=nondeterministic,
        )
        register(
            id='{}-v4'.format(name),
            entry_point='gym.envs.atari:AtariEnv',
            kwargs={'game': game, 'obs_type': obs_type},
            max_episode_steps=100000,
            nondeterministic=nondeterministic,
        )
        # Standard Deterministic (as in the original DeepMind paper)
        if game == 'space_invaders':
            frameskip = 3
        else:
            frameskip = 4
        # Use a deterministic frame skip.
        register(
            id='{}Deterministic-v0'.format(name),
            entry_point='gym.envs.atari:AtariEnv',
            kwargs={'game': game, 'obs_type': obs_type, 'frameskip': frameskip, 'repeat_action_probability': 0.25},
            max_episode_steps=100000,
            nondeterministic=nondeterministic,
        )
        register(
            id='{}Deterministic-v4'.format(name),
            entry_point='gym.envs.atari:AtariEnv',
            kwargs={'game': game, 'obs_type': obs_type, 'frameskip': frameskip},
            max_episode_steps=100000,
            nondeterministic=nondeterministic,
        )
        register(
            id='{}NoFrameskip-v0'.format(name),
            entry_point='gym.envs.atari:AtariEnv',
            kwargs={'game': game, 'obs_type': obs_type, 'frameskip': 1, 'repeat_action_probability': 0.25}, # A frameskip of 1 means we get every frame
            max_episode_steps=frameskip * 100000,
            nondeterministic=nondeterministic,
        )
        # No frameskip. (Atari has no entropy source, so these are
        # deterministic environments.)
        register(
            id='{}NoFrameskip-v4'.format(name),
            entry_point='gym.envs.atari:AtariEnv',
            kwargs={'game': game, 'obs_type': obs_type, 'frameskip': 1}, # A frameskip of 1 means we get every frame
            max_episode_steps=frameskip * 100000,
            nondeterministic=nondeterministic,
        )

# Unit test
# ---------
register(
    id='CubeCrash-v0',
    entry_point='gym.envs.unittest:CubeCrash',
    reward_threshold=0.9,
)
register(
    id='CubeCrashSparse-v0',
    entry_point='gym.envs.unittest:CubeCrashSparse',
    reward_threshold=0.9,
)
register(
    id='CubeCrashScreenBecomesBlack-v0',
    entry_point='gym.envs.unittest:CubeCrashScreenBecomesBlack',
    reward_threshold=0.9,
)
register(
    id='MemorizeDigits-v0',
    entry_point='gym.envs.unittest:MemorizeDigits',
    reward_threshold=20,
)
Finally, run the command "pip install -e ." in your gym directory. The new environment has now been added to your gym installation. You can try a simple training demo with the following command. The prerequisite is that the basic Fetch training works normally: Baselines must be installed correctly, and the pick-and-place demo should already run.
In your baselines directory (cd ~/baselines/baselines).
python run.py --alg=her --env=FetchAssembly-v1 --num_timesteps=5e3 --play
OK, you now have a personalized environment!