Official codebase for Decision Transformer: Reinforcement Learning via Sequence Modeling
The setup below follows the approach of min-decision-transformer.
I. Install mujoco-py
1. Create a virtual environment
conda create -n decision_transformer python=3.8
conda activate decision_transformer
Note: the Python version must be >= 3.7 to avoid conflicts with d4rl later.
2. Install system libraries
sudo apt-get update
sudo apt-get install gcc
sudo apt-get build-dep mesa   # requires deb-src entries enabled in /etc/apt/sources.list
sudo apt-get install llvm-dev
sudo apt-get install freeglut3 freeglut3-dev
sudo apt-get install python3-dev
sudo apt-get install build-essential
sudo apt install curl git libgl1-mesa-dev libgl1-mesa-glx libglew-dev \
libosmesa6-dev software-properties-common net-tools unzip vim \
virtualenv wget xpra xserver-xorg-dev libglfw3-dev patchelf
3. Install MuJoCo 200
wget https://roboti.us/download/mujoco200_linux.zip
wget https://roboti.us/file/mjkey.txt   # free activation key published by Roboti
mkdir -p ~/.mujoco
unzip mujoco200_linux.zip -d ~/.mujoco
cp -r ~/.mujoco/mujoco200_linux ~/.mujoco/mujoco200
mv mjkey.txt ~/.mujoco
sudo cp -r ~/.mujoco/mujoco200/bin/* /usr/lib/
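A quick sanity check on the resulting layout (mujoco200/bin should contain the MuJoCo shared libraries, with mjkey.txt next to the mujoco200 directory):
ls ~/.mujoco
ls ~/.mujoco/mujoco200/bin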
4. Environment variables
Add the following line to ~/.bashrc, then run source ~/.bashrc:
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/.mujoco/mujoco200/bin
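To confirm the variable is picked up in a new shell:
echo $LD_LIBRARY_PATH   # should end with .../.mujoco/mujoco200/bin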
5. Install mujoco_py
pip install mujoco_py==2.0.2.8
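The first import of mujoco_py compiles its extensions, so a bare import is a useful smoke test (it may take a minute):
python -c "import mujoco_py"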
II. Install D4RL
NOTE: even with mujoco and mujoco_py installed, installing d4rl directly will fail. Follow the steps below instead.
1. Install two helper libraries first to avoid errors:
pip install absl-py
pip install matplotlib
2. Install dm_control
pip install dm_control
If dm_control is not installed first, installing d4rl will raise an error.
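A minimal check that dm_control is importable:
python -c "import dm_control"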
3. Clone the D4RL repository:
git clone https://github.com/rail-berkeley/d4rl.git
4. Open setup.py in the d4rl directory and comment out the mujoco_py, dm_control, and mjrl dependencies (see the sketch below).
In particular, mjrl must be commented out, or the install will fail.
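For orientation, an illustrative excerpt of the edited install_requires list (the exact dependencies vary by d4rl commit; treat this as a sketch, not the real file):
install_requires=[
    'gym',
    'numpy',
    'h5py',
    # 'mujoco_py',   # already installed manually in Part I
    # 'dm_control',  # already installed in step 2
    # 'mjrl @ git+https://github.com/aravindr93/mjrl@master#egg=mjrl',  # installed in step 6
],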
5. Install d4rl in editable mode from inside the cloned directory:
cd d4rl
pip install -e .
6. Install mjrl
pip install git+https://github.com/aravindr93/mjrl@master#egg=mjrl
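To confirm both editable installs are importable (a quick smoke test; importing d4rl also exercises mujoco_py):
python -c "import d4rl, mjrl"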
III. Verify mujoco-py and D4RL
# set mujoco env path if not already set (adjust to your own home directory)
%env LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/root/.mujoco/mujoco200/bin
import gym
import d4rl  # required: importing d4rl registers its environments with gym

# mujoco-py check: step a standard gym MuJoCo env with a random action
env = gym.make('Walker2d-v3')
env.reset()
env.step(env.action_space.sample())
env.close()
print("mujoco-py check passed")

# d4rl check: this env id only exists once d4rl has been imported
env = gym.make('walker2d-medium-v2')
env.reset()
env.step(env.action_space.sample())
env.close()
print("d4rl check passed")
IV. Download the D4RL datasets
# set mujoco env path if not already set (adjust to your own home directory)
%env LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/root/.mujoco/mujoco200/bin
import collections
import os
import pickle

import gym
import numpy as np

import d4rl  # registers the D4RL environments with gym

data_dir = "./data"
print(data_dir)
if not os.path.exists(data_dir):
    os.makedirs(data_dir)

for env_name in ['walker2d', 'halfcheetah', 'hopper']:
    for dataset_type in ['medium', 'medium-expert', 'medium-replay']:
        name = f'{env_name}-{dataset_type}-v2'
        pkl_file_path = os.path.join(data_dir, name)
        print("processing: ", name)

        env = gym.make(name)
        dataset = env.get_dataset()
        N = dataset['rewards'].shape[0]

        # split the flat D4RL arrays into per-episode trajectories
        data_ = collections.defaultdict(list)
        use_timeouts = 'timeouts' in dataset
        episode_step = 0
        paths = []
        for i in range(N):
            done_bool = bool(dataset['terminals'][i])
            if use_timeouts:
                final_timestep = dataset['timeouts'][i]
            else:
                # fall back to the 1000-step limit of the MuJoCo locomotion tasks
                final_timestep = (episode_step == 1000 - 1)
            for k in ['observations', 'next_observations', 'actions', 'rewards', 'terminals']:
                data_[k].append(dataset[k][i])
            if done_bool or final_timestep:
                episode_step = 0
                episode_data = {k: np.array(v) for k, v in data_.items()}
                paths.append(episode_data)
                data_ = collections.defaultdict(list)
            episode_step += 1

        returns = np.array([np.sum(p['rewards']) for p in paths])
        num_samples = np.sum([p['rewards'].shape[0] for p in paths])
        print(f'Number of samples collected: {num_samples}')
        print(f'Trajectory returns: mean = {np.mean(returns)}, std = {np.std(returns)}, '
              f'max = {np.max(returns)}, min = {np.min(returns)}')

        with open(f'{pkl_file_path}.pkl', 'wb') as f:
            pickle.dump(paths, f)
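To sanity-check a saved file, load it back and inspect one trajectory (a minimal sketch; the walker2d-medium-v2 filename assumes the loop above has run):
import pickle

with open('./data/walker2d-medium-v2.pkl', 'rb') as f:
    trajectories = pickle.load(f)

print(len(trajectories))                      # number of episodes
print(sorted(trajectories[0].keys()))         # actions, next_observations, observations, rewards, terminals
print(trajectories[0]['observations'].shape)  # (episode_length, obs_dim)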