I. How exactly does turtlebot3_dqn move?
Is it simply the five actions implied by action_size = 5 in /turtlebot3_machine_learning/turtlebot3_dqn/nodes/turtlebot3_dqn_stage_1? That is certainly the simplest way to understand it, but a problem remains:
《/turtlebot3_machine_learning/turtlebot3_dqn/nodes/turtlebot3_dqn_stage_1》
《/turtlebot3_machine_learning/turtlebot3_dqn/src/turtlebot3_dqn/environment_stage_1.py》
Neither of the two files above ever mentions the turtlebot3_teleop_key file or the keyword "teleop".
1. 《turtlebot3_teleop_key》
import rospy
from geometry_msgs.msg import Twist
import sys, select, os
if os.name == 'nt':
import msvcrt
else:
import tty, termios
BURGER_MAX_LIN_VEL = 0.22
BURGER_MAX_ANG_VEL = 2.84
WAFFLE_MAX_LIN_VEL = 0.26
WAFFLE_MAX_ANG_VEL = 1.82
LIN_VEL_STEP_SIZE = 0.01
ANG_VEL_STEP_SIZE = 0.1
msg = """
Control Your TurtleBot3!
---------------------------
Moving around:
w
a s d
x
w/x : increase/decrease linear velocity (Burger : ~ 0.22, Waffle and Waffle Pi : ~ 0.26)
a/d : increase/decrease angular velocity (Burger : ~ 2.84, Waffle and Waffle Pi : ~ 1.82)
space key, s : force stop
CTRL-C to quit
"""
e = """
Communications Failed
"""
def getKey():
if os.name == 'nt':
return msvcrt.getch()
tty.setraw(sys.stdin.fileno())
rlist, _, _ = select.select([sys.stdin], [], [], 0.1)
if rlist:
key = sys.stdin.read(1)
else:
key = ''
termios.tcsetattr(sys.stdin, termios.TCSADRAIN, settings)
return key
def vels(target_linear_vel, target_angular_vel):
return "currently:\tlinear vel %s\t angular vel %s " % (target_linear_vel,target_angular_vel)
def makeSimpleProfile(output, input, slop):
if input > output:
output = min( input, output + slop )
elif input < output:
output = max( input, output - slop )
else:
output = input
return output
def constrain(input, low, high):
if input < low:
input = low
elif input > high:
input = high
else:
input = input
return input
def checkLinearLimitVelocity(vel):
if turtlebot3_model == "burger":
vel = constrain(vel, -BURGER_MAX_LIN_VEL, BURGER_MAX_LIN_VEL)
elif turtlebot3_model == "waffle" or turtlebot3_model == "waffle_pi":
vel = constrain(vel, -WAFFLE_MAX_LIN_VEL, WAFFLE_MAX_LIN_VEL)
else:
vel = constrain(vel, -BURGER_MAX_LIN_VEL, BURGER_MAX_LIN_VEL)
return vel
def checkAngularLimitVelocity(vel):
if turtlebot3_model == "burger":
vel = constrain(vel, -BURGER_MAX_ANG_VEL, BURGER_MAX_ANG_VEL)
elif turtlebot3_model == "waffle" or turtlebot3_model == "waffle_pi":
vel = constrain(vel, -WAFFLE_MAX_ANG_VEL, WAFFLE_MAX_ANG_VEL)
else:
vel = constrain(vel, -BURGER_MAX_ANG_VEL, BURGER_MAX_ANG_VEL)
return vel
if __name__=="__main__":
if os.name != 'nt':
settings = termios.tcgetattr(sys.stdin)
rospy.init_node('turtlebot3_teleop')
pub = rospy.Publisher('cmd_vel', Twist, queue_size=10)
turtlebot3_model = rospy.get_param("model", "burger")
status = 0
target_linear_vel = 0.0
target_angular_vel = 0.0
control_linear_vel = 0.0
control_angular_vel = 0.0
try:
print(msg)
while(1):
key = getKey()
if key == 'w' :
target_linear_vel = checkLinearLimitVelocity(target_linear_vel + LIN_VEL_STEP_SIZE)
status = status + 1
print(vels(target_linear_vel,target_angular_vel))
elif key == 'x' :
target_linear_vel = checkLinearLimitVelocity(target_linear_vel - LIN_VEL_STEP_SIZE)
status = status + 1
print(vels(target_linear_vel,target_angular_vel))
elif key == 'a' :
target_angular_vel = checkAngularLimitVelocity(target_angular_vel + ANG_VEL_STEP_SIZE)
status = status + 1
print(vels(target_linear_vel,target_angular_vel))
elif key == 'd' :
target_angular_vel = checkAngularLimitVelocity(target_angular_vel - ANG_VEL_STEP_SIZE)
status = status + 1
print(vels(target_linear_vel,target_angular_vel))
elif key == ' ' or key == 's' :
target_linear_vel = 0.0
control_linear_vel = 0.0
target_angular_vel = 0.0
control_angular_vel = 0.0
print(vels(target_linear_vel, target_angular_vel))
else:
if (key == '\x03'):
break
if status == 20 :
print(msg)
status = 0
twist = Twist()
control_linear_vel = makeSimpleProfile(control_linear_vel, target_linear_vel, (LIN_VEL_STEP_SIZE/2.0))
twist.linear.x = control_linear_vel; twist.linear.y = 0.0; twist.linear.z = 0.0
control_angular_vel = makeSimpleProfile(control_angular_vel, target_angular_vel, (ANG_VEL_STEP_SIZE/2.0))
twist.angular.x = 0.0; twist.angular.y = 0.0; twist.angular.z = control_angular_vel
pub.publish(twist)
except:
print(e)
finally:
twist = Twist()
twist.linear.x = 0.0; twist.linear.y = 0.0; twist.linear.z = 0.0
twist.angular.x = 0.0; twist.angular.y = 0.0; twist.angular.z = 0.0
pub.publish(twist)
if os.name != 'nt':
termios.tcsetattr(sys.stdin, termios.TCSADRAIN, settings)
After deleting this file, turtlebot3_dqn still ran normally, which confirms that this first file really has nothing to do with how turtlebot3_dqn moves.
Laptop terminal 1:
source activate py27
cd ~/catkin_ws
source devel/setup.bash
export TURTLEBOT3_MODEL=burger
roslaunch turtlebot3_gazebo turtlebot3_stage_1.launch
Laptop terminal 2:
source activate py27
cd ~/catkin_ws
source devel/setup.bash
export TURTLEBOT3_MODEL=burger
roslaunch turtlebot3_dqn turtlebot3_dqn_stage_1.launch
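With both launch files running, a quick sanity check (assuming the standard setup above; the exact node list depends on your installation):
rosnode list | grep teleop
# prints nothing -- no teleop node is running, consistent with the deletion test above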
2. 《turtlebot3_dqn_stage_1》
import rospy
import os
import json
import numpy as np
import random
import time
import sys
sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname(__file__))))
from collections import deque
from std_msgs.msg import Float32MultiArray
from src.turtlebot3_dqn.environment_stage_1 import Env
from keras.models import Sequential, load_model
from keras.optimizers import RMSprop
from keras.layers import Dense, Dropout, Activation
EPISODES = 3000
class ReinforceAgent():
def __init__(self, state_size, action_size):
self.pub_result = rospy.Publisher('result', Float32MultiArray, queue_size=5)
self.dirPath = os.path.dirname(os.path.realpath(__file__))
self.dirPath = self.dirPath.replace('turtlebot3_dqn/nodes', 'turtlebot3_dqn/save_model/stage_1_')
self.result = Float32MultiArray()
self.load_model = True
self.load_episode = 30
self.state_size = state_size
self.action_size = action_size
self.episode_step = 6000
self.target_update = 2000
self.discount_factor = 0.99
self.learning_rate = 0.00025
self.epsilon = 1.0
self.epsilon_decay = 0.99
self.epsilon_min = 0.05
self.batch_size = 64
self.train_start = 64
self.memory = deque(maxlen=1000000)
self.model = self.buildModel()
self.target_model = self.buildModel()
self.updateTargetModel()
if self.load_model:
self.model.set_weights(load_model(self.dirPath+str(self.load_episode)+".h5").get_weights())
with open(self.dirPath+str(self.load_episode)+'.json') as outfile:
param = json.load(outfile)
self.epsilon = param.get('epsilon')
print param # fyo
print self.epsilon # fyo
def buildModel(self):
model = Sequential()
dropout = 0.2
model.add(Dense(64, input_shape=(self.state_size,), activation='relu', kernel_initializer='lecun_uniform'))
model.add(Dense(64, activation='relu', kernel_initializer='lecun_uniform'))
model.add(Dropout(dropout))
model.add(Dense(self.action_size, kernel_initializer='lecun_uniform'))
model.add(Activation('linear'))
model.compile(loss='mse', optimizer=RMSprop(lr=self.learning_rate, rho=0.9, epsilon=1e-06))
model.summary()
return model
def getQvalue(self, reward, next_target, done):
if done:
return reward
else:
return reward + self.discount_factor * np.amax(next_target)
def updateTargetModel(self):
self.target_model.set_weights(self.model.get_weights())
def getAction(self, state):
if np.random.rand() <= self.epsilon:
self.q_value = np.zeros(self.action_size)
return random.randrange(self.action_size)
else:
q_value = self.model.predict(state.reshape(1, len(state)))
self.q_value = q_value
return np.argmax(q_value[0])
def appendMemory(self, state, action, reward, next_state, done):
self.memory.append((state, action, reward, next_state, done))
def trainModel(self, target=False):
mini_batch = random.sample(self.memory, self.batch_size)
X_batch = np.empty((0, self.state_size), dtype=np.float64)
Y_batch = np.empty((0, self.action_size), dtype=np.float64)
for i in range(self.batch_size):
states = mini_batch[i][0]
actions = mini_batch[i][1]
rewards = mini_batch[i][2]
next_states = mini_batch[i][3]
dones = mini_batch[i][4]
q_value = self.model.predict(states.reshape(1, len(states)))
self.q_value = q_value
if target:
next_target = self.target_model.predict(next_states.reshape(1, len(next_states)))
else:
next_target = self.model.predict(next_states.reshape(1, len(next_states)))
next_q_value = self.getQvalue(rewards, next_target, dones)
X_batch = np.append(X_batch, np.array([states.copy()]), axis=0)
Y_sample = q_value.copy()
Y_sample[0][actions] = next_q_value
Y_batch = np.append(Y_batch, np.array([Y_sample[0]]), axis=0)
if dones:
X_batch = np.append(X_batch, np.array([next_states.copy()]), axis=0)
Y_batch = np.append(Y_batch, np.array([[rewards] * self.action_size]), axis=0)
self.model.fit(X_batch, Y_batch, batch_size=self.batch_size, epochs=1, verbose=0)
if __name__ == '__main__':
rospy.init_node('turtlebot3_dqn_stage_1')
pub_result = rospy.Publisher('result', Float32MultiArray, queue_size=5)
pub_get_action = rospy.Publisher('get_action', Float32MultiArray, queue_size=5)
result = Float32MultiArray()
get_action = Float32MultiArray()
state_size = 26
action_size = 5
env = Env(action_size)
agent = ReinforceAgent(state_size, action_size)
scores, episodes = [], []
global_step = 0
start_time = time.time()
for e in range(agent.load_episode + 1, EPISODES):
done = False
state = env.reset()
score = 0
for t in range(agent.episode_step):
action = agent.getAction(state)
next_state, reward, done = env.step(action)
agent.appendMemory(state, action, reward, next_state, done)
if len(agent.memory) >= agent.train_start:
if global_step <= agent.target_update:
agent.trainModel()
else:
agent.trainModel(True)
score += reward
state = next_state
get_action.data = [action, score, reward]
pub_get_action.publish(get_action)
if e % 10 == 0:
agent.model.save(agent.dirPath + str(e) + '.h5')
with open(agent.dirPath + str(e) + '.json', 'w') as outfile:
json.dump(param_dictionary, outfile)
if t >= 500:
rospy.loginfo("Time out!!")
done = True
if done:
result.data = [score, np.max(agent.q_value)]
pub_result.publish(result)
agent.updateTargetModel()
scores.append(score)
episodes.append(e)
m, s = divmod(int(time.time() - start_time), 60)
h, m = divmod(m, 60)
rospy.loginfo('Ep: %d score: %.2f memory: %d epsilon: %.2f time: %d:%02d:%02d',
e, score, len(agent.memory), agent.epsilon, h, m, s)
param_keys = ['epsilon']
param_values = [agent.epsilon]
param_dictionary = dict(zip(param_keys, param_values))
break
global_step += 1
if global_step % agent.target_update == 0:
rospy.loginfo("UPDATE TARGET NETWORK")
if agent.epsilon > agent.epsilon_min:
agent.epsilon *= agent.epsilon_decay
These five lines in the code above are probably the key:
# define the 'get_action' topic
pub_get_action = rospy.Publisher('get_action', Float32MultiArray, queue_size=5)
get_action = Float32MultiArray()
next_state, reward, done = env.step(action)
get_action.data = [action, score, reward]
# publish the 'get_action' topic
pub_get_action.publish(get_action)
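Note that these lines only publish telemetry: the Float32MultiArray carries [action, score, reward] for monitoring, and nothing here commands a velocity. A minimal sketch to verify this (a hypothetical standalone node, not part of the package):
#!/usr/bin/env python
# Subscribe to 'get_action' and print its payload, to confirm it is
# telemetry only and not a motion command.
import rospy
from std_msgs.msg import Float32MultiArray

def callback(msg):
    action, score, reward = msg.data
    rospy.loginfo("action=%d score=%.2f reward=%.2f", int(action), score, reward)

if __name__ == '__main__':
    rospy.init_node('get_action_listener')
    rospy.Subscriber('get_action', Float32MultiArray, callback)
    rospy.spin()
Running this alongside the training node prints the chosen action each step while the robot's motion is unaffected, which already suggests the real motion command lives elsewhere.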
3. 《environment_stage_1.py》
import rospy
import numpy as np
import math
from math import pi
from geometry_msgs.msg import Twist, Point, Pose
from sensor_msgs.msg import LaserScan
from nav_msgs.msg import Odometry
from std_srvs.srv import Empty
from tf.transformations import euler_from_quaternion, quaternion_from_euler
from respawnGoal import Respawn
class Env():
def __init__(self, action_size):
self.goal_x = 0
self.goal_y = 0
self.heading = 0
self.action_size = action_size
self.initGoal = True
self.get_goalbox = False
self.position = Pose()
self.pub_cmd_vel = rospy.Publisher('cmd_vel', Twist, queue_size=5)
self.sub_odom = rospy.Subscriber('odom', Odometry, self.getOdometry)
self.reset_proxy = rospy.ServiceProxy('gazebo/reset_simulation', Empty)
self.unpause_proxy = rospy.ServiceProxy('gazebo/unpause_physics', Empty)
self.pause_proxy = rospy.ServiceProxy('gazebo/pause_physics', Empty)
self.respawn_goal = Respawn()
def getGoalDistace(self):
goal_distance = round(math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y), 2)
return goal_distance
def getOdometry(self, odom):
self.position = odom.pose.pose.position
orientation = odom.pose.pose.orientation
orientation_list = [orientation.x, orientation.y, orientation.z, orientation.w]
_, _, yaw = euler_from_quaternion(orientation_list)
goal_angle = math.atan2(self.goal_y - self.position.y, self.goal_x - self.position.x)
heading = goal_angle - yaw
if heading > pi:
heading -= 2 * pi
elif heading < -pi:
heading += 2 * pi
self.heading = round(heading, 2)
def getState(self, scan):
scan_range = []
heading = self.heading
min_range = 0.13
done = False
for i in range(len(scan.ranges)):
if scan.ranges[i] == float('Inf'):
scan_range.append(3.5)
elif np.isnan(scan.ranges[i]):
scan_range.append(0)
else:
scan_range.append(scan.ranges[i])
if min_range > min(scan_range) > 0:
done = True
current_distance = round(math.hypot(self.goal_x - self.position.x, self.goal_y - self.position.y),2)
if current_distance < 0.2:
self.get_goalbox = True
return scan_range + [heading, current_distance], done
def setReward(self, state, done, action):
yaw_reward = []
current_distance = state[-1]
heading = state[-2]
for i in range(5):
angle = -pi / 4 + heading + (pi / 8 * i) + pi / 2
tr = 1 - 4 * math.fabs(0.5 - math.modf(0.25 + 0.5 * angle % (2 * math.pi) / math.pi)[0])
yaw_reward.append(tr)
distance_rate = 2 ** (current_distance / self.goal_distance)
reward = ((round(yaw_reward[action] * 5, 2)) * distance_rate)
if done:
rospy.loginfo("Collision!!")
reward = -200
self.pub_cmd_vel.publish(Twist())
if self.get_goalbox:
rospy.loginfo("Goal!!")
reward = 200
self.pub_cmd_vel.publish(Twist())
self.goal_x, self.goal_y = self.respawn_goal.getPosition(True, delete=True)
self.goal_distance = self.getGoalDistace()
self.get_goalbox = False
return reward
def step(self, action):
max_angular_vel = 1.5
ang_vel = ((self.action_size - 1)/2 - action) * max_angular_vel * 0.5
vel_cmd = Twist()
vel_cmd.linear.x = 0.15
vel_cmd.angular.z = ang_vel
self.pub_cmd_vel.publish(vel_cmd)
print vel_cmd # fyo
data = None
while data is None:
try:
data = rospy.wait_for_message('scan', LaserScan, timeout=5)
except:
pass
state, done = self.getState(data)
reward = self.setReward(state, done, action)
return np.asarray(state), reward, done
def reset(self):
rospy.wait_for_service('gazebo/reset_simulation')
try:
self.reset_proxy()
except (rospy.ServiceException) as e:
print("gazebo/reset_simulation service call failed")
data = None
while data is None:
try:
data = rospy.wait_for_message('scan', LaserScan, timeout=5)
except:
pass
if self.initGoal:
self.goal_x, self.goal_y = self.respawn_goal.getPosition()
self.initGoal = False
self.goal_distance = self.getGoalDistace()
state, done = self.getState(data)
return np.asarray(state)
4. Tracing the source of turtlebot3_dqn's motion
Is it pub_get_action.publish(get_action) that makes the TurtleBot3 move,
or is it self.pub_cmd_vel.publish(vel_cmd)?
(1) Adding 'print vel_cmd' to environment_stage_1.py
In /turtlebot3_machine_learning/turtlebot3_dqn/src/turtlebot3_dqn/environment_stage_1.py, add:
print vel_cmd # fyo
Analyzing the printed messages shows that the linear part is always
x: 0.15
y: 0.0
z: 0.0
while the angular part takes one of five values:
x: 0.0
y: 0.0
z: 1.5
x: 0.0
y: 0.0
z: 0.75
x: 0.0
y: 0.0
z: 0.0
x: 0.0
y: 0.0
z: -0.75
x: 0.0
y: 0.0
z: -1.5
According to the code in 《/turtlebot3_machine_learning/turtlebot3_dqn/src/turtlebot3_dqn/environment_stage_1.py》, action takes the values [0, 1, 2, 3, 4]. In other words, the linear velocity never changes; the five actions only select among five turning rates.
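Plugging action = 0..4 into the formula from step() reproduces exactly the five angular velocities printed above (a quick sketch; action_size and max_angular_vel are taken from the source):
# Reproduce the action -> angular velocity mapping from step()
action_size = 5
max_angular_vel = 1.5
for action in range(action_size):
    ang_vel = ((action_size - 1) / 2 - action) * max_angular_vel * 0.5
    print(action, ang_vel)
# 0: 1.5, 1: 0.75, 2: 0.0, 3: -0.75, 4: -1.5
So action 2 is "go straight", actions 0/1 turn left, actions 3/4 turn right, all at linear.x = 0.15.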
(2) Analyzing the DQN in the 莫烦Python (Morvan Python) tutorials
maze_env contains this snippet:
def step(self, action):
s = self.canvas.coords(self.rect)
base_action = np.array([0, 0])
if action == 0: # up
if s[1] > UNIT:
base_action[1] -= UNIT
elif action == 1: # down
if s[1] < (MAZE_H - 1) * UNIT:
base_action[1] += UNIT
elif action == 2: # right
if s[0] < (MAZE_W - 1) * UNIT:
base_action[0] += UNIT
elif action == 3: # left
if s[0] > UNIT:
base_action[0] -= UNIT
By analogy, it should be the step function in environment_stage_1.py that applies the command.
(3) Findings
Where is action defined, and where is it passed for execution?
If it is self.pub_cmd_vel.publish(vel_cmd) that makes the TurtleBot move, then which program receives the command and executes it? It is worth studying turtlebot3_teleop_key again and drawing the analogy.
Checking rqt_graph gives:
Nodes only graph (figure omitted)
Nodes/Topics (all) graph (figure omitted)
The topic '/cmd_vel' is actually published from 《environment_stage_1.py》, not 《turtlebot3_dqn_stage_1》; but the former is just a module, not a node, while the latter is a node, hence the graph above. My takeaway for now: once the topic is published, the publisher's job is done; everything afterwards is /gazebo's business.
Another look at mrobot_teleop keeps me with that judgment, for now.
No, wait. Based on:
def step(self, action):
max_angular_vel = 1.5
ang_vel = ((self.action_size - 1)/2 - action) * max_angular_vel * 0.5
vel_cmd = Twist()
vel_cmd.linear.x = 0.15
vel_cmd.angular.z = ang_vel
self.pub_cmd_vel.publish(vel_cmd)
there must be some node that subscribes to this topic and then executes it.
In the file /turtlebot3/turtlebot3_description/urdf/turtlebot3_burger.gazebo.xacro I found this section:
...
<gazebo>
<plugin name="turtlebot3_burger_controller" filename="libgazebo_ros_diff_drive.so">
<commandTopic>cmd_vel</commandTopic>
...
cmd_vel… very close now… ha, so I got excited over nothing earlier: it is Gazebo's libgazebo_ros_diff_drive.so plugin that subscribes to cmd_vel and drives the simulated robot.
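One quick way to confirm this wiring while the simulation is running (a sketch; the exact output depends on the setup):
rostopic info /cmd_vel
# Expected, roughly:
#   Publishers:  /turtlebot3_dqn_stage_1 (the training node)
#   Subscribers: /gazebo (the diff-drive plugin loaded from the xacro above)
So the DQN node publishes Twist messages on /cmd_vel, and the /gazebo node, through libgazebo_ros_diff_drive.so, turns them into wheel motion.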
II. Next steps
1. When saving the model, what is the difference between the '.h5' file and the '.json' file?
From the code below, the '.h5' file holds the model parameters; so what does the '.json' file hold?
if self.load_model:
self.model.set_weights(load_model(self.dirPath+str(self.load_episode)+".h5").get_weights())
with open(self.dirPath+str(self.load_episode)+'.json') as outfile:
param = json.load(outfile)
self.epsilon = param.get('epsilon')
One explanation found online, lightly corrected here: a .h5 file written by model.save() stores both the model structure and the model weights (HDF5 and .h5 are the same format; model.save_weights() writes weights only), while JSON and YAML files written by to_json()/to_yaml() store the model structure only.
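A minimal sketch of the three save paths with the same Keras 2.x API the node uses (model is assumed to be the compiled Sequential returned by buildModel()):
# architecture + weights (+ optimizer state), one HDF5 file
model.save('stage_1_30.h5')
# weights only
model.save_weights('stage_1_30_weights.h5')
# architecture only, as a JSON string
json_string = model.to_json()
Note, however, that the '.json' files written by turtlebot3_dqn_stage_1 are not Keras model-structure JSON at all: json.dump(param_dictionary, outfile) only stores training parameters, currently just epsilon, which is exactly what the loading code reads back with param.get('epsilon').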
2. What about the two topics published in 《turtlebot3_dqn_stage_1》?
Why does neither of the two published topics show up in rqt_graph?
pub_result = rospy.Publisher('result', Float32MultiArray, queue_size=5)
pub_get_action = rospy.Publisher('get_action', Float32MultiArray, queue_size=5)
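The two topics do exist; they simply have no subscribers in this setup (if I remember correctly, the package also ships plotting nodes such as result_graph and action_graph that subscribe to them), and rqt_graph hides unconnected topics in its default view. A quick check while training is running (the exact output depends on the setup):
rostopic list | grep -E 'result|get_action'
rostopic echo /get_action
# each message: data: [action, score, reward]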
3. Plan
1. Work out the full flow of motion commands
2. Reproduce the program from 0508
3. Modify the TurtleBot3 RL algorithm
4. Move on to learning openai_ros
5. Reproduce the work of Ma Liulong at Harbin Institute of Technology.
1. Can training run without render, i.e. with no GUI at all?
2. Tune the time factor for faster training
3. Avoid burning out the CPU/GPU
4. Understand the soft actor-critic algorithm