1.运动学模型 输入的是速度
/// <summary>
/// Kinematic control: overwrites the rigidbody's velocity with the supplied components.
/// </summary>
/// <param name="act">Velocity command; elements 0, 1 and 2 are used as the x, y and z components.</param>
public void MoveAgent(float[] act)
{
    float vx = act[0];
    float vy = act[1];
    float vz = act[2];

    // The velocity is set directly (not accumulated), so the action is the velocity itself.
    agentRb.velocity = new Vector3(vx, vy, vz);
}
/// <summary>
/// Scales the first three continuous actions and forwards them to <c>MoveAgent</c>.
/// </summary>
/// <param name="actionBuffers">Action buffers; continuous actions 0..2 are read.</param>
public override void OnActionReceived(ActionBuffers actionBuffers)
{
    // With velocity as the control input, the maximum speed is set to 3
    // (presumably the raw actions lie in [-1, 1] - confirm against the behavior config).
    const float maxSpeed = 3f;

    float[] scaled = new float[3];
    for (int axis = 0; axis < scaled.Length; axis++)
    {
        scaled[axis] = maxSpeed * actionBuffers.ContinuousActions[axis];
    }

    MoveAgent(scaled);
}
2.ForceMode.VelocityChange
ForceMode.VelocityChange 会忽略刚体质量,把传入的向量直接作为速度增量叠加到刚体上;如果每个物理步长都调用一次 AddForce,传入的值就是每个步长的速度变化量。
// Fragment: apply act[0], act[1], act[2] to the rigidbody using ForceMode.VelocityChange.
agentRb.AddForce(act[0], act[1], act[2], ForceMode.VelocityChange);
// Log the three velocity values and the current time so the per-step change can be inspected.
// NOTE(review): velx / vely / velz are not declared in this fragment; presumably they are the
// components of agentRb.velocity read earlier in the enclosing method - confirm.
Debug.Log("velx:" + velx);
Debug.Log("vely:" + vely);
Debug.Log("velz:" + velz);
Debug.Log("Time:" + Time.time);
比如把 act[0]~act[2] 三个分量的增量都设置为 0.015 时,可以看到每隔 0.02 s(默认物理步长),速度在三个轴向各增加 0.015。
在使用脉冲控制的时候应注意这一点:是单步改变速度,还是多步连续改变速度,这决定了速度的取值范围。
关于脉冲控制的编程建议
/// <summary>
/// Impulse control with relative orbital dynamics: computes the acceleration produced by the
/// linearised relative-motion equations (the "_cw" suffix suggests Clohessy-Wiltshire), turns it
/// into a velocity change over one physics step, adds the commanded impulse and applies the sum
/// to the rigidbody with <c>ForceMode.VelocityChange</c>.
/// </summary>
/// <param name="act">Commanded velocity change; elements 0, 1 and 2 are the x, y and z components.</param>
public void MoveAgent(float[] act)
{
    // Orbit radius in metres. The original note called this "orbit altitude", but the value is
    // used as the radius in sqrt(mu / R^3) (it looks like 6371 km + 36000 km - confirm).
    const float R = 42371000f;
    // Earth constant from the original note (gravitational parameter, presumably m^3/s^2).
    const float Miu = 3.98603e14f;

    // Orbital angular rate of the reference frame.
    float w0 = (float)Math.Sqrt(Miu / Math.Pow(R, 3));

    // Current state: local position of the rigidbody's transform and its velocity.
    Vector3 localPos = this.agentRb.transform.localPosition;
    float posx = localPos.x;
    float posy = localPos.y;
    float posz = localPos.z; // read for completeness; the equations below do not use it
    float velx = agentRb.velocity.x;
    float vely = agentRb.velocity.y;
    float velz = agentRb.velocity.z;

    // Acceleration on the three axes.
    float Accx = (float)(3 * Math.Pow(w0, 2) * posx + 2 * w0 * velz);
    float Accy = (float)(-Math.Pow(w0, 2) * posy);
    float Accz = (float)(-2 * w0 * velx);

    // Velocity change caused by the orbital dynamics = acceleration * step length.
    // The step is taken from the physics settings (0.02 s by default) instead of the literal
    // 0.02, which was a double and made the float assignments fail to compile.
    float dt = Time.fixedDeltaTime;
    float velx_cw = Accx * dt;
    float vely_cw = Accy * dt;
    float velz_cw = Accz * dt;

    // Total velocity change: dynamics contribution plus the commanded impulse in act.
    float deltavx = velx_cw + act[0];
    float deltavy = vely_cw + act[1];
    float deltavz = velz_cw + act[2];
    agentRb.AddForce(deltavx, deltavy, deltavz, ForceMode.VelocityChange);

    // Print the velocity sampled before this step's change was applied, plus the current time.
    Debug.Log("velx:" + velx);
    Debug.Log("vely:" + vely);
    Debug.Log("velz:" + velz);
    Debug.Log("Time:" + Time.time);
}
1v1 gridsensor 奖励为距离
两层神经网络效果
追踪是用的agentRb.AddForce(deltavx , deltavy, deltavz , ForceMode.VelocityChange);
调参
- 单层试一下
- decision 周期调大 试一下
- velocitychange 试一下