unity版本:2022.3.42f1c1
一、将装配体文件导入unity
装配体文件由solidworks软件构建完成后,经3ds Max软件导出为fbx文件,再将fbx文件导入unity(导入教程可在CSDN自行搜索)
导入完成后设置好模型位置与模型材质
二、构建unity强化学习环境
①安装强化学习插件Barracuda和mlagents
我安装的版本是Barracuda 3.0.0(2022年3月1日发布,该插件是调用CPU与GPU训练强化学习的插件)与mlagents 2.0.1(2021年11月8日发布,该插件是强化学习工具箱,在设计强化学习程序时,有一些父类需要从工具箱中直接继承)
②构造必要的机械关联
为简化设计,将四个调平液压缸的y轴距离与活动横梁的四个受力点进行直接绑定
第一步:创建四个空物体,调整空物体位置使其与调平缸的四个受力点重合
第二步:在四个调平缸与受力点上绑定位置关系
脚本文件内容如下:
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
/// <summary>
/// Keeps this object's world-space Y coordinate equal to the world-space Y
/// coordinate of <see cref="specifiedPoint3"/>. X and Z are left untouched.
/// </summary>
public class cy3 : MonoBehaviour
{
    /// <summary>
    /// Transform whose world Y position is copied onto this object every frame.
    /// Assigned in the Unity editor.
    /// </summary>
    public Transform specifiedPoint3;

    // Start is called before the first frame update
    private void Start()
    {
        // Report a missing Inspector reference once, instead of letting Update
        // throw on every frame.
        if (specifiedPoint3 == null)
        {
            Debug.LogError("cy3: specifiedPoint3 is not assigned.", this);
        }
    }

    // Update is called once per frame
    private void Update()
    {
        // Skip while the target is unassigned or has been destroyed.
        if (specifiedPoint3 == null)
        {
            return;
        }

        // Overwrite only the Y component of the current world position.
        Vector3 currentPosition = transform.position;
        currentPosition.y = specifiedPoint3.position.y;
        transform.position = currentPosition;
    }
}
第三步:在刚体组件中固定活动横梁的y轴位移与偏转
第四步:设置强化学习环境,在滑块组件下设置脚本文件,内容如下,代码内容自行理解
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
/// <summary>
/// ML-Agents agent attached to a rigidbody ("huakuai"). Each step it receives
/// four continuous actions, converts each into a vertical force applied at one
/// of four points offset from the body's position, and is rewarded for keeping
/// its local X and Z Euler angles close to zero.
/// </summary>
public class MyAgent : Agent
{
    // Absolute tilt (degrees) beyond which the episode is ended.
    private const float MaxTiltDegrees = 15f;

    // Absolute tilt (degrees) beyond which the flat fallback reward is used.
    private const float RewardBandDegrees = 3.0f;

    // Multiplier applied to the tanh-squashed action to obtain the force.
    private const float ForceScale = 2f;

    // Horizontal distance (world X and Z) from the body position to each force point.
    private const float CornerOffset = 0.6f;

    Rigidbody huakuai;

    /// <summary>Caches the Rigidbody found on this GameObject.</summary>
    public override void Initialize()
    {
        huakuai = GetComponent<Rigidbody>();
    }

    /// <summary>
    /// Resets velocities, local position and local rotation at the start of an episode.
    /// </summary>
    public override void OnEpisodeBegin()
    {
        // Zero the momentum left over from the previous episode.
        this.huakuai.angularVelocity = Vector3.zero;
        this.huakuai.velocity = Vector3.zero;
        this.transform.localPosition = new Vector3(0, -0.37f, 0);

        // Random.value * 8 - 8 yields an initial tilt in [-8, 0] degrees on X and Z.
        // NOTE(review): if a range symmetric around zero was intended, confirm and adjust.
        this.transform.localEulerAngles = new Vector3(Random.value * 8 - 8, 0, Random.value * 8 - 8);
    }

    /// <summary>
    /// Adds four observations: the z and x components of the world rotation
    /// quaternion, then the z and x components of the angular velocity.
    /// </summary>
    /// <param name="sensor">Vector sensor that receives the observations.</param>
    public override void CollectObservations(VectorSensor sensor)
    {
        // NOTE(review): rotation.z / rotation.x are quaternion components, not
        // Euler angles - confirm this is the intended observation.
        sensor.AddObservation(gameObject.transform.rotation.z);
        sensor.AddObservation(gameObject.transform.rotation.x);
        sensor.AddObservation(huakuai.angularVelocity.z);
        sensor.AddObservation(huakuai.angularVelocity.x);
    }

    /// <summary>
    /// Manual control: writes the "Horizontal" and "Vertical" input axes into
    /// continuous actions 1 and 2.
    /// </summary>
    /// <param name="actionsOut">Action buffers to fill.</param>
    public override void Heuristic(in ActionBuffers actionsOut)
    {
        // NOTE(review): only indices 1 and 2 are written here; indices 0 and 3
        // are not set by this method - confirm that mapping is intended.
        var continuousActionsOut = actionsOut.ContinuousActions;
        continuousActionsOut[1] = Input.GetAxis("Horizontal");
        continuousActionsOut[2] = Input.GetAxis("Vertical");
    }

    /// <summary>
    /// Applies the four action-driven forces, adds the step reward, and ends the
    /// episode when the tilt on X or Z exceeds <see cref="MaxTiltDegrees"/>.
    /// </summary>
    /// <param name="actions">Action buffers; continuous actions 0..3 are read.</param>
    public override void OnActionReceived(ActionBuffers actions)
    {
        // Local Euler angles mapped from [0, 360) to (-180, 180].
        float x = ToSignedAngle(this.transform.localEulerAngles.x);
        float z = ToSignedAngle(this.transform.localEulerAngles.z);

        var continuous = actions.ContinuousActions;
        ApplyVerticalForce(continuous[0], CornerOffset, CornerOffset);
        ApplyVerticalForce(continuous[1], CornerOffset, -CornerOffset);
        ApplyVerticalForce(continuous[2], -CornerOffset, CornerOffset);
        ApplyVerticalForce(continuous[3], -CornerOffset, -CornerOffset);

        double reward;
        if (System.Math.Abs(x) > RewardBandDegrees || System.Math.Abs(z) > RewardBandDegrees)
        {
            reward = 0.1f;
        }
        else
        {
            // Angles are converted from degrees to radians before the exponential.
            reward = 0.5 * System.Math.Exp(-System.Math.Abs(x) * System.Math.PI / 180)
                   + 0.5 * System.Math.Exp(-System.Math.Abs(z) * System.Math.PI / 180);
        }
        AddReward((float)reward);

        // Terminate only after the reward has been added, so this step's reward
        // is credited to the episode that produced it rather than being added
        // after EndEpisode() has already been called.
        if (System.Math.Abs(x) > MaxTiltDegrees || System.Math.Abs(z) > MaxTiltDegrees)
        {
            EndEpisode();
        }
    }

    /// <summary>
    /// Penalises and ends the episode when colliding with an object tagged "pengpeng".
    /// </summary>
    /// <param name="collision">Collision data supplied by Unity.</param>
    private void OnCollisionEnter(Collision collision)
    {
        if (collision.gameObject.CompareTag("pengpeng"))
        {
            // A collision with the tagged object costs -50 and ends the episode.
            AddReward(-50.0f);
            print("done");
            EndEpisode();
        }
    }

    // Maps an angle greater than 180 degrees to its negative equivalent (angle - 360).
    private static float ToSignedAngle(float degrees)
    {
        return degrees > 180 ? degrees - 360 : degrees;
    }

    // Applies a force of ForceScale * tanh(rawAction) along Y at the body position
    // shifted by (offsetX, 0, offsetZ).
    // NOTE(review): the offset is added along world axes, not the body's local
    // axes - confirm this approximation is acceptable when the body is tilted.
    private void ApplyVerticalForce(float rawAction, float offsetX, float offsetZ)
    {
        Vector3 force = Vector3.zero;
        force.y = ForceScale * (float)System.Math.Tanh(rawAction);
        Vector3 point = this.transform.position + new Vector3(offsetX, 0, offsetZ);
        huakuai.AddForceAtPosition(force, point);
    }
}
以下几点注意事项
三、运行调试
由于没有设计强化学习算法,此时运行应该查看两个问题
①程序没有报错
②滑块每隔一段时间会重置到初始位置(上面的代码就是这样设计的)