多缸液压机(活动横梁)孪生模型构建(强化学习)

unity版本:2022.3.42f1c1

一、将装配体文件导入unity

装配体文件在 SolidWorks 中构建完成后,先通过 3ds Max 导出为 fbx 文件,再将该 fbx 文件导入 Unity(导入教程可在 CSDN 自行搜索)

导入完成后设置好模型位置与模型材质

二、构建unity强化学习环境

①安装强化学习插件Barracuda和mlagents

我的安装版本是 Barracuda 3.0.0(March 01, 2022)与 mlagents 2.0.1(November 08, 2021)。Barracuda 是 Unity 的神经网络推理库,用于在 CPU 或 GPU 上运行训练好的模型;mlagents 是强化学习工具箱,在设计强化学习程序时,有一些父类(如 Agent)需要从工具箱中直接继承

②构造必要的机械关联

为简化设计,将四个调平液压缸的y轴距离与活动横梁的四个受力点进行直接绑定

第一步:创建四个空物体,调整空物体位置使其与调平缸的四个受力点重合

第二步:在四个调平缸与受力点上绑定位置关系

脚本文件内容如下:

using System.Collections;
using System.Collections.Generic;
using UnityEngine;

/// <summary>
/// Keeps this object's world-space Y coordinate equal to the Y coordinate of
/// <see cref="specifiedPoint3"/>, leaving its X and Z coordinates untouched.
/// </summary>
public class cy3 : MonoBehaviour
{
    // Transform whose world-space Y coordinate is followed; assigned in the Unity editor.
    public Transform specifiedPoint3;

    // Start is called before the first frame update
    void Start()
    {
        // Report a missing reference once instead of failing on every frame.
        if (specifiedPoint3 == null)
        {
            Debug.LogWarning(nameof(cy3) + " on '" + name + "': specifiedPoint3 is not assigned; the Y position will not be updated.", this);
        }
    }

    // Update is called once per frame
    void Update()
    {
        // Unity's overloaded == also treats a destroyed object as null,
        // so this guard covers both an unassigned and a destroyed target.
        if (specifiedPoint3 == null)
        {
            return;
        }

        // Copy only the Y coordinate from the followed point.
        Vector3 currentPosition = transform.position;
        currentPosition.y = specifiedPoint3.position.y;

        // Write the adjusted position back to this object.
        transform.position = currentPosition;
    }
}

第三步:在刚体组件中固定活动横梁的y轴位移与偏转

第四步:设置强化学习环境,在滑块组件下设置脚本文件,内容如下,代码内容自行理解

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;

/// <summary>
/// ML-Agents agent that applies four vertical forces around its own rigid body
/// and is rewarded for keeping its local X and Z tilt close to zero.
/// </summary>
/// <remarks>
/// <see cref="OnActionReceived"/> reads continuous actions 0..3, so the behavior
/// must be configured with at least four continuous actions.
/// </remarks>
public class MyAgent : Agent   // Derives from the ML-Agents Agent base class.
{
    // Multiplier applied to each tanh-squashed action to obtain the Y force component.
    private const float ForceScale = 2f;

    // World-space X/Z offset from the body position to each force application point.
    private const float CornerOffset = 0.6f;

    // Signed tilt (degrees) beyond which the episode is ended.
    private const float FailTiltDegrees = 15f;

    // Signed tilt (degrees) beyond which only the flat reward below is granted.
    private const float CoarseTiltDegrees = 3f;

    // Flat per-step reward used while the tilt exceeds CoarseTiltDegrees.
    private const float CoarseReward = 0.1f;

    // Reward added when colliding with an object tagged "pengpeng".
    private const float CollisionPenalty = -50.0f;

    Rigidbody huakuai;

    /// <summary>Caches the Rigidbody attached to this GameObject.</summary>
    public override void Initialize()
    {
        huakuai = GetComponent<Rigidbody>();
    }

    /// <summary>
    /// Resets velocities, local position and local rotation at the start of an episode.
    /// </summary>
    public override void OnEpisodeBegin()
    {
        // Zero any momentum left over from the previous episode.
        huakuai.angularVelocity = Vector3.zero;
        huakuai.velocity = Vector3.zero;

        transform.localPosition = new Vector3(0, -0.37f, 0);

        // NOTE(review): Random.value * 8 - 8 yields a tilt in [-8, 0] degrees on each
        // axis, i.e. always non-positive. If a symmetric range was intended this
        // should be revisited; left unchanged to preserve the start distribution.
        transform.localEulerAngles = new Vector3(Random.value * 8 - 8, 0, Random.value * 8 - 8);
    }

    /// <summary>
    /// Adds four observations: the z and x components of the world rotation
    /// quaternion, followed by the z and x components of the angular velocity.
    /// </summary>
    public override void CollectObservations(VectorSensor sensor)
    {
        // NOTE(review): rotation.z / rotation.x are quaternion components, not
        // Euler angles — confirm this is the intended observation.
        sensor.AddObservation(transform.rotation.z);
        sensor.AddObservation(transform.rotation.x);
        sensor.AddObservation(huakuai.angularVelocity.z);
        sensor.AddObservation(huakuai.angularVelocity.x);
    }

    /// <summary>
    /// Manual control: maps the "Horizontal" and "Vertical" input axes onto
    /// continuous actions 1 and 2. Actions 0 and 3 are not written here.
    /// </summary>
    public override void Heuristic(in ActionBuffers actionsOut)
    {
        var continuousActionsOut = actionsOut.ContinuousActions;
        continuousActionsOut[1] = Input.GetAxis("Horizontal");
        continuousActionsOut[2] = Input.GetAxis("Vertical");
    }

    /// <summary>
    /// Applies the four action-driven forces, ends the episode when the tilt is
    /// excessive, and otherwise adds the per-step shaping reward.
    /// </summary>
    /// <param name="actions">Action buffers; continuous actions 0..3 are read.</param>
    public override void OnActionReceived(ActionBuffers actions)
    {
        var continuous = actions.ContinuousActions;

        // Read all four actions before applying anything, so that a misconfigured
        // action size fails before any force has been added.
        float force1 = ToForce(continuous[0]);
        float force2 = ToForce(continuous[1]);
        float force3 = ToForce(continuous[2]);
        float force4 = ToForce(continuous[3]);

        float x = ToSignedDegrees(transform.localEulerAngles.x);
        float z = ToSignedDegrees(transform.localEulerAngles.z);

        // Application points are fixed world-space offsets from the body position.
        Vector3 center = transform.position;
        huakuai.AddForceAtPosition(new Vector3(0f, force1, 0f), center + new Vector3(CornerOffset, 0f, CornerOffset));
        huakuai.AddForceAtPosition(new Vector3(0f, force2, 0f), center + new Vector3(CornerOffset, 0f, -CornerOffset));
        huakuai.AddForceAtPosition(new Vector3(0f, force3, 0f), center + new Vector3(-CornerOffset, 0f, CornerOffset));
        huakuai.AddForceAtPosition(new Vector3(0f, force4, 0f), center + new Vector3(-CornerOffset, 0f, -CornerOffset));

        float absX = Mathf.Abs(x);
        float absZ = Mathf.Abs(z);

        if (absX > FailTiltDegrees || absZ > FailTiltDegrees)
        {
            EndEpisode();
            // Stop here: a reward added after EndEpisode() would be credited to the
            // next episode even though it was computed from this one's final tilt.
            return;
        }

        float reward;
        if (absX > CoarseTiltDegrees || absZ > CoarseTiltDegrees)
        {
            reward = CoarseReward;
        }
        else
        {
            // Tilt is converted from degrees to radians before the exponential,
            // giving a value that approaches 1 as both tilts approach zero.
            reward = 0.5f * Mathf.Exp(-absX * Mathf.Deg2Rad)
                   + 0.5f * Mathf.Exp(-absZ * Mathf.Deg2Rad);
        }

        AddReward(reward);
    }

    /// <summary>
    /// Penalizes and ends the episode on collision with an object tagged "pengpeng".
    /// </summary>
    private void OnCollisionEnter(Collision collision)
    {
        if (collision.gameObject.CompareTag("pengpeng"))
        {
            // Apply the collision penalty (-50), then end the episode.
            AddReward(CollisionPenalty);
            print("done");
            EndEpisode();
        }
    }

    // Squashes a raw action with tanh and scales it to a Y force component.
    private static float ToForce(float rawAction)
    {
        return ForceScale * (float)System.Math.Tanh(rawAction);
    }

    // Maps an Euler angle above 180 degrees to its negative equivalent (angle - 360).
    private static float ToSignedDegrees(float eulerDegrees)
    {
        return eulerDegrees > 180 ? eulerDegrees - 360 : eulerDegrees;
    }
}

以下几点注意事项

三、运行调试

由于没有设计强化学习算法,此时运行应该查看两个问题

①程序没有报错

②滑块每隔一段时间会重置到初始位置(上面的代码就是这样设计的:回合结束后会重新调用 OnEpisodeBegin)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值