Designing a High-IQ AI in Practice


Unity's ML-Agents support for intelligent agents is a pleasant surprise: in a moderately complex, changing environment the AI can reason much like a human player. This post shares the machine-learning setup.
(Figure: screenshot of the training scene.)
The scenario itself is simple: the AI must pick up the green objects from among a clutter of other objects. This is the same basic requirement as an AI that needs to move toward certain attack targets or selectively collect useful items. The previous post covered building the environment; this one goes straight to the code.

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using MLAgents;
using System;

public class NPCAgent : Agent
{
    // Scene references assigned in the Inspector.
    public GameObject ground;
    public GameObject redcube;
    public GameObject greencube;

    RayPerception rayPer;   // ray-based perception component on this agent
    Rigidbody rd;           // used to push the agent around with physics forces

    public override void InitializeAgent()
    {
        base.InitializeAgent();
        rayPer = GetComponent<RayPerception>();
        rd = GetComponent<Rigidbody>();
    }

    public override void CollectObservations()
    {
        // Cast 5 rays, 12 units long, and report which tagged object each ray hits.
        float rayDistance = 12f;
        float[] rayAngles = { 20f, 60f, 90f, 120f, 160f };
        string[] detectableObjects = { "Cube+", "Cube-", "wall" };
        // How far into the episode we are, normalized to [0, 1].
        AddVectorObs(GetStepCount() / (float)agentParameters.maxStep);
        AddVectorObs(rayPer.Perceive(rayDistance, rayAngles, detectableObjects, 0f, 0f));
    }

    public override void AgentAction(float[] vectorAction, string textAction)
    {
        // Optional small per-step penalty to encourage faster collection (disabled here).
        // AddReward(-1f / agentParameters.maxStep);
        MoveAgent(vectorAction);
    }

    private void MoveAgent(float[] vectorAction)
    {
        // End the episode if there are no green cubes left in the scene.
        if (GameObject.FindGameObjectsWithTag("Cube+").Length <= 0)
        {
            Done();
            return;
        }

        // Continuous actions: [0] drives forward/backward, [1] turns left/right.
        Vector3 dirToGo = transform.forward * Mathf.Clamp(vectorAction[0], -1f, 1f);
        Vector3 rotateDir = transform.up * Mathf.Clamp(vectorAction[1], -1f, 1f);

        transform.Rotate(rotateDir, Time.deltaTime * 150f);
        rd.AddForce(dirToGo * 1.5f, ForceMode.VelocityChange);
    }
    void OnCollisionEnter(Collision col)
    {
        if (col.gameObject.CompareTag("Cube+"))
        {
            // Collected a green cube: reward it and respawn the cube elsewhere on the ground.
            SetReward(1f);
            col.gameObject.transform.position = new Vector3(UnityEngine.Random.Range(-40f, 40f), 0, UnityEngine.Random.Range(-40f, 40f));
            Debug.Log("ok successful!");
        }
        if (col.gameObject.CompareTag("Cube-"))
        {
            // Touched a poisonous red cube: penalize and respawn the cube elsewhere.
            SetReward(-1f);
            col.gameObject.transform.position = new Vector3(UnityEngine.Random.Range(-40f, 40f), 0, UnityEngine.Random.Range(-40f, 40f));
            Debug.Log("sorry!");
        }
        if (col.gameObject.CompareTag("wall"))
        {
            // Small penalty for bumping into the walls.
            SetReward(-0.1f);
        }
    }
    public override void AgentReset()
    {
        // Respawn the agent at a random position and heading, and zero its velocity.
        transform.position = new Vector3(UnityEngine.Random.Range(-40f, 40f), 0.45f, UnityEngine.Random.Range(-40f, 40f));
        transform.rotation = Quaternion.Euler(0f, UnityEngine.Random.Range(0f, 360f), 0f);
        //  transform.rotation = Quaternion.Euler(0f, 0f, 0f);
        rd.velocity *= 0f;

        // Scatter all green and red cubes to new random positions.
        GameObject[] greenCubes = GameObject.FindGameObjectsWithTag("Cube+");
        for (int i = 0; i < greenCubes.Length; i++)
        {
            greenCubes[i].transform.position = new Vector3(UnityEngine.Random.Range(-40f, 40f), 0, UnityEngine.Random.Range(-40f, 40f));
        }
        GameObject[] redCubes = GameObject.FindGameObjectsWithTag("Cube-");
        for (int i = 0; i < redCubes.Length; i++)
        {
            redCubes[i].transform.position = new Vector3(UnityEngine.Random.Range(-40f, 40f), 0, UnityEngine.Random.Range(-40f, 40f));
        }
    }
}
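
As a quick sanity check, here is a small sketch I'm adding (it is not part of the original code) that spells out the Brain settings this script assumes: a continuous action vector of size 2 and a vector observation of size 26. The class and constant names are purely for illustration.

// Hypothetical helper (illustration only): derives the Brain settings
// implied by NPCAgent.CollectObservations and MoveAgent.
public static class NPCAgentBrainSettings
{
    public const int RayCount = 5;                        // rayAngles.Length
    public const int DetectableTags = 3;                  // "Cube+", "Cube-", "wall"
    public const int FloatsPerRay = DetectableTags + 2;   // one-hot tags + "nothing hit" flag + distance

    // 1 (normalized step count) + 5 rays * 5 floats per ray = 26
    public const int VectorObservationSize = 1 + RayCount * FloatsPerRay;

    // Continuous actions: [0] = forward/backward thrust, [1] = turn rate
    public const int VectorActionSize = 2;
}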

Colliding with a green cube gives a reward of +1, colliding with a poisonous red cube gives -1, and hitting a wall costs -0.1; these rewards are the signal the AI is trained on.
The observations include, for every object the rays "see", its distance from the agent, which biases the trained brain toward choosing nearby objects. Interestingly, the agent I ended up with does not behave like a scripted robot that always sweeps outward from the nearest object; like a human player, it sometimes picks a different direction or angle for its next move, although most of the time it does deal with the closest objects first. For reference, the Perceive method below, quoted from the ML-Agents RayPerception component, shows how those ray observations are built:

public override List<float> Perceive(float rayDistance,
    float[] rayAngles, string[] detectableObjects,
    float startOffset, float endOffset)
{
    perceptionBuffer.Clear();
    // For each ray sublist stores categorical information on detected object
    // along with object distance.
    foreach (float angle in rayAngles)
    {
        endPosition = transform.TransformDirection(
            PolarToCartesian(rayDistance, angle));
        endPosition.y = endOffset;
        if (Application.isEditor)
        {
            Debug.DrawRay(transform.position + new Vector3(0f, startOffset, 0f),
                endPosition, Color.black, 0.01f, true);
        }

        float[] subList = new float[detectableObjects.Length + 2];
        if (Physics.SphereCast(transform.position +
                               new Vector3(0f, startOffset, 0f), 0.5f,
            endPosition, out hit, rayDistance))
        {
            for (int i = 0; i < detectableObjects.Length; i++)
            {
                if (hit.collider.gameObject.CompareTag(detectableObjects[i]))
                {
                    subList[i] = 1;
                    subList[detectableObjects.Length + 1] = hit.distance / rayDistance;
                    break;
                }
            }
        }
        else
        {
            subList[detectableObjects.Length] = 1f;
        }

        perceptionBuffer.AddRange(subList);
    }

    return perceptionBuffer;
}
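
To make that layout concrete, here is a worked example of what a single ray contributes to the observation vector. This is my own illustration, not from the original post, and the class and field names are hypothetical.

// Each ray yields detectableObjects.Length + 2 = 5 floats:
//   [0..2] one-hot flag for the tag that was hit ("Cube+", "Cube-", "wall")
//   [3]    1 if the ray hit nothing within rayDistance
//   [4]    hit distance divided by rayDistance
public static class RayObservationExamples
{
    // A green cube ("Cube+") hit 6 units away, with rayDistance = 12:
    public static readonly float[] GreenCubeAtHalfRange = { 1f, 0f, 0f, 0f, 0.5f };

    // A ray that hits nothing within range:
    public static readonly float[] NothingHit = { 0f, 0f, 0f, 1f, 0f };
}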