[C# & UNITY] Roll a Ball에 ML-Agents 적용하기 - 끄적낙서

기존 Roll a Ball 예제에 ML-Agents를 적용하여 강화 학습을 시도했습니다. (교수님 대단하다)

https://corinyoungee.8

요기 롤-어-볼 스크립트

쉬운 목차

1. 계층 구조 변경

강화 학습을 수행하려면 계층 구조(Hierarchy)의 모든 오브젝트를 TrainingArea 오브젝트 아래로 이동해야 합니다.

나는 또한 이름을 RollerBall로 변경했습니다.

2. 픽업 횟수 줄이기

강화 학습을 노트북에서 수행하기 때문에, 부담이 되지 않도록 픽업(목표) 개수를 5개에서 3개로 줄였습니다.

3. 스크립트

CameraController
- 카메라 관련 스크립트이므로 ML-Agents와는 아무런 관련이 없습니다.
Rotator
- 픽업(목표물)을 회전시키는 스크립트이므로 ML-Agents와 관련이 없습니다.
PlayerController
- ML-Agents에 적용할 수 있도록 이 스크립트를 수정해야 합니다.

4. ML 에이전트를 프로젝트에 연결합니다.

창 -> 패키지 관리자 -> 디스크에서 패키지 추가 -> com.unity.ml-agents -> package.json 파일 열기.

package.json 파일을 별도로 다운로드해야 합니다.

5. PlayerController 스크립트를 수정합니다.

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.UI;

using Unity.MLAgents;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Actuators;

/// <summary>
/// Roll-a-Ball player driven by Unity ML-Agents.
/// Derives from <see cref="Agent"/> and overrides its episode, observation,
/// action and heuristic callbacks so the ball can be trained with
/// reinforcement learning instead of being steered directly from the keyboard.
/// </summary>
public class PlayerController : Agent
{
    // Kept from the original keyboard-driven controller; not read by the agent logic below.
    public float speed;
    Rigidbody rBody;

    // Number of pickups collected in the current episode.
    private int count;

    public Text CountText;
    public Text WinText;

    // Total number of pickups in the training area (must match the PickUp fields below).
    int num_pickup = 3;
    public GameObject PickUp0;
    public GameObject PickUp1;
    public GameObject PickUp2;

    // Rendered frames elapsed since the current episode started.
    private int frames;

    // Episode is terminated once `frames` exceeds this value.
    public int max_frames;

    // Scale applied to the two continuous actions before they are used as a force.
    public float forceMultiplier = 10.0f;

    // Observation reported for a pickup that has already been collected.
    // The -10 on y is a sentinel that cannot be confused with a real offset on the plane.
    static readonly Vector3 CollectedPickupObservation = new Vector3(0, -10.0f, 0);

    /// <summary>Unity callback: caches the rigidbody and resets counters and UI.</summary>
    void Start()
    {
        rBody = GetComponent<Rigidbody>();
        count = 0;
        frames = 0;
        SetText();
    }

    /// <summary>Unity callback, once per rendered frame: advances the frame counter.</summary>
    void Update()
    {
        ++frames;
    }

    // The original FixedUpdate read the keyboard axes and applied them as a force.
    // That is now done in OnActionReceived using the values supplied by the policy
    // (or by Heuristic when testing manually).

    /// <summary>
    /// Unity callback: when the ball touches an object tagged "PickUp", hides it,
    /// bumps the counter, refreshes the UI and rewards the agent.
    /// </summary>
    void OnTriggerEnter(Collider other)
    {
        if (!other.gameObject.CompareTag("PickUp"))
        {
            return;
        }

        // Collected pickups are deactivated; OnEpisodeBegin re-activates them.
        other.gameObject.SetActive(false);
        count = count + 1;
        SetText();

        // AddReward accumulates, so two pickups collected within the same decision
        // step are both credited (SetReward would overwrite the first one).
        AddReward(1.0f);
    }

    /// <summary>
    /// Refreshes the UI labels: shows the win message once every pickup is collected,
    /// otherwise shows the running count. Labels that are not assigned are skipped.
    /// </summary>
    void SetText()
    {
        bool won = count >= num_pickup;

        if (CountText != null)
        {
            CountText.text = won ? "" : "Count:" + count.ToString();
        }

        if (WinText != null)
        {
            WinText.text = won ? "You Win!" : "";
        }
    }

    /// <summary>
    /// ML-Agents callback invoked at the start of every episode.
    /// Re-spawns the ball if it fell off the plane, scatters and re-activates
    /// all pickups, and resets the counters and UI.
    /// </summary>
    public override void OnEpisodeBegin()
    {
        // The arena walls are expected to be removed so that falling off ends the
        // episode quickly; short episodes make training faster.
        // The plane is at y = 0 and the ball rests at y = 0.5, so y < 0 means "fell off".
        if (this.transform.localPosition.y < 0.0f)
        {
            this.rBody.angularVelocity = Vector3.zero;
            this.rBody.velocity = Vector3.zero;

            // Random.value is in [0, 1]; this maps x and z to [-5, 5].
            // Adjust the range to the size of the plane.
            this.transform.localPosition = new Vector3(
                Random.value * 10.0f - 5.0f,
                0.5f,
                Random.value * 10.0f - 5.0f);
        }

        ResetPickup(PickUp0);
        ResetPickup(PickUp1);
        ResetPickup(PickUp2);

        count = 0;
        frames = 0;
        SetText();
    }

    /// <summary>
    /// Moves a pickup to a random spot (x and z in [-7.5, 7.5], y = 0.8)
    /// and re-activates it, since collected pickups were deactivated.
    /// </summary>
    void ResetPickup(GameObject pickup)
    {
        pickup.transform.localPosition = new Vector3(
            Random.value * 15.0f - 7.5f,
            0.8f,
            Random.value * 15.0f - 7.5f);
        pickup.SetActive(true);
    }

    /// <summary>
    /// ML-Agents callback: collects the vector observation for this step.
    /// Layout (14 floats): agent local position (3), agent velocity x and z (2),
    /// then one offset vector (3) per pickup in the order PickUp0, PickUp1, PickUp2.
    /// </summary>
    public override void CollectObservations(VectorSensor sensor)
    {
        // Agent's position
        sensor.AddObservation(this.transform.localPosition);

        // Agent's velocity on the plane
        sensor.AddObservation(rBody.velocity.x);
        sensor.AddObservation(rBody.velocity.z);

        // Offset from each pickup to the agent (sentinel for collected ones)
        sensor.AddObservation(PickupObservation(PickUp0));
        sensor.AddObservation(PickupObservation(PickUp1));
        sensor.AddObservation(PickupObservation(PickUp2));
    }

    /// <summary>
    /// Returns (agent position - pickup position) for an active pickup.
    /// A collected (inactive) pickup must not leak its position, so a fixed
    /// sentinel vector is returned for it instead.
    /// </summary>
    Vector3 PickupObservation(GameObject pickup)
    {
        // activeSelf is read-only; the state is changed through SetActive.
        if (pickup.activeSelf)
        {
            return this.transform.localPosition - pickup.transform.localPosition;
        }

        return CollectedPickupObservation;
    }

    /// <summary>
    /// ML-Agents callback: applies the two continuous actions as a force on x and z,
    /// then checks the episode termination conditions.
    /// </summary>
    public override void OnActionReceived(ActionBuffers actionBuffers)
    {
        // The action space has two continuous values. ContinuousActions is an
        // ActionSegment<float> and is accessed with the [] indexer.
        Vector3 controlSignal = Vector3.zero;
        controlSignal.x = actionBuffers.ContinuousActions[0];
        controlSignal.z = actionBuffers.ContinuousActions[1];
        rBody.AddForce(controlSignal * forceMultiplier);

        // Termination checks, in priority order.
        if (count >= num_pickup)
        {
            // Every pickup has been collected.
            EndEpisode();
        }
        else if (frames > max_frames)
        {
            // Time budget for this episode is exhausted.
            EndEpisode();
        }
        else if (this.transform.localPosition.y < 0.0f)
        {
            // The ball fell off the plane: penalise and restart.
            SetReward(-1.0f);
            EndEpisode();
        }
    }

    /// <summary>
    /// ML-Agents callback used for manual testing: maps the keyboard axes
    /// onto the two continuous actions.
    /// </summary>
    public override void Heuristic(in ActionBuffers actionsOut)
    {
        // ActionSegment<float> is a view over the action buffer, so writing through
        // the local copy updates actionsOut.
        var continuousActionsOut = actionsOut.ContinuousActions;
        continuousActionsOut[0] = Input.GetAxis("Horizontal");
        continuousActionsOut[1] = Input.GetAxis("Vertical");
    }
}

다음에,