0

我正在开发一款模拟游戏，玩家需要在地图上的某个位置放置一个医疗站。随后模拟开始运行，并计算该位置的有效性（医疗站离其他建筑物既不能太远，也不能太近）。我想训练一个 ML-Agents 代理来找到最佳位置。由于必须等模拟结束后才能获得奖励（之后模拟重置，代理重新尝试），所以我手动调用了 RequestDecision() 方法。但是 OnActionReceived() 并没有像预期的那样被调用。这是我的代码：

/// <summary>
/// ML-Agents agent that chooses where to place a medical station.
/// Each episode: a decision is requested while the simulation is in edit mode,
/// the received action is turned into a spawn position, the simulation is started,
/// and the episode is ended once the simulation reports its End state.
/// </summary>
public class AIController : Agent
{
    /// <summary>Layer mask used by the downward placement raycast.</summary>
    public LayerMask ground;

    /// <summary>
    /// Scale factors applied to the two action values: index 0 scales the world X
    /// coordinate and index 1 scales the world Z coordinate of the raycast origin.
    /// </summary>
    public float[] mapSize = new float[2];

    /// <summary>
    /// Polls the simulation state every frame: ends the episode when the simulation
    /// has finished, or requests a decision while the simulation is in edit mode.
    /// </summary>
    private void Update()
    {
        SimulationMaster.SimulationState state = SimulationMaster.Instance.State;
        if (state == SimulationMaster.SimulationState.End)
        {
            // The reward is the negated total score of the finished simulation.
            // NOTE(review): presumably a lower total score is better — confirm.
            SetReward(-SimulationMaster.Instance.GetTotalScore());
            EndEpisode();
        }
        else if (state == SimulationMaster.SimulationState.EditMode)
        {
            RequestDecision();
        }
    }

    /// <summary>Puts the simulation back into edit mode at the start of each episode.</summary>
    public override void OnEpisodeBegin()
    {
        SimulationMaster.Instance.ResetToEditMode();
    }

    /// <summary>
    /// Casts a ray straight down from height 10 at the scaled (x, y) position and
    /// spawns a medical station at the point where it hits the <see cref="ground"/> layer.
    /// Logs an error and spawns nothing when the ray hits no ground.
    /// </summary>
    /// <param name="x">Action value scaled by <c>mapSize[0]</c> to get the world X coordinate.</param>
    /// <param name="y">Action value scaled by <c>mapSize[1]</c> to get the world Z coordinate.</param>
    private void SpawnMedicalStation(float x, float y)
    {
        Ray ray = new Ray(new Vector3(x * mapSize[0], 10, y * mapSize[1]), Vector3.down);
        if (!Physics.Raycast(ray, out RaycastHit hitInfo, Mathf.Infinity, ground))
        {
            Debug.LogError("AIController - Out of bound");
            return;
        }
        SpawnManager.Instance.SpawnMedicalStation(hitInfo.point, 2, 2);
    }

    /// <summary>
    /// Observes the X and Z components of the barycentre of the first tactic event
    /// as a single 2D vector observation.
    /// </summary>
    /// <param name="sensor">Vector sensor that receives the observation.</param>
    public override void CollectObservations(VectorSensor sensor)
    {
        Vector3 pos = TacticEventManager.Instance.TacticEvents[0].Barycentre;
        sensor.AddObservation(new Vector2(pos.x, pos.z));
    }

    /// <summary>
    /// Converts the first two continuous actions into a placement position, spawns
    /// the medical station there and starts the simulation. Ignored unless the
    /// simulation is currently in edit mode.
    /// </summary>
    /// <param name="actions">Action buffers; continuous actions 0 and 1 are read.</param>
    public override void OnActionReceived(ActionBuffers actions)
    {
        if (SimulationMaster.Instance.State != SimulationMaster.SimulationState.EditMode) return;

        // Clamp to [-1, 1]: not every action source guarantees that range, and an
        // unbounded value would move the raycast origin outside the scaled map area.
        float x = Mathf.Clamp(actions.ContinuousActions[0], -1f, 1f);
        float y = Mathf.Clamp(actions.ContinuousActions[1], -1f, 1f);

        Debug.Log($"Action received: ({x}, {y})");
        SpawnMedicalStation(x, y);
        // NOTE(review): the simulation is started even when SpawnMedicalStation found
        // no ground and spawned nothing — confirm that this is intended.
        SimulationMaster.Instance.StartSimulation();
    }
}
4

0 回答 0