我正在开发一个模拟游戏,玩家需要在地图上的某个位置放置一个医疗站。随后模拟开始运行,并评估该位置的有效性(它不能离其他建筑物太远,也不能太近)。我想训练一个 ML-Agents 智能体来找到最佳位置。由于必须等模拟结束才能得到奖励(之后模拟会重置,智能体再次尝试),我选择手动调用 RequestDecision() 方法。但 OnActionReceived() 并没有像预期那样被调用。以下是我的代码:
/// <summary>
/// Agent that chooses where to place a medical station. A decision is requested
/// while the simulation is in edit mode; once the simulation reports its end
/// state the negated total score is assigned as the reward and the episode ends.
/// </summary>
public class AIController : Agent
{
    /// <summary>Height (world Y) from which the placement ray is cast downwards.</summary>
    private const float RayOriginHeight = 10f;

    /// <summary>Layer mask the placement raycast is restricted to.</summary>
    public LayerMask ground;

    /// <summary>
    /// Scale applied to the two action components: index 0 scales the first
    /// action into world X, index 1 scales the second action into world Z.
    /// </summary>
    public float[] mapSize = new float[2];

    /// <summary>
    /// Polls the simulation state every frame: rewards and ends the episode when
    /// the simulation has ended, or requests a decision while in edit mode.
    /// </summary>
    private void Update()
    {
        SimulationMaster.SimulationState state = SimulationMaster.Instance.State;

        if (state == SimulationMaster.SimulationState.End)
        {
            // The reward is the negated total score of the finished run.
            SetReward(-SimulationMaster.Instance.GetTotalScore());
            EndEpisode();
        }
        else if (state == SimulationMaster.SimulationState.EditMode)
        {
            RequestDecision();
        }
    }

    /// <summary>Puts the simulation back into edit mode at the start of each episode.</summary>
    public override void OnEpisodeBegin()
    {
        SimulationMaster.Instance.ResetToEditMode();
    }

    /// <summary>
    /// Casts a ray straight down at the scaled (x, y) position and spawns a
    /// medical station at the hit point on the <see cref="ground"/> layers.
    /// Logs an error and spawns nothing when the ray hits no ground.
    /// </summary>
    /// <param name="x">Action component scaled by <c>mapSize[0]</c> into world X.</param>
    /// <param name="y">Action component scaled by <c>mapSize[1]</c> into world Z.</param>
    private void SpawnMedicalStation(float x, float y)
    {
        Vector3 origin = new Vector3(x * mapSize[0], RayOriginHeight, y * mapSize[1]);
        Ray ray = new Ray(origin, Vector3.down);

        if (!Physics.Raycast(ray, out RaycastHit hitInfo, Mathf.Infinity, ground))
        {
            Debug.LogError("AIController - Out of bound");
            return;
        }

        // NOTE(review): the meaning of the two trailing arguments (2, 2) is not
        // visible from this file - confirm against SpawnManager.
        SpawnManager.Instance.SpawnMedicalStation(hitInfo.point, 2, 2);
    }

    /// <summary>
    /// Observes the X and Z coordinates of the first tactic event's barycentre.
    /// </summary>
    /// <param name="sensor">Sensor receiving the observation.</param>
    public override void CollectObservations(VectorSensor sensor)
    {
        // NOTE(review): assumes TacticEvents always holds at least one entry - confirm.
        Vector3 pos = TacticEventManager.Instance.TacticEvents[0].Barycentre;
        sensor.AddObservation(new Vector2(pos.x, pos.z));
    }

    /// <summary>
    /// Places the medical station at the position given by the first two
    /// continuous actions and starts the simulation. Ignored unless the
    /// simulation is currently in edit mode.
    /// </summary>
    /// <param name="actions">Action buffers; continuous actions 0 and 1 are read.</param>
    public override void OnActionReceived(ActionBuffers actions)
    {
        if (SimulationMaster.Instance.State != SimulationMaster.SimulationState.EditMode)
        {
            return;
        }

        // Clamp to [-1, 1] so an unclipped policy output cannot push the ray
        // origin beyond the range spanned by mapSize.
        float x = Mathf.Clamp(actions.ContinuousActions[0], -1f, 1f);
        float y = Mathf.Clamp(actions.ContinuousActions[1], -1f, 1f);
        Debug.Log($"Action received: ({x}, {y})");

        SpawnMedicalStation(x, y);

        // NOTE(review): the simulation is started even when the placement
        // raycast missed and nothing was spawned - confirm this is intended.
        SimulationMaster.Instance.StartSimulation();
    }
}