1

我正在尝试使用 alpha/beta 修剪实现 MiniMax 算法。完全卡住了,看不到我错在哪里。MiniMax 类包含一个状态和一个动作。getAction 方法返回一个 Action(应该是最好的操作)。Game 对象有两个方法,如果状态是“结束状态”,isTerminal 返回 true,即该状态没有更多的孩子。getUtility 返回与结束状态关联的值。

我试图通过使用反击来决定是 Max 还是 Min 转弯。目前算法没有返回正确的动作,在这个阶段我只专注于让扩展顺序正确,让修剪位工作。

public class MiniMax<State,Action> {


int alpha = Integer.MIN_VALUE;
int beta = Integer.MAX_VALUE;
int foundMax = 0;

int round = -1;
Action action = null;

public Action getAction(Game<State, Action> game, State state)
{
    round++;

    List<Successor<State, Action>> successors = (List<Successor<State, Action>>) game.getSuccessors(state);

    for (int i=0; i < successors.size(); i++)
    {
        if ( round % 2 == 0 )
        {
            if ( game.isTerminal(successors.get(i).state))
            {
                System.out.println("Max turn: Node: " + successors.get(i).state + ", utility: " + game.getUtility(successors.get(i).state) + ", alpha: " + alpha + ", beta: " + beta);
                alpha = Integer.max(alpha, game.getUtility(successors.get(i).state));
                if ( alpha <= beta )
                {
                    break;
                }

                action = successors.get(i).action;
            }
            if ( !game.isTerminal( successors.get(i).state) )
            {
                action = getAction(game, successors.get(i).state);
            }
        }
        else
        {
            if ( game.isTerminal(successors.get(i).state))
            {
                System.out.println("Min turn: Node: " + successors.get(i).state + ", utility: " + game.getUtility(successors.get(i).state) + ", alpha: " + alpha + ", beta: " + beta);
                beta = Integer.min(beta, game.getUtility(successors.get(i).state));
                if ( beta <= alpha )
                {
                    break;
                }

                action = successors.get(i).action;
            }
            if ( !game.isTerminal( successors.get(i).state) )
            {
                action = getAction(game, successors.get(i).state);
            }
        }
    }

    return action;
}

}

4

0 回答 0