c# - 用于跳棋/跳棋的 Negamax 实施

Question

我一直在尝试为用 Unity3D 制作的跳棋游戏实现一个好的 AI，通过在线搜索我发现最好的选择是 MiniMax/Negamax，所以我创建了这个类：

public static class NegaMax
{
    public static IMiniMaxNode FindBestChoice(IEnumerable<IMiniMaxNode> choices, int depth, int sign)
    {
        // if I simply use -Calculate(...) I'm obtaining different results based on whether the depth is even or odd
        // I suspect this is wrong nonetheless
        int inverter = (depth % 2 == 0) ? 1 : -1;

        IMiniMaxNode bestNode = null;
        float alpha = float.NegativeInfinity;
        foreach (var choice in choices)
        {
            var score = inverter * Calculate(choice, depth - 1, float.NegativeInfinity, -alpha, -sign);
            if (score > alpha)
            {
                alpha = score;
                bestNode = choice;
            }
        }

        return bestNode;
    }

    private static float Calculate(IMiniMaxNode node, int depth, float alpha, float beta, int sign)
    {
        if (depth == 0)
        {
            return node.CalculateValue() * sign;
        }

        node.CreateChildren();

        if (node.Children.Length == 0) // if the opponent has no possible move
        {
            return sign / 0f; // (sign == 1) ? positive infinity : negative infinity
        }

        // standard negamax
        var bestValue = float.NegativeInfinity;
        for (int i = 0; i < node.Children.Length; i++)
        {
            var value = -Calculate(node.Children[i], depth - 1, -beta, -alpha, -sign);

            bestValue = Math.Max(bestValue, value);
            alpha = Math.Max(alpha, bestValue);

            if (alpha >= beta)
            {
                return bestValue;
            }
        }

        return bestValue;
    }
}

其中 IMiniMaxNode 是以下接口：

public interface IMiniMaxNode
{
    IMiniMaxNode Parent { get; }
    IMiniMaxNode[] Children { get; }

    float CalculateValue();
    void CreateChildren();
}

实际的实现是这样的：

public class CheckersMove : IMiniMaxNode
{
    public int Team { get; private set; }
    public IMiniMaxNode Parent { get; private set; }
    public IMiniMaxNode[] Children { get; private set; }

    public float CalculateValue()
    {
        // data.state is the current board array after this move has been applied
        // the board array is an int[8,8]:
        //     empty = 0, black pawn = -1, black king = -2, white pawn = 1, white king = 2
        //
        // and GetAbsoluteValue() simply returns a Sum of each element

        return data.state.GetAbsoluteValue() * Team;
    }

    public void CreateChildren()
    {
        // calculate every possible move for the opponent and assign them to the Children array
        // every child has Team = -Parent.Team
        // also, if a move has multiple jumps, they all get included in the same node
    }

    // ... other methods and properties to calculate the possible moves, 
    // and to store movement data (starting position, movement deltas, jump[s], promotion)
}

然后我在我的 CheckersAI 类中使用它：

private static MovementData CalculateMoveInternal(State state)
{
    // - state.team represents the team that has to make a move:
    //       black = -1, white = +1
    // - state.input is the current board state, represented an int[8,8] array: 
    //       empty = 0, black pawn = -1, black king = -2, white pawn = 1, white king = 2
    // - state.depth is simply the depth argument for the negamax function

    // create an empty root node, corresponding to the opponent's last move (hence -state.team)
    var move = CheckersMove.CreateRoot(state.input, -state.team);

    // calculate all possible moves for the current team
    move.CreateChildren();

    // find the best node amongst all of the root node children (shuffled to obtain different results if the best moves have the same value)
    var result = NegaMax.FindBestChoice(move.Children.Shuffle(), state.depth, state.team) as CheckersMove;

    // cache some values for debugging
    _lastRootMove = move;
    _lastChosenMove = result;

    // convert the result node into a valid move for the engine
    return CreateMovementData(result);
}

AI 在大部分比赛中似乎都可以正常工作，但有时会做出明显错误的决定（例如，无缘无故地牺牲 2 个棋子），有时它会反转“无可能移动”情况的价值，因为它会将其分配为正数无穷大（胜利），而它应该是负无穷大（失败）。

我 90% 确定问题出在某个地方的错误标志上，但我一直在尝试每种方法中所有可能的标志组合，而 AI 总是做出意想不到的决定，我正在以黑队 (-1) 和白队（+1）。

谁能帮助我，指出我可能做错了什么？我试图包含所有相关的代码并评论每一个重要的段落。谢谢你！

c# - 用于跳棋/跳棋的 Negamax 实施

0 回答 0

Related

Reference