c++ - 返回 tictactoe 的 minimax 算法中的 bestMove

Question

我试图编写 Russel Norvig 的人工智能书中给出的井字游戏的极小极大算法。除了将 bestMove 返回给用户的方式之外，它拥有一切。我正在努力返回 bestMove，但无法决定何时选择 bestMove。帮助，有人吗？

moveT MiniMax(stateT state)
{
    moveT bestMove;

    max_move(state,bestMove);

    return bestMove;

}

int max_move(stateT state,int & bestMove)
{
    int v = -10000;
    if(GameIsOver(state))
    {
        return EvaluateStaticPosition(state);

    }

    vector<moveT> moveList;
    GenerateMoveList(state, moveList);
    int nMoves = moveList.size();

    for(int i = 0 ; i < nMoves ; i++)
    {
        moveT move = moveList[i];
        MakeMove(state, move);

        int curValue = min_move(state,bestMove);

            if(curValue > v)
            {
              v = curValue;
              bestMove = move;
            }
        RetractMove(state, move);

    }

    return v;

}

int min_move(stateT state, int &bestMove)
{
    int v = 10000;
    if(GameIsOver(state))
    {
      return EvaluateStaticPosition(state);

    }
    vector<moveT> moveList;
    GenerateMoveList(state, moveList);

    int nMoves = moveList.size();

    for(int i = 0 ; i < nMoves; i++)
    {
        moveT move = moveList[i];
        MakeMove(state, move);

        int curValue = max_move(state,depth+1,bestMove);

            if(curValue < v)
            {
              curValue = v;
            }
        RetractMove(state, move);

    }
    return v;
}

PS：还有其他伪代码可以找到 minmax 值。但是，他们只专注于井字游戏，我正在尝试将其扩展到其他游戏。谢谢。

更新：整个代码可以在这里找到：http: //ideone.com/XPswCl

score 7 · Accepted Answer

在极小极大的最简单版本中，第一个玩家希望最大化他的分数，而第二个玩家希望最小化第一个玩家的分数。由于第一个和第二个玩家都只关心第一个玩家的分数，EvaluateStaticPosition因此应该返回一个值，指示第一个玩家的棋盘状态有多好。轮到谁不重要。

int EvaluateStaticPosition(stateT state)
{
        if(CheckForWin(state, FIRST_PLAYER))
        {
                return WINNING_POSITION;
        } 
        if(CheckForWin(state, Opponent(FIRST_PLAYER)))
        {
                return LOSING_POSITION;
        } 
        return NEUTRAL_POSITION;
}

现在，当您想要最适合第一个玩家的移动时，请调用 MaxMove。当您想要最适合第二名玩家的移动时，请调用 MinMove。

moveT MiniMax(stateT state)
{
    moveT bestMove;
    int i = 0;
    if (state.whoseTurn == FIRST_PLAYER){
        i = MaxMove(state, bestMove);
    }
    else{
        i = MinMove(state,bestMove);
    }
    cout<<"i is "<<i<<endl;
    return bestMove;
}

MinMove最后，你在and内部有一些问题MaxMove。当您分配curRating其中任何一个时，您不应将bestMove其作为第二个参数传递给MaxMoveor MinMove。然后它将对手的最佳移动放入bestMove，这是没有意义的。相反，声明一个opponentsBestMove对象并将其作为第二个参数传递。（您实际上不会使用该对象，甚至不会在之后查看它的值，但这没关系）。通过该更改，您永远不会为bestMovewithin分配任何内容MinMove，因此您应该在if(curRating < v)块内执行此操作。

int MaxMove(stateT state, moveT &bestMove)
{
        if(GameIsOver(state))
        {
            return EvaluateStaticPosition(state);
        }
        vector<moveT> moveList;
        GenerateMoveList(state, moveList);
        int nMoves = moveList.size();
        int v = -1000;
        for(int i = 0 ;i<nMoves; i++)
        {
                moveT move = moveList[i];
                MakeMove(state, move);
                moveT opponentsBestMove;
                int curRating = MinMove(state, opponentsBestMove);
                if (curRating > v)
                {
                        v = curRating;
                        bestMove = move;
                }
                RetractMove(state, move);
        }
        return v;

}
int MinMove(stateT state,  moveT &bestMove)
{
        if(GameIsOver(state))
        {
                return EvaluateStaticPosition(state);
        }
        vector<moveT>moveList;
        GenerateMoveList(state, moveList);
        int nMoves = moveList.size();
        int v = 1000;
        for(int i = 0 ; i<nMoves; i++)
        {
                moveT move = moveList[i];
                MakeMove(state , move);
                moveT opponentsBestMove;
                int curRating = MaxMove(state,opponentsBestMove);
                if(curRating < v)
                {
                        v = curRating;
                        bestMove = move;
                }
                RetractMove(state, move);
        }
        return v;
}

在这一点上，你应该有一个无与伦比的 AI！

The final position looks like this:

 O | O | X
---+---+---
 X | X | O
---+---+---
 O | X | X

Cat's game.

另一种方法利用井字游戏是零和游戏这一事实。换句话说，在游戏结束时，玩家的得分总和将为零。对于两人游戏，这意味着一个玩家的分数将永远是另一个玩家的负数。这对我们来说很方便，因为最小化其他玩家的分数与最大化自己的分数是相同的。因此，我们可以让两个玩家都尝试最大化自己的分数，而不是让一个玩家最大化他的分数，而一个玩家最小化另一个玩家的分数。

改EvaluateStaticPosition回其原始形式，以便根据当前玩家的棋盘状态的好坏给出分数。

int EvaluateStaticPosition(stateT state)
{
        if(CheckForWin(state, state.whoseTurn))
        {
                return WINNING_POSITION;
        }
        if(CheckForWin(state, Opponent(state.whoseTurn)))
        {
                return LOSING_POSITION;
        }
        return NEUTRAL_POSITION;
}

删除MinMove，因为我们只关心最大化。重写MaxMove，以便它选择给对手最差分数的移动。最好棋的得分是其他玩家最差得分的负数。

int MaxMove(stateT state, moveT &bestMove)
{
        if(GameIsOver(state))
        {
                return EvaluateStaticPosition(state);
        }
        vector<moveT> moveList;
        GenerateMoveList(state, moveList);
        int nMoves = moveList.size();
        int v = -1000;
        for(int i = 0 ;i<nMoves; i++)
        {
                moveT move = moveList[i];
                MakeMove(state, move);
                moveT opponentsBestMove;
                int curRating = -MaxMove(state, opponentsBestMove);
                if (curRating > v)
                {
                        v = curRating;
                        bestMove = move;
                }
                RetractMove(state, move);
        }
        return v;

}

由于MaxMove用于两个玩家，我们不再需要在MiniMax函数中区分玩家。

moveT MiniMax(stateT state)
{
    moveT bestMove;
    int i = 0;
    i = MaxMove(state, bestMove);
    cout<<"i is "<<i<<endl;
    return bestMove;
}

score 4 · Accepted Answer

好吧，它看起来MiniMax为你正确地选择了它，只需用初始状态和深度调用它。（除非根据状态的第一个玩家是第二个玩家，那么你应该在 MiniMax 中调用 min_move。）

编辑：是的，我忽略了一些东西，bestMove 目前没有多大意义。在 max_move 中的程序中，您可以像这样更改循环：

for(int i = 0 ; i < nMoves ; i++)
{
    moveT move = moveList[i];
    MakeMove(state, move);

    int new_value = min_move(state, depth+1);
    if(new_value > v)
    {
      v=new_value;
    }
    RetractMove(state, move);

}

在那之后你可以想想你bestMove是什么意思？我的想法是，您有兴趣找到井字游戏的“最佳”系列动作之一。为此，您需要一个 vector 甚至更好的stack。但这也意味着将其std::stack<int>* best_moves作为最后一个参数。

对于堆栈实现，在 min_move 中，您返回下一个动作，如果它们的值是最好的，您将把您推到堆栈move顶部。best_moves当然，在游戏结束时，您只需返回空堆栈即可。它需要一个面向对象的方法来正确地完成它，我会在有时间的时候做。

如果您只需要最好的下一步行动，那么我建议您将 min_move 和 max_moe 的返回类型更改为如下结构：

struct Value_move{
  int value;
  moveT best_move;
};

那么 max_move 的新实现如下所示：

const int MOVE_INVALID = -12345;
const int MOVE_NOTHING = -12346;

Value_move max_move(stateT state, int depth)
{
    Value_move best;
    best.value = -10000; best.best_move = MOVE_INVALID;

    if(GameIsOver(state))
    {
        best.value = EvaluateStaticPosition(state);
        best.best_move = MOVE_NOTHING;
        return best;
    }

    vector<moveT> moveList;
    GenerateMoveList(state, moveList);
    int nMoves = moveList.size();

    for(int i = 0 ; i < nMoves ; i++)
    {
        moveT move = moveList[i];
        MakeMove(state, move);
        Value_move curr = min_move(state, depth+1);
        if(curr.value > best.value)
        {
            best.value = curr.value;
            best.best_move = move;
        }
        RetractMove(state, move);

    }

    return v;

}

您只需要在 MiniMax 函数的返回结构中提取 best_move 字段。

备注：
您必须承认，尽管这在许多方面都不像 c++ 程序，但更像是 ac 程序。否则，CapitalCamelCase 中的所有函数都应该是类方法，你应该通过 (const) ref 而不是 value 传递状态——但只有当状态真的是 typedef 后面的指针时，整个代码才有意义。

score 0 · Accepted Answer

您的代码找到了正确的值，然后通过向下传递相同的引用来覆盖它。

int curValue = min_move(state,bestMove);

应该成为

moveT nextMove; // No need to actually do anything with this value
int curValue = min_move(state,nextMove);

您还需要在 min_move 函数中进行相同类型的更改。

注意：在min_move您的代码调用max_move中，参数比您为函数定义的要多。

c++ - 返回 tictactoe 的 minimax 算法中的 bestMove

3 回答 3

Related

Reference