0

我正在尝试为奥赛罗游戏编写极小极大算法。当有更好的位置可供使用时,我的代码会从它应该选择的列表中选择第一个可用位置:

例如,这是我的驱动程序代码:

board = np.array([['e', 'B', 'B', 'B'],
                  ['R', 'B', 'R', 'R'],
                  ['B', 'R', 'R', 'e'],
                  ['e', 'e', 'R', 'e']])
emptyspots = [[0, 0], [2, 3], [3, 0], [3, 1], [3, 3]]
player = 'B'
otherplayer = 'R'
takenred = [[1,0],[1, 2], [1, 3],[2,1], [2, 2],[3,2]]
takenblue = [[0,1],[0,2],[0, 3],[1,1],[2,0]]
validblue = [[0,0],[2,3],[3,1],[3,3]]
validred = [[0,0],[3,0]]
print(BestMove(deepcopy(board), deepcopy(emptyspots), player, otherplayer, deepcopy(takenred), deepcopy(takenblue),
                deepcopy(validblue), deepcopy(validred)))

我的代码选择了 [0,0],而它真正应该选择 [2,3]

这是我实际的 AI minimax 代码:

def evaluation(board, emptyspots, player, otherplayer, takenred, takenblue,validblue,validred,depth):
    if depth==0:
        if len(takenblue) > len(takenred):
            return 10
        if len(takenblue) < len(takenred):
            return -10
        return 0


def minimax(board, emptyspots, player, otherplayer, takenred, takenblue, depth, ismaximizing, validblue, validred):
    score = evaluation(deepcopy(board), deepcopy(emptyspots), player, otherplayer, deepcopy(takenred),
                       deepcopy(takenblue),deepcopy(validblue),deepcopy(validred),depth)
    if score == 10:
        return score
    if score == -10:
        return score
    if score==0:
        return 0
    if ismaximizing:
        best = -1000
        for q in validblue:
            board, newtakenred, newtakenblue = main(deepcopy(board), q, deepcopy(takenred), deepcopy(takenblue), player,
                                                    otherplayer)
            best = max(best,
                       deepcopy(minimax(deepcopy(board), deepcopy(emptyspots), player, otherplayer, deepcopy(newtakenred),
                               deepcopy(newtakenblue), depth -1, not ismaximizing,
                               deepcopy(validblue), deepcopy(validred))))
        return best
    else:
        best = 1000
        for z in validred:
            board, newtakenred, newtakenblue = main(deepcopy(board), z, deepcopy(takenred), deepcopy(takenblue), player,
                                                    otherplayer)
            best = min(best,
                       deepcopy(minimax(deepcopy(board), deepcopy(emptyspots), player, otherplayer, deepcopy(newtakenred),
                               deepcopy(newtakenblue), depth -1, not ismaximizing,
                               deepcopy(validblue), deepcopy(validred))))
        return best


def BestMove(board, emptyspots, player, otherplayer, takenred, takenblue, validblue, validred):
    bestvalue = -1000
    bestmove = [-1, -1]
    for j in validblue:
        board, newtakenred, newtakenblue = main(deepcopy(board), j, deepcopy(takenred), deepcopy(takenblue), player,
                                                otherplayer)
        movevalue = minimax(deepcopy(board), deepcopy(emptyspots), player, otherplayer, deepcopy(newtakenred),
                            deepcopy(newtakenblue), 4, True,
                            deepcopy(validblue), deepcopy(validred))
        if movevalue > bestvalue:
            bestmove = j
            bestvalue = movevalue
    return bestmove

任何帮助将不胜感激

4

0 回答 0