Hey, I'm trying to implement negamax with alpha-beta and quiescence search for 4 players, but it's a bit different. For example, say 0 = Red, 1 = Blue, 2 = Yellow, and 3 = Green, with Red and Green on the same team, which means two of the four turns will be maximizers and two will be minimizers.
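To pin down what I mean by teams, here is the mapping in code (the TEAM table and same_team helper are only illustrations for this question, not part of my engine):

    # Turn order is 0 -> 1 -> 2 -> 3 -> 0.
    # Red (0) and Green (3) form one team; Blue (1) and Yellow (2) form the other.
    TEAM = {0: 0, 1: 1, 2: 1, 3: 0}

    def same_team(a, b):
        # True when players a and b maximize the same objective.
        return TEAM[a] == TEAM[b]

Here is my implementation, based on the wiki.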
def search(self, position, alpha, beta, depth, root=True):
    self.nodes += 1
    best_move = []
    # Score repetitions as a draw so the engine avoids three-fold repetition when it is winning.
    if position in self.history and not root:
        return 0
    bestscore = -999999
    if depth == 0 or position.is_final:
        return self.quiesce(position, alpha, beta)
    for move in position.moves():
        # When Red (0) or Yellow (2) moves, the turn passes to the opposing team,
        # so swap the window and negate; otherwise keep the same perspective.
        if position.turn in (0, 2):
            score = -self.search(position.move(move), -beta, -alpha, depth - 1, False)
        else:
            score = self.search(position.move(move), alpha, beta, depth - 1, False)
        if score >= beta:
            return score  # fail-soft beta cutoff
        if score > bestscore:
            bestscore = score
            if root:
                best_move = move
        if score > alpha:
            alpha = score
    return best_move if root else bestscore
def quiesce(self, position, alpha, beta):
    if position.is_final:
        return position.score + position.value()  # final position, so just return the evaluation
    stand_pat = position.score + position.value()
    if stand_pat >= beta:
        return beta
    if alpha < stand_pat:
        alpha = stand_pat
    for move in position.moves():
        if move[2] != 1:
            continue  # skip quiet moves; only captures are searched here
        if position.turn in (0, 2):
            score = -self.quiesce(position.move(move), -beta, -alpha)
        else:
            score = self.quiesce(position.move(move), alpha, beta)
        if score >= beta:
            return beta
        if score > alpha:
            alpha = score
    return alpha
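In case the move format matters: moves in my engine are indexable, and move[2] is a capture flag, which is why quiesce skips anything with move[2] != 1. For example (square numbers are made up):

    quiet_move = (12, 28, 0)    # from-square, to-square, no capture: quiesce skips it
    capture_move = (28, 35, 1)  # from-square, to-square, capture: quiesce searches it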
This doesn't seem to work correctly. My suspicion is that when the next player to move is on my own team, we shouldn't swap alpha and beta or negate the returned value.
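To make that concrete, this is the kind of recursion I have in mind for the move loop (just an untested sketch, reusing the hypothetical same_team helper from above and assuming position.move(move) returns a child position with its own .turn):

    # Sketch: negate and swap the window only when the move hands the turn
    # to the opposing team; otherwise search with the same window and sign.
    child = position.move(move)
    if same_team(position.turn, child.turn):
        score = self.search(child, alpha, beta, depth - 1, False)
    else:
        score = -self.search(child, -beta, -alpha, depth - 1, False)

Any suggestions on what I'm doing wrong?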