python - 蒙特卡洛树搜索不断给出相同的结果

Question

我写了一个蒙特卡洛树搜索算法（基于https://en.wikipedia.org/wiki/Monte_Carlo_tree_search），并将它与“python-chess”库连接起来。

基本上，该算法卡在某个地方，因为它一直打印为输出“1/2-1/2”（绘图）。

扩展功能可能有问题，但我真的不知道在哪里。

这是代码：

class MonteCarloTreeSearch():
    def __init__(self):
        self.board = chess.Board()
        self.results = []
        self.total_matches = 0

def expansion(self, leaf):
    done = False
    if not (self.board.is_game_over()):
        if len(leaf.children != 0):
            possible = np.array([i for i in self.board.legal_moves])
            actual = []
            for i in range(len(possible)):
                test = possible[i]
                cnt = 0
                for j in range(len(leaf.children)):
                    if (not test == leaf.children[j].value) and cnt == 0:
                        actual.append(test)
                        cnt += 1
            move = random.choice(actual)
        else:
            move = random.choice([i for i in self.board.legal_moves])                
        
        self.board.push(move)            
        if self.board.is_game_over():
            done = True
        
        child = Node(move, player="white" if leaf.player=="black" else "black", parent=[leaf], score=0)
        
        leaf.children = np.append(leaf.children, child)
        
        if not done:
            return self.expansion(child)
    return leaf
            
def playout(self, starting_node):        
    childr = self.expansion(starting_node)  
    
    result = self.board.result()
    if result == "1-0":
        result = 1
    elif result == "0-1":
        result = -1
    elif result == "1/2-1/2":
        result = .5
    elif result == "*":
        raise Exception("ERROR: Game was not over, but playout stopped.")
    else:
        raise Exception("ERROR: Playout process error.")
                    
    return childr, result, starting_node

def expansion_playout_backpropagation(self, start):
    node, result, starting_node = self.playout(start)
            
    i = 0
    while i == 0: 
        node.matches += 1
        if node.player == "white":
            if result == 1:
                node.score += 1
                
            if node.matches > 0:
                node.winp = node.score / node.matches
            else:
                node.winp = 0
        elif node.player == "black":
            if result == -1:
                node.score += 1
                
            if node.matches > 0:
                node.winp = node.score / node.matches
            else:
                node.winp = 0
        else:
            raise Exception("ERROR: Invalid player selection.")
            
        if node.is_equal(starting_node):
            i += 1
        else:
            node = node.parent[0]
        
    self.results.append(self.board.result())
    print(self.board.result())
        
    self.board.reset_board()
    self.total_matches += 1

def backpropagation(self, node, result, starting_node):
        i = 0
        while i == 0:
            node.matches += 1
            if node.player == "white":
                if result == 1:
                    node.score += 1
                    
                if node.matches > 0:
                    node.winp = node.score / node.matches
                else:
                    node.winp = 0
            elif node.player == "black":
                if result == -1:
                    node.score += 1
                    
                if node.matches > 0:
                    node.winp = node.score / node.matches
                else:
                    node.winp = 0
            else:
                raise Exception("ERROR: Invalid player selection.")

            if node.is_equal(starting_node):
                i += 1
            else:
                node = node.parent[0]

        self.results.append(self.board.result())
        print(self.board.result())

        self.board.reset_board()
        self.total_matches += 1
        
            
def fitness(self, node):
    p = node.winp
    simulations = node.matches
    parent_simulations = node.parent[0].matches if node.parent[0] != None else self.matches
    c = math.sqrt(2)
    if simulations > 0 and parent_simulations > 0:
        formula = p + c * math.sqrt((np.log(parent_simulations)) / simulations)
    else:
        formula = p
    
    return formula

python - 蒙特卡洛树搜索不断给出相同的结果

0 回答 0

Related

Reference