0

我已经尝试了几天来实现这个基本实现蒙特卡洛树搜索井字游戏,但我就是不知道为什么它总是无法选择最佳动作。
例如,在某些情况下,它拒绝选择会导致它在那个回合中获胜的动作 - 所以很明显我在某个地方有错误,但我找不到它。

任何帮助将不胜感激,因为我即将放弃......

其他课程可以在 gitlab 链接上查看,因此如果您想全部下载并进行实验,您可以:https ://gitlab.com/Cryosis7/monte_carlo_learning

public class MCTS {

    public static int search(Game game) {
        final int player = game.getPlayer();

        Node node = new Node(game);

        int counter = 0;
        while (counter++ < 1000) {

            while (!node.isLeaf()) {
                    node = node.moveDown(UCT.findBestNodeWithUCT(node));
            }

            if (node.getVisits() != 0) {
                node.expandNode();
                if (node.hasChildren())
                    node = node.moveDown(0);
            }

            // Roll Out
            final int score = rolloutNode(node, player);

            // Back Propagate
            node.addScore(score);
            node.incrementVisits();
            while (node.hasParent()) {
                node = node.moveUp();
                node.addScore(score);
                node.incrementVisits();
            }
        }

        return Collections.max(node.getChildren(), Comparator.comparing(Node::getVisits)).getMove();
    }

    private static int rolloutNode(Node node, int rootPlayer) {
        int startingDepth = node.getDepth();

        while (!node.isTerminal()) {
            final Node child = node.addRandomChild();
            node = node.moveDown(child);
        }

        int score = node.getGame().getScore(rootPlayer);
        while (node.getDepth() != startingDepth) {
            assert node.getParent().getChildren().size() == 1;
            node.getParent().getChildren().clear();
            node = node.moveUp();
        }

        return score;
    }
}
public class UCT {
    public static double uctValue(int totalVisit, double nodeWinScore, int nodeVisit) {
        if (nodeVisit == 0)
            return Integer.MAX_VALUE;
        return (nodeWinScore / (double) nodeVisit) + 2 * Math.sqrt(Math.log(totalVisit) / (double) nodeVisit);
    }

    public static Node findBestNodeWithUCT(Node node) {
        int parentVisit = node.getVisits();
        return Collections.max(node.getChildren(),
                Comparator.comparing(c -> uctValue(parentVisit, c.getScore(), c.getVisits())));
    }
}
4

0 回答 0