我已经尝试了几天来实现这个基本实现蒙特卡洛树搜索井字游戏,但我就是不知道为什么它总是无法选择最佳动作。
例如,在某些情况下,它拒绝选择会导致它在那个回合中获胜的动作 - 所以很明显我在某个地方有错误,但我找不到它。
任何帮助将不胜感激,因为我即将放弃......
其他课程可以在 gitlab 链接上查看,因此如果您想全部下载并进行实验,您可以:https ://gitlab.com/Cryosis7/monte_carlo_learning
public class MCTS {
public static int search(Game game) {
final int player = game.getPlayer();
Node node = new Node(game);
int counter = 0;
while (counter++ < 1000) {
while (!node.isLeaf()) {
node = node.moveDown(UCT.findBestNodeWithUCT(node));
}
if (node.getVisits() != 0) {
node.expandNode();
if (node.hasChildren())
node = node.moveDown(0);
}
// Roll Out
final int score = rolloutNode(node, player);
// Back Propagate
node.addScore(score);
node.incrementVisits();
while (node.hasParent()) {
node = node.moveUp();
node.addScore(score);
node.incrementVisits();
}
}
return Collections.max(node.getChildren(), Comparator.comparing(Node::getVisits)).getMove();
}
private static int rolloutNode(Node node, int rootPlayer) {
int startingDepth = node.getDepth();
while (!node.isTerminal()) {
final Node child = node.addRandomChild();
node = node.moveDown(child);
}
int score = node.getGame().getScore(rootPlayer);
while (node.getDepth() != startingDepth) {
assert node.getParent().getChildren().size() == 1;
node.getParent().getChildren().clear();
node = node.moveUp();
}
return score;
}
}
public class UCT {
public static double uctValue(int totalVisit, double nodeWinScore, int nodeVisit) {
if (nodeVisit == 0)
return Integer.MAX_VALUE;
return (nodeWinScore / (double) nodeVisit) + 2 * Math.sqrt(Math.log(totalVisit) / (double) nodeVisit);
}
public static Node findBestNodeWithUCT(Node node) {
int parentVisit = node.getVisits();
return Collections.max(node.getChildren(),
Comparator.comparing(c -> uctValue(parentVisit, c.getScore(), c.getVisits())));
}
}