我正在为游戏筷子制作一个 C++ 程序。
这是一个非常简单的游戏,总共只有 625 个游戏状态(如果考虑到对称性和无法到达的状态,它会更低)。我已经阅读了 minimax 和 alpha-beta 算法,主要用于井字游戏,但我遇到的问题是,在井字游戏中,不可能循环回到以前的状态,而这很容易在筷子中发生。因此,在运行代码时,最终会出现堆栈溢出。
我通过为以前访问过的状态添加标志来解决此问题(我不知道这是否是正确的方法。)以便可以避免它们,但现在我遇到的问题是输出不像预期的那样对称。
例如,在游戏的开始状态下,每个玩家都有一根手指,所以它们都是对称的。程序告诉我,最好的动作是用左手击打我的右手,而不是相反。
我的源代码是 -
#include <iostream>
#include <array>
#include <vector>
#include <limits>
std::array<int, 625> t; //Flags for visited states.
std::array<int, 625> f; //Flags for visited states.
int no = 0; //Unused. For debugging.
class gamestate
{
public:
gamestate(int x, bool t) : turn(t) //Constructor.
{
for (int i = 0; i < 2; i++)
for (int j = 0; j < 2; j++) {
val[i][j] = x % 5;
x /= 5;
}
init();
}
void print() //Unused. For debugging.
{
for (int i = 0; i < 2; i++) {
for (int j = 0; j < 2; j++)
std::cout << val[i][j] << "\t";
std::cout << "\n";
}
std::cout << "\n";
}
std::array<int, 6> canmove = {{ 1, 1, 1, 1, 1, 1 }}; //List of available moves.
bool isover() //Is the game over.
{
return ended;
}
bool won() //Who won the game.
{
return winner;
}
bool isturn() //Whose turn it is.
{
return turn;
}
std::vector<int> choosemoves() //Choose the best possible moves in the current state.
{
std::vector<int> bestmoves;
if(ended)
return bestmoves;
std::array<int, 6> scores;
int bestscore;
if(turn)
bestscore = std::numeric_limits<int>::min();
else
bestscore = std::numeric_limits<int>::max();
scores.fill(bestscore);
for (int i = 0; i < 6; i++)
if (canmove[i]) {
t.fill(0);
f.fill(0);
gamestate *play = new gamestate(this->playmove(i),!turn);
scores[i] = minimax(play, 0, std::numeric_limits<int>::min(), std::numeric_limits<int>::max());
std::cout<<i<<": "<<scores[i]<<std::endl;
delete play;
if (turn) if (scores[i] > bestscore) bestscore = scores[i];
if (!turn) if (scores[i] < bestscore) bestscore = scores[i];
}
for (int i = 0; i < 6; i++)
if (scores[i] == bestscore)
bestmoves.push_back(i);
return bestmoves;
}
private:
std::array<std::array<int, 2>, 2 > val; //The values of the fingers.
bool turn; //Whose turn it is.
bool ended = false; //Has the game ended.
bool winner; //Who won the game.
void init() //Check if the game has ended and find the available moves.
{
if (!(val[turn][0]) && !(val[turn][1])) {
ended = true;
winner = !turn;
canmove.fill(0);
return;
}
if (!(val[!turn][0]) && !(val[!turn][1])) {
ended = true;
winner = turn;
canmove.fill(0);
return;
}
if (!val[turn][0]) {
canmove[0] = 0;
canmove[1] = 0;
canmove[2] = 0;
if (val[turn][1] % 2)
canmove[5] = 0;
}
if (!val[turn][1]) {
if (val[turn][0] % 2)
canmove[2] = 0;
canmove[3] = 0;
canmove[4] = 0;
canmove[5] = 0;
}
if (!val[!turn][0]) {
canmove[0] = 0;
canmove[3] = 0;
}
if (!val[!turn][1]) {
canmove[1] = 0;
canmove[4] = 0;
}
}
int playmove(int mov) //Play a move to get the next game state.
{
auto newval = val;
switch (mov) {
case 0:
newval[!turn][0] = (newval[turn][0] + newval[!turn][0]);
newval[!turn][0] = (5 > newval[!turn][0]) ? newval[!turn][0] : 0;
break;
case 1:
newval[!turn][1] = (newval[turn][0] + newval[!turn][1]);
newval[!turn][1] = (5 > newval[!turn][1]) ? newval[!turn][1] : 0;
break;
case 2:
if (newval[turn][1]) {
newval[turn][1] = (newval[turn][0] + newval[turn][1]);
newval[turn][1] = (5 > newval[turn][1]) ? newval[turn][1] : 0;
} else {
newval[turn][0] /= 2;
newval[turn][1] = newval[turn][0];
}
break;
case 3:
newval[!turn][0] = (newval[turn][1] + newval[!turn][0]);
newval[!turn][0] = (5 > newval[!turn][0]) ? newval[!turn][0] : 0;
break;
case 4:
newval[!turn][1] = (newval[turn][1] + newval[!turn][1]);
newval[!turn][1] = (5 > newval[!turn][1]) ? newval[!turn][1] : 0;
break;
case 5:
if (newval[turn][0]) {
newval[turn][0] = (newval[turn][1] + newval[turn][0]);
newval[turn][0] = (5 > newval[turn][0]) ? newval[turn][0] : 0;
} else {
newval[turn][1] /= 2;
newval[turn][0] = newval[turn][1];
}
break;
default:
std::cout << "\nInvalid move!\n";
}
int ret = 0;
for (int i = 1; i > -1; i--)
for (int j = 1; j > -1; j--) {
ret+=newval[i][j];
ret*=5;
}
ret/=5;
return ret;
}
static int minimax(gamestate *game, int depth, int alpha, int beta) //Minimax searching function with alpha beta pruning.
{
if (game->isover()) {
if (game->won())
return 1000 - depth;
else
return depth - 1000;
}
if (game->isturn()) {
for (int i = 0; i < 6; i++)
if (game->canmove[i]&&t[game->playmove(i)]!=-1) {
int score;
if(!t[game->playmove(i)]){
t[game->playmove(i)] = -1;
gamestate *play = new gamestate(game->playmove(i),!game->isturn());
score = minimax(play, depth + 1, alpha, beta);
delete play;
t[game->playmove(i)] = score;
}
else
score = t[game->playmove(i)];
if (score > alpha) alpha = score;
if (alpha >= beta) break;
}
return alpha;
} else {
for (int i = 0; i < 6; i++)
if (game->canmove[i]&&f[game->playmove(i)]!=-1) {
int score;
if(!f[game->playmove(i)]){
f[game->playmove(i)] = -1;
gamestate *play = new gamestate(game->playmove(i),!game->isturn());
score = minimax(play, depth + 1, alpha, beta);
delete play;
f[game->playmove(i)] = score;
}
else
score = f[game->playmove(i)];
if (score < beta) beta = score;
if (alpha >= beta) break;
}
return beta;
}
}
};
int main(void)
{
gamestate test(243, true);
auto movelist = test.choosemoves();
for(auto i: movelist)
std::cout<<i<<std::endl;
return 0;
}
我将移动以一种以 5 为底的十进制系统传递,因为每只手的值都可以从 0 到 4。
在我输入状态的代码中 -
3 3
4 1
输出说我应该将右手 (1) 击到对手的右侧 (3),但没有说我应该将其击到对手的左侧 (也是 3)
我认为问题在于我处理无限循环的方式。
什么是正确的方法?或者,如果这是正确的方法,那么我该如何解决这个问题?
另外请让我知道如何改进我的代码。
非常感谢。
编辑:
我已将我的 minimax 函数更改如下,以确保无限循环的得分高于失败,但我仍然没有得到对称性。我还做了一个函数来增加乐谱的深度
static float minimax(gamestate *game, int depth, float alpha, float beta) //Minimax searching function with alpha beta pruning.
{
if (game->isover()) {
if (game->won())
return 1000 - std::atan(depth) * 2000 / std::acos(-1);
else
return std::atan(depth) * 2000 / std::acos(-1) - 1000;
}
if (game->isturn()) {
for (int i = 0; i < 6; i++)
if (game->canmove[i]) {
float score;
if(!t[game->playmove(i)]) {
t[game->playmove(i)] = -1001;
gamestate *play = new gamestate(game->playmove(i), !game->isturn());
score = minimax(play, depth + 1, alpha, beta);
delete play;
t[game->playmove(i)] = score;
} else if(t[game->playmove(i)] == -1001)
score = 0;
else
score = adddepth(t[game->playmove(i)], depth);
if (score > alpha) alpha = score;
if (alpha >= beta) break;
}
return alpha;
} else {
for (int i = 0; i < 6; i++)
if (game->canmove[i]) {
float score;
if(!f[game->playmove(i)]) {
f[game->playmove(i)] = -1001;
gamestate *play = new gamestate(game->playmove(i), !game->isturn());
score = minimax(play, depth + 1, alpha, beta);
delete play;
f[game->playmove(i)] = score;
} else if(f[game->playmove(i)] == -1001)
score = 0;
else
score = adddepth(f[game->playmove(i)], depth);
if (score < beta) beta = score;
if (alpha >= beta) break;
}
return beta;
}
}
这是增加深度的功能 -
float adddepth(float score, int depth) //Add depth to pre-calculated score.
{
int olddepth;
float newscore;
if(score > 0) {
olddepth = std::tan((1000 - score) * std::acos(-1) / 2000);
depth += olddepth;
newscore = 1000 - std::atan(depth) * 2000 / std::acos(-1);
} else {
olddepth = std::tan((1000 + score) * std::acos(-1) / 2000);
depth += olddepth;
newscore = std::atan(depth) * 2000 / std::acos(-1) - 1000;
}
return newscore;
}