Tic-Tac-Toe博弈中MiniMax算法的最优移动

提问者：小点点

Tic-Tac-Toe博弈中MiniMax算法的最优移动

我试图在用C编写的Tic-Tac-Toe游戏中实现基于维基百科伪代码的MiniMax算法。然而，我无法获得最佳的移动。这是我的密码：

#include <stdio.h>
#include <stdbool.h>
#include <string.h>

// compile and run: gcc minimax.c -std=c99 && ./a.out

int max(int x, int y) {
  return x > y ? x : y;
}

int min(int x, int y) {
  return x < y ? x : y;
}

int whoWon(char ch) {
  switch (ch) {
    case 'O':
      return -1;
      break;
    case 'X':
      return 1;
      break;
  }
}

void printArray(char array[]) {
  printf("# START\n"
         "%c | %c | %c\n"
         "--|---|--\n"
         "%c | %c | %c\n"
         "--|---|--\n"
         "%c | %c | %c\n"
         "# END\n\n", array[0], array[1], array[2], array[3], array[4], array[5], array[6], array[7], array[8]);
}

int anyWinners(char board[])
{
  int i;

  /* check every row */
  for(i = 0; i < 7; i += 3)
    if(board[i] != ' ' && board[i] == board[i+1] && board[i] == board[i+2])
      return whoWon(board[i]);

  /* check every column */
  for(i = 0; i < 3; i++)
    if(board[i] != ' ' && board[i] == board[i+3] && board[i] == board[i+6])
      return whoWon(board[i]);

  /* check diagonals */
  if(board[4] != ' ' && ((board[0] == board[4] && board[0] == board[8]) || (board[2] == board[4] && board[2] == board[6])))
    return whoWon(board[4]);

  return 0;
}

int fullBoard(char board[]) {
  for (int i = 0; i < 9; ++i) {
    if (board[i] == ' ')
      return 0;
  }
  return 1;
}

int minimax(char node[], int depth, bool maximizingPlayer, int * move) {
  int terminalNode = anyWinners(node);
  if (depth == 0 || terminalNode || fullBoard(node)) {
    printf("################## END OF SUBTREE ##################\n");
    return terminalNode;
  }

  int bestValue, val;
  if (maximizingPlayer) {
    bestValue = -2;
    for (int i = 0; i < 9; ++i) {
      if (node[i] == ' ') {
        char child[9];
        strcpy(child, node);
        child[i] = 'X';

        // debug
        printArray(child);

        val = minimax(child, depth - 1, false, move);

        // debug
        printf("X: ^^ i = %d ^^ depth = %d ^^ val = %d\n", i, depth, val);

        //bestValue = max(bestValue, val);
        if (val > bestValue) {
          bestValue = val;
          if (depth == 9) *move = i;
        }
      }
    }
    return bestValue;
  } else {
    bestValue = 2;
    for (int i = 0; i < 9; ++i) {
      if (node[i] == ' ') {
        char child[9];
        strcpy(child, node);
        child[i] = 'O';

        // debug
        printArray(child);

        val = minimax(child, depth - 1, true, move);

        // debug
        printf("O: ^^ i = %d ^^ depth = %d ^^ val = %d\n", i, depth, val);

        bestValue = min(bestValue, val);
      }
    }
    return bestValue;
  }
}

int main() {
  int move = -999; // initialize only for debug

  // X will always win no matter what, first best move for X is 8
  // char board[] = {'O', ' ', ' ',
  //                 ' ', ' ', ' ',
  //                 'X', 'X', ' '};

  // best move for X is 3
  char board[] = {'O', 'O', ' ',
                  ' ', 'X', 'X',
                  ' ', ' ', ' '};

  // Initial call for maximizing player
  int result = minimax(board, 9, true, &move);
  printf("minimax returned: %d\n", result);
  printf("chosen move: %d\n", move);

  return 0;
}

代码为每个移动打印带有所有变量状态的板。在main中还注释了两个失败的测试。现在算法返回错误的移动，我找不到错误。

共1个答案

匿名用户

我看到两个问题：

启发式错误
strcpy有一个问题

启发式是错误的

维基百科的伪代码说：

if depth = 0 or node is a terminal node
    return the heuristic value of node

您的实现可以执行以下操作：

if depth = 0 or node is a terminal node
    return 1 if X wins, -1 if O wins, 0 if it is a draw

但这不是一个很好的启发。有了这个启发，X可能获胜的所有可能方式都是同等权重的。因此，如果X在3个招式中找到获胜的方法，则权重与X在2个招式中找到获胜的方法相同，并且权重与X在1个招式中找到获胜的方法相同。

因此，以下是在您的测试用例中发生的情况：

X尝试位置2
O尝试位置3
X尝试位置6
这是一个终端节点。X赢了。所以返回正1

此决策路径的启发式方法=1

它击中的另一种可能性是：

X尝试位置3

此决策路径的启发式方法=1

由于这两个解具有相同的启发式，因此它们的值相等。您可能是想让这个解决方案不太理想，因为它需要太多的动作才能获胜。我建议一个启发式的方法，它是基于到达这里所需的移动次数，乘以谁是赢家。因此，如果X在1个动作中获胜，则启发式为5000。如果X在两次移动中获胜，则启发式为2500。如果O在2步中获胜，则启发式为-2500。差不多吧。

strcpy有一个问题

这一行：

strcpy(child, node);

应该是：

memcpy(child, node, 9*sizeof(char));

因为“node”不是以null结尾的字符串。当我在VS2013/Windows 8.1上运行这个时，我的输出是垃圾，因为。在你的平台上你可能会很幸运。