Tic-Tac-Toe - Iterative implementation of alpha beta tree search

懵懂的女人 提交于 2019-12-24 16:12:24

问题


Having issues trying to decipher the principal variation (PV) results.

"The principal variation is a path from the root to a leaf node, in which every node has the same value. This leaf node, whose value determines the minimax value of the root, is called the principal leaf."

The game demo below PV (move,eval) shows this line:

4,0    7,0    6,0    5,0    2,1

How can this be a valid PV since not ALL eval nodes have the same value? The AI never loses, but since the dizzying PV seems bogus, it casts a dark shadow on the AI logic. :( Hopefully, it's just a PV bug!

           |   |                 0 | 1 | 2
        ---|---|---             ---|---|---
           |   |                 3 | 4 | 5
        ---|---|---             ---|---|---
           |   |                 6 | 7 | 8

Your move: 8

Thinking Cycles....: 2784300
Boards Generated...: 3956
Principal Variation: 4,0  7,0  6,0  5,0  2,1
Alpha-Beta Cutoffs.: 931
Computer Evaluation: 0
Computer Move......: 4

           |   |                 0 | 1 | 2
        ---|---|---             ---|---|---
           | X |                 3 | 4 | 5
        ---|---|---             ---|---|---
           |   | O               6 | 7 | 8

Your move: 7

Thinking Cycles....: 410484
Boards Generated...: 575
Principal Variation: 6,0  5,0  2,1
Alpha-Beta Cutoffs.: 63
Computer Evaluation: 0
Computer Move......: 6

           |   |                 0 | 1 | 2
        ---|---|---             ---|---|---
           | X |                 3 | 4 | 5
        ---|---|---             ---|---|---
         X | O | O               6 | 7 | 8

Your move: 2

Thinking Cycles....: 42808
Boards Generated...: 45
Principal Variation: 5,0  3,0  1,0  0,0
Alpha-Beta Cutoffs.: 1
Computer Evaluation: 0
Computer Move......: 5

           |   | O               0 | 1 | 2
        ---|---|---             ---|---|---
           | X | X               3 | 4 | 5
        ---|---|---             ---|---|---
         X | O | O               6 | 7 | 8

Your move: 3

Thinking Cycles....: 6892
Boards Generated...: 4
Principal Variation: 0,0  1,0
Alpha-Beta Cutoffs.: 0
Computer Evaluation: 0
Computer Move......: 0

         X |   | O               0 | 1 | 2
        ---|---|---             ---|---|---
         O | X | X               3 | 4 | 5
        ---|---|---             ---|---|---
         X | O | O               6 | 7 | 8

Your move: 1

         X | O | O               0 | 1 | 2
        ---|---|---             ---|---|---
         O | X | X               3 | 4 | 5
        ---|---|---             ---|---|---
         X | O | O               6 | 7 | 8

A draw! (*_*)

If anyone sees a bug in my code, please let me know. Thanks.

// Tic-Tac-Toe - Iterative implementation of alpha beta tree search.
// Built with Microsoft Visual Studio Professional 2013.

#include "stdafx.h"
#include <windows.h>
#include <intrin.h>
#include <stdint.h>


#define INFINITY 9999
#define NO_MOVE 9
#define NO_EVAL 2
#define X 1
#define O -1
#define Empty 0


struct values
{
    int nodeMove;
    int nodeEval;
    int alpha;
    int beta;
    int player;
    int board[9];
};


struct line
{
    int nodeMove;
    int nodeEval;
};

struct values moves[9];


int bestMove, bestEval;
int nodesCreated;
int abCutoffs;
int pvDepth, pvBestDepth;


// The principal variation pv[9] is a path from the root to a leaf node, in which every node
// has the same value. This leaf node, whose value determines the minimax value of the root,
// is called the principal leaf.

struct line pv[9] = {
        { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL },
        { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL },
        { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }
};


struct line bestPV[9] = {
        { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL },
        { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL },
        { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }
};


int board_eval(int *b)
{
    // Rows.
    if (b[0] && b[0] == b[1] && b[1] == b[2]) return b[0];
    if (b[3] && b[3] == b[4] && b[4] == b[5]) return b[3];
    if (b[6] && b[6] == b[7] && b[7] == b[8]) return b[6];

    // Cols.
    if (b[0] && b[0] == b[3] && b[3] == b[6]) return b[0];
    if (b[1] && b[1] == b[4] && b[4] == b[7]) return b[1];
    if (b[2] && b[2] == b[5] && b[5] == b[8]) return b[2];

    // Center is empty.
    if (!b[4]) return 0;

    // Diags.
    if (b[0] == b[4] && b[4] == b[8]) return b[0];
    if (b[2] == b[4] && b[4] == b[6]) return b[2];

    return 0;
}


void displayboard(int depth)
{
    const char *t = "O X";

    printf("\n\t %c | %c | %c\t\t 0 | 1 | 2\n", t[moves[depth].board[0] + 1], t[moves[depth].board[1] + 1], t[moves[depth].board[2] + 1]);
    printf("\t---|---|---\t\t---|---|---\n");
    printf("\t %c | %c | %c\t\t 3 | 4 | 5\n", t[moves[depth].board[3] + 1], t[moves[depth].board[4] + 1], t[moves[depth].board[5] + 1]);
    printf("\t---|---|---\t\t---|---|---\n");
    printf("\t %c | %c | %c\t\t 6 | 7 | 8\n\n", t[moves[depth].board[6] + 1], t[moves[depth].board[7] + 1], t[moves[depth].board[8] + 1]);
}


int find_move(int *board_arr, int nodeMove)
{
    int i;

    // Speedup loop using nodeMove instead of 0.
    for (i = nodeMove; i < 9; i++) {
        if (board_arr[i] == Empty)
            return i;
    }

    return NO_MOVE;
}


int move_up_tree(int depth)
{
    depth--;

    if (depth == 0 && (moves[depth + 1].nodeEval > moves[depth].nodeEval))
    {
        bestMove = moves[depth].nodeMove;
        bestEval = moves[depth + 1].nodeEval;
        pvBestDepth = pvDepth;

        pv[depth] = { bestMove, bestEval };

        for (int i = 0; i < pvDepth; ++i)
        {
            bestPV[i].nodeMove = pv[i].nodeMove;
            bestPV[i].nodeEval = pv[i].nodeEval;
            pv[i] = { NO_MOVE, NO_EVAL };
        }
    }

    if (moves[depth].player == X)
    {
        moves[depth].nodeEval = max(moves[depth].nodeEval, moves[depth + 1].nodeEval);
        moves[depth].alpha = max(moves[depth].alpha, moves[depth].nodeEval);
    }
    else
    {
        moves[depth].nodeEval = min(moves[depth].nodeEval, moves[depth + 1].nodeEval);
        moves[depth].beta = min(moves[depth].beta, moves[depth].nodeEval);
    }

    pv[depth] = { moves[depth].nodeMove, moves[depth].nodeEval };

    moves[depth].nodeMove++;
    moves[depth].nodeMove = find_move(moves[depth].board, moves[depth].nodeMove);

    return depth;
}


int move_down_tree(int depth)
{
    int eval;

    depth++;

    moves[depth] = moves[depth - 1];

    nodesCreated++;

    if (moves[depth].player == X)
    {
        moves[depth].board[moves[depth].nodeMove] = X;
        moves[depth].player = O;
        moves[depth].nodeEval = INFINITY;
    }
    else
    {
        moves[depth].board[moves[depth].nodeMove] = O;
        moves[depth].player = X;
        moves[depth].nodeEval = -INFINITY;
    }

    eval = board_eval(moves[depth].board);

    //  Leaf node.
    if (eval || find_move(moves[depth].board, 0) == NO_MOVE)
    {
        moves[depth].nodeEval = eval;
        moves[depth].nodeMove = NO_MOVE;
        pvDepth = depth;
    }
    else
    {
        moves[depth].nodeMove = find_move(moves[depth].board, 0);
    }

    return depth;
}


void computer_move()
{
    int depth = 0;
    uint64_t c1, c2;

    nodesCreated = 0;
    abCutoffs = 0;
    bestMove = NO_MOVE;
    bestEval = -INFINITY;

    moves[0].nodeMove = find_move(moves[0].board, 0);
    moves[0].nodeEval = -INFINITY;
    moves[0].alpha = -INFINITY;
    moves[0].beta = INFINITY;
    moves[0].player = X;

    if (moves[0].nodeMove != NO_MOVE)
    {
        c1 = __rdtsc();

        while (TRUE)
        {
            if (moves[depth].nodeMove == NO_MOVE)
            {
                if (depth == 0) break;

                depth = move_up_tree(depth);
            }
            else if (moves[depth].alpha >= moves[depth].beta)
            {
                abCutoffs++;
                moves[depth].nodeMove = NO_MOVE;
            }
            else
            {
                depth = move_down_tree(depth);
            }
        }

        c2 = __rdtsc();

        moves[0].board[bestMove] = X;

        printf("\n");
        printf("Thinking Cycles....: %d\n", c2 - c1);
        printf("Boards Generated...: %d\n", nodesCreated);
        printf("Principal Variation: ");

        for (int i = 0; i < pvBestDepth; ++i) printf("%d,%d  ", bestPV[i].nodeMove,bestPV[i].nodeEval);

        printf("\n");
        printf("Alpha-Beta Cutoffs.: %d\n", abCutoffs);
        printf("Computer Evaluation: %d\n", bestEval);
        printf("Computer Move......: %d\n", bestMove);
    }
}


void init_board()
{
    moves[0].board[0] = Empty;
    moves[0].board[1] = Empty;
    moves[0].board[2] = Empty;
    moves[0].board[3] = Empty;
    moves[0].board[4] = Empty;
    moves[0].board[5] = Empty;
    moves[0].board[6] = Empty;
    moves[0].board[7] = Empty;
    moves[0].board[8] = Empty;
}


void human_move()
{
    int move;
    char *p, s[100];

    printf("Your move: ");
    while (fgets(s, sizeof(s), stdin)) {
        move = strtol(s, &p, 10);
        if (p == s || *p != '\n') {
            printf("Your move: ");
        }
        else break;
    }

    moves[0].board[move] = O;
}


int main(int argc, char **argv)
{
    init_board();
    displayboard(0);

    while (1)
    {
        human_move();
        computer_move();
        displayboard(0);

        if (board_eval(moves[0].board))
        {
            printf("Computer Wins! (-_-)\n");
            init_board();
            displayboard(0);
        }
        else if (find_move(moves[0].board, 0) == NO_MOVE)
        {
            printf("A draw! (*_*)\n");
            init_board();
            displayboard(0);
        }
    }

    return 0;
}

来源:https://stackoverflow.com/questions/32973257/tic-tac-toe-iterative-implementation-of-alpha-beta-tree-search

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!