Tic-Tac-Toe - Iterative implementation of alpha beta tree search

问题

Having issues trying to decipher the principal variation (PV) results.

"The principal variation is a path from the root to a leaf node, in which every node has the same value. This leaf node, whose value determines the minimax value of the root, is called the principal leaf."

The game demo below PV (move,eval) shows this line:

4,0    7,0    6,0    5,0    2,1

How can this be a valid PV since not ALL eval nodes have the same value? The AI never loses, but since the dizzying PV seems bogus, it casts a dark shadow on the AI logic. :( Hopefully, it's just a PV bug!

           |   |                 0 | 1 | 2
        ---|---|---             ---|---|---
           |   |                 3 | 4 | 5
        ---|---|---             ---|---|---
           |   |                 6 | 7 | 8

Your move: 8

Thinking Cycles....: 2784300
Boards Generated...: 3956
Principal Variation: 4,0  7,0  6,0  5,0  2,1
Alpha-Beta Cutoffs.: 931
Computer Evaluation: 0
Computer Move......: 4

           |   |                 0 | 1 | 2
        ---|---|---             ---|---|---
           | X |                 3 | 4 | 5
        ---|---|---             ---|---|---
           |   | O               6 | 7 | 8

Your move: 7

Thinking Cycles....: 410484
Boards Generated...: 575
Principal Variation: 6,0  5,0  2,1
Alpha-Beta Cutoffs.: 63
Computer Evaluation: 0
Computer Move......: 6

           |   |                 0 | 1 | 2
        ---|---|---             ---|---|---
           | X |                 3 | 4 | 5
        ---|---|---             ---|---|---
         X | O | O               6 | 7 | 8

Your move: 2

Thinking Cycles....: 42808
Boards Generated...: 45
Principal Variation: 5,0  3,0  1,0  0,0
Alpha-Beta Cutoffs.: 1
Computer Evaluation: 0
Computer Move......: 5

           |   | O               0 | 1 | 2
        ---|---|---             ---|---|---
           | X | X               3 | 4 | 5
        ---|---|---             ---|---|---
         X | O | O               6 | 7 | 8

Your move: 3

Thinking Cycles....: 6892
Boards Generated...: 4
Principal Variation: 0,0  1,0
Alpha-Beta Cutoffs.: 0
Computer Evaluation: 0
Computer Move......: 0

         X |   | O               0 | 1 | 2
        ---|---|---             ---|---|---
         O | X | X               3 | 4 | 5
        ---|---|---             ---|---|---
         X | O | O               6 | 7 | 8

Your move: 1

         X | O | O               0 | 1 | 2
        ---|---|---             ---|---|---
         O | X | X               3 | 4 | 5
        ---|---|---             ---|---|---
         X | O | O               6 | 7 | 8

A draw! (*_*)

If anyone sees a bug in my code, please let me know. Thanks.

// Tic-Tac-Toe - Iterative implementation of alpha beta tree search.
// Built with Microsoft Visual Studio Professional 2013.

#include "stdafx.h"
#include <windows.h>
#include <intrin.h>
#include <stdint.h>


#define INFINITY 9999
#define NO_MOVE 9
#define NO_EVAL 2
#define X 1
#define O -1
#define Empty 0


struct values
{
    int nodeMove;
    int nodeEval;
    int alpha;
    int beta;
    int player;
    int board[9];
};


struct line
{
    int nodeMove;
    int nodeEval;
};

struct values moves[9];


int bestMove, bestEval;
int nodesCreated;
int abCutoffs;
int pvDepth, pvBestDepth;


// The principal variation pv[9] is a path from the root to a leaf node, in which every node
// has the same value. This leaf node, whose value determines the minimax value of the root,
// is called the principal leaf.

struct line pv[9] = {
        { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL },
        { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL },
        { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }
};


struct line bestPV[9] = {
        { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL },
        { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL },
        { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }, { NO_MOVE, NO_EVAL }
};


int board_eval(int *b)
{
    // Rows.
    if (b[0] && b[0] == b[1] && b[1] == b[2]) return b[0];
    if (b[3] && b[3] == b[4] && b[4] == b[5]) return b[3];
    if (b[6] && b[6] == b[7] && b[7] == b[8]) return b[6];

    // Cols.
    if (b[0] && b[0] == b[3] && b[3] == b[6]) return b[0];
    if (b[1] && b[1] == b[4] && b[4] == b[7]) return b[1];
    if (b[2] && b[2] == b[5] && b[5] == b[8]) return b[2];

    // Center is empty.
    if (!b[4]) return 0;

    // Diags.
    if (b[0] == b[4] && b[4] == b[8]) return b[0];
    if (b[2] == b[4] && b[4] == b[6]) return b[2];

    return 0;
}


void displayboard(int depth)
{
    const char *t = "O X";

    printf("\n\t %c | %c | %c\t\t 0 | 1 | 2\n", t[moves[depth].board[0] + 1], t[moves[depth].board[1] + 1], t[moves[depth].board[2] + 1]);
    printf("\t---|---|---\t\t---|---|---\n");
    printf("\t %c | %c | %c\t\t 3 | 4 | 5\n", t[moves[depth].board[3] + 1], t[moves[depth].board[4] + 1], t[moves[depth].board[5] + 1]);
    printf("\t---|---|---\t\t---|---|---\n");
    printf("\t %c | %c | %c\t\t 6 | 7 | 8\n\n", t[moves[depth].board[6] + 1], t[moves[depth].board[7] + 1], t[moves[depth].board[8] + 1]);
}


int find_move(int *board_arr, int nodeMove)
{
    int i;

    // Speedup loop using nodeMove instead of 0.
    for (i = nodeMove; i < 9; i++) {
        if (board_arr[i] == Empty)
            return i;
    }

    return NO_MOVE;
}


int move_up_tree(int depth)
{
    depth--;

    if (depth == 0 && (moves[depth + 1].nodeEval > moves[depth].nodeEval))
    {
        bestMove = moves[depth].nodeMove;
        bestEval = moves[depth + 1].nodeEval;
        pvBestDepth = pvDepth;

        pv[depth] = { bestMove, bestEval };

        for (int i = 0; i < pvDepth; ++i)
        {
            bestPV[i].nodeMove = pv[i].nodeMove;
            bestPV[i].nodeEval = pv[i].nodeEval;
            pv[i] = { NO_MOVE, NO_EVAL };
        }
    }

    if (moves[depth].player == X)
    {
        moves[depth].nodeEval = max(moves[depth].nodeEval, moves[depth + 1].nodeEval);
        moves[depth].alpha = max(moves[depth].alpha, moves[depth].nodeEval);
    }
    else
    {
        moves[depth].nodeEval = min(moves[depth].nodeEval, moves[depth + 1].nodeEval);
        moves[depth].beta = min(moves[depth].beta, moves[depth].nodeEval);
    }

    pv[depth] = { moves[depth].nodeMove, moves[depth].nodeEval };

    moves[depth].nodeMove++;
    moves[depth].nodeMove = find_move(moves[depth].board, moves[depth].nodeMove);

    return depth;
}


int move_down_tree(int depth)
{
    int eval;

    depth++;

    moves[depth] = moves[depth - 1];

    nodesCreated++;

    if (moves[depth].player == X)
    {
        moves[depth].board[moves[depth].nodeMove] = X;
        moves[depth].player = O;
        moves[depth].nodeEval = INFINITY;
    }
    else
    {
        moves[depth].board[moves[depth].nodeMove] = O;
        moves[depth].player = X;
        moves[depth].nodeEval = -INFINITY;
    }

    eval = board_eval(moves[depth].board);

    //  Leaf node.
    if (eval || find_move(moves[depth].board, 0) == NO_MOVE)
    {
        moves[depth].nodeEval = eval;
        moves[depth].nodeMove = NO_MOVE;
        pvDepth = depth;
    }
    else
    {
        moves[depth].nodeMove = find_move(moves[depth].board, 0);
    }

    return depth;
}


void computer_move()
{
    int depth = 0;
    uint64_t c1, c2;

    nodesCreated = 0;
    abCutoffs = 0;
    bestMove = NO_MOVE;
    bestEval = -INFINITY;

    moves[0].nodeMove = find_move(moves[0].board, 0);
    moves[0].nodeEval = -INFINITY;
    moves[0].alpha = -INFINITY;
    moves[0].beta = INFINITY;
    moves[0].player = X;

    if (moves[0].nodeMove != NO_MOVE)
    {
        c1 = __rdtsc();

        while (TRUE)
        {
            if (moves[depth].nodeMove == NO_MOVE)
            {
                if (depth == 0) break;

                depth = move_up_tree(depth);
            }
            else if (moves[depth].alpha >= moves[depth].beta)
            {
                abCutoffs++;
                moves[depth].nodeMove = NO_MOVE;
            }
            else
            {
                depth = move_down_tree(depth);
            }
        }

        c2 = __rdtsc();

        moves[0].board[bestMove] = X;

        printf("\n");
        printf("Thinking Cycles....: %d\n", c2 - c1);
        printf("Boards Generated...: %d\n", nodesCreated);
        printf("Principal Variation: ");

        for (int i = 0; i < pvBestDepth; ++i) printf("%d,%d  ", bestPV[i].nodeMove,bestPV[i].nodeEval);

        printf("\n");
        printf("Alpha-Beta Cutoffs.: %d\n", abCutoffs);
        printf("Computer Evaluation: %d\n", bestEval);
        printf("Computer Move......: %d\n", bestMove);
    }
}


void init_board()
{
    moves[0].board[0] = Empty;
    moves[0].board[1] = Empty;
    moves[0].board[2] = Empty;
    moves[0].board[3] = Empty;
    moves[0].board[4] = Empty;
    moves[0].board[5] = Empty;
    moves[0].board[6] = Empty;
    moves[0].board[7] = Empty;
    moves[0].board[8] = Empty;
}


void human_move()
{
    int move;
    char *p, s[100];

    printf("Your move: ");
    while (fgets(s, sizeof(s), stdin)) {
        move = strtol(s, &p, 10);
        if (p == s || *p != '\n') {
            printf("Your move: ");
        }
        else break;
    }

    moves[0].board[move] = O;
}


int main(int argc, char **argv)
{
    init_board();
    displayboard(0);

    while (1)
    {
        human_move();
        computer_move();
        displayboard(0);

        if (board_eval(moves[0].board))
        {
            printf("Computer Wins! (-_-)\n");
            init_board();
            displayboard(0);
        }
        else if (find_move(moves[0].board, 0) == NO_MOVE)
        {
            printf("A draw! (*_*)\n");
            init_board();
            displayboard(0);
        }
    }

    return 0;
}

来源：https://stackoverflow.com/questions/32973257/tic-tac-toe-iterative-implementation-of-alpha-beta-tree-search

标签

alpha-beta-pruning