三目並べでAlphaBeta法を実装したが，MiniMax法と同じ動作ををしてくれない

前提

三目並べAIをAlphaBeta法で実装しています．
三目並べは
https://github.com/koki0702/tictactoe-ai-youtube
こちらのサイトを参考にしており，Minimax法で実装されていますが，それをAlphaBeta法に変更したいと考えています．

発生している問題・エラーメッセージ

AlphaBeta法はMinimax法の上位互換であり，計算量を少なくしたアルゴリズムであることから，同じ手を打ってくることを期待しているのですが，結果が異なってしまっています．

該当のソースコード

Minimax(これが正しい結果)

Python
1def minimax(board, player):
2    maximize_player = 0
3    minimize_player = 1
4
5    if board.is_win(maximize_player):
6        return (1, None)
7    elif board.is_win(minimize_player):
8        return (-1, None)
9    elif board.is_end():
10        return (0, None)
11
12    opp = 1 if player == 0 else 0
13
14    if player == maximize_player:
15        max_score = float('-inf')
16        max_idx = None
17
18        for idx in board.valid_puts():
19            board.put(player, idx)
20            score, next_idx = minimax(board, opp)
21            if max_score < score:
22                max_score = score
23                max_idx = idx
24            board.take(idx)
25        
26        return (max_score, max_idx)
27    else:
28        min_score = float('inf')
29        min_idx = None
30
31        for idx in board.valid_puts():
32            board.put(player, idx)
33            score, next_idx = minimax(board, opp)
34            if min_score > score:
35                min_score = score
36                min_idx = idx
37            board.take(idx)
38        
39        return (min_score, min_idx)

AlphaBeta(該当のソースコード)

Python
1def alphabeta(board, player, depth, alpha, beta):
2    # print("depth = ",depth)
3    maximize_player = 0
4    minimize_player = 1
5
6    # print(depth)
7
8    if board.is_win(maximize_player):
9        return (1, None)
10    elif board.is_win(minimize_player):
11        return (-1, None)
12    elif board.is_end() or depth == 0:
13        return (0, None)
14    
15
16    opp = 1 if player == 0 else 0
17
18    if player == maximize_player:
19        for put in board.valid_puts():
20            score, next_idx = alphabeta(board.board_result(put), opp, depth-1, alpha, beta)
21            alpha = max(alpha, score)
22            if alpha >= beta:
23                break
24            next_idx = put
25        return alpha, next_idx
26
27    else:
28        for put in board.valid_puts():
29            score, next_idx = alphabeta(board.board_result(put), opp, depth-1, alpha, beta)
30            beta = min(beta, score)
31            if alpha <= beta:
32                break
33            next_idx = put
34        return beta, next_idx

Board クラス

Python
1class Board:
2    def __init__(self) -> None:
3        self.state = [-1] * 9   #置かれている種類(O or X)
4        self.count = 0
5
6    def render(self):
7        MARKS = {0: 'X', 1: 'O'}
8        text = """
9                0|1|2
10                -----
11                3|4|5
12                -----
13                6|7|8
14                """
15        for idx, x in enumerate(self.state):
16            if x is not -1:
17                text = text.replace(str(idx), MARKS[x])  # 4 -> X
18        print(text)
19
20    def put(self, player, idx):
21        if self.state[idx] != -1 or (not(0 <= idx <= 8)):
22            return False
23
24        self.state[idx] = player
25        self.count += 1
26        return True
27
28    def take(self, idx):
29        self.count -= 1
30        self.state[idx] = -1
31
32    def is_win(self, player):
33        s = self.state
34        if(
35            s[0] == s[1] == s[2] == player or
36            s[3] == s[4] == s[5] == player or
37            s[6] == s[7] == s[8] == player or
38            s[0] == s[3] == s[6] == player or
39            s[1] == s[4] == s[7] == player or
40            s[2] == s[5] == s[8] == player or
41            s[0] == s[4] == s[8] == player or
42            s[2] == s[4] == s[6] == player
43        ):
44            return True
45        return False
46
47    def eva_value(self, player):
48        opp = 0 if player == 1 else 1
49        if self.is_win(player):
50            return 1
51        elif self.is_win(opp):
52            return -1
53        else:
54            return 0
55
56    def is_end(self):
57        if -1 in self.state:
58            return False
59        
60        return True
61
62    def valid_puts(self):
63        puts = []   #置ける候補
64        for idx, player in enumerate(self.state):
65            if player == -1:
66                puts.append(idx)
67        return puts
68
69    def board_result(self, idx):
70        tmp = copy.deepcopy(self)
71        n_player = tmp.next_player()
72        tmp.put(n_player, idx)
73        return tmp
74
75    def next_player(self):
76        # 0...先行f, 1...後攻s
77        state = self.state
78        f = s = 0
79        if self.count == 0:
80            return 0
81        
82        for p in state:
83            if p == 0:
84                f += 1
85            elif p == 1:
86                s += 1
87
88        if f == s:
89            return 0
90        elif f > s:
91            return 1
92        else:
93            return -1

NPCクラス

Python
1
2def minimax(board, player):
3    maximize_player = 0
4    minimize_player = 1
5
6    if board.is_win(maximize_player):
7        return (1, None)
8    elif board.is_win(minimize_player):
9        return (-1, None)
10    elif board.is_end():
11        return (0, None)
12
13    opp = 1 if player == 0 else 0
14
15    if player == maximize_player:
16        max_score = float('-inf')
17        max_idx = None
18
19        for idx in board.valid_puts():
20            board.put(player, idx)
21            score, next_idx = minimax(board, opp)
22            if max_score < score:
23                max_score = score
24                max_idx = idx
25            board.take(idx)
26        
27        return (max_score, max_idx)
28    else:
29        min_score = float('inf')
30        min_idx = None
31
32        for idx in board.valid_puts():
33            board.put(player, idx)
34            score, next_idx = minimax(board, opp)
35            if min_score > score:
36                min_score = score
37                min_idx = idx
38            board.take(idx)
39        
40        return (min_score, min_idx)
41
42def alphabeta(board, player, depth, alpha, beta):
43    # print("depth = ",depth)
44    maximize_player = 0
45    minimize_player = 1
46
47    # print(depth)
48
49    if board.is_win(maximize_player):
50        return (1, None)
51    elif board.is_win(minimize_player):
52        return (-1, None)
53    elif board.is_end() or depth == 0:
54        return (0, None)
55    
56
57    opp = 1 if player == 0 else 0
58
59    if player == maximize_player:
60        for put in board.valid_puts():
61            score, next_idx = alphabeta(board.board_result(put), opp, depth-1, alpha, beta)
62            alpha = max(alpha, score)
63            if alpha >= beta:
64                break
65            next_idx = put
66        return alpha, next_idx
67
68    else:
69        for put in board.valid_puts():
70            score, next_idx = alphabeta(board.board_result(put), opp, depth-1, alpha, beta)
71            beta = min(beta, score)
72            if alpha <= beta:
73                break
74            next_idx = put
75        return beta, next_idx
76
77class MiniMax:
78    def __init__(self, player):
79        self.player = player
80    
81    def play(self, board):
82        score, idx = minimax(board, self.player, )
83        return board.put(self.player,idx), idx
84
85class AplhaBeta:
86    def __init__(self , player):
87        self.player = player
88        self.depth = float('inf')
89        # self.depth = 5
90
91    def play(self, board):
92        score ,idx = alphabeta(board, self.player, self.depth, float('-inf'), float('inf'))
93        # idx = alphabeta(board, self.player, self.depth, -500, 500)
94        return board.put(self.player, idx), idx

Playerクラス

Python
1class HumanPlayer:
2    def __init__(self, player):
3        self.player = player
4        
5    def play(self, board):
6        while True:
7            print('0~8の数字を入力してください：', end="")
8            idx = input()
9
10            try:
11                idx = int(idx)
12                success = board.put(self.player, idx)
13                if success:
14                    break
15                else:
16                    print("適切な数字を入力してください")
17            except ValueError:
18                pass

Main

Python
1import Board
2import NPC
3import PLAYER
4
5board = Board()
6players = [NPC.AplhaBeta(0), PLAYER.HumanPlayer(1)]
7player = 0 # 0 or 1
8
9while True:
10    p = players[player]
11    p.play(board)
12    board.render()
13
14    if board.is_win(player):
15        break
16    elif board.is_end():
17        print("引き分け")
18        break
19
20    player = 1 if player == 0 else 0