invalid syntax の解決方法について

前提・実現したいこと

強化学習に関するプログラムを作成中です。
https://github.com/aimacode/aima-python
上記リポジトリにある mdp.py、または以下の Qiita 記事に掲載されているコードをコピーして使用しています。
https://qiita.com/Hironsan/items/56f6c0b2f4cfd28dd906

発生している問題・エラーメッセージ

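当初は numpy 側に原因があるのではないかと考えましたが、確認したところ numpy に異常はありませんでした。
下記の Traceback のとおり、エラーは utils.py の 647 行目（`yield from subexpressions(arg)` の行）で発生しています。
解決方法をご存知の方がいらっしゃれば教えてください！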

Traceback (most recent call last):
  File "qlearning.py", line 6, in <module>
    from utils import vector_add, orientations, turn_right, turn_left
  File "/Users/Owner/Projects/qlearning/utils.py", line 647
    yield from subexpressions(arg)
             ^
SyntaxError: invalid syntax

該当のソースコード

python
import random
from collections import defaultdict

import numpy as np

from utils import orientations, print_table, turn_left, turn_right, vector_add
7
8
class MDP:
    """Base class for a Markov decision process.

    The instance stores the initial state, the list of actions, the terminal
    states and the discount factor. ``states`` (a set) and ``reward`` (a dict
    keyed by state) start out empty and are meant to be filled in by a
    subclass or by the caller. ``T`` must be overridden by subclasses.
    """

    def __init__(self, init, actlist, terminals, gamma=.9):
        """Store the MDP description.

        init      -- the initial state.
        actlist   -- actions returned by ``actions`` for non-terminal states.
        terminals -- container of terminal states (tested with ``in``).
        gamma     -- discount factor; must satisfy 0 <= gamma < 1.

        Raises ValueError when gamma is outside that range.
        """
        self.init = init
        self.actlist = actlist
        self.terminals = terminals
        if not (0 <= gamma < 1):
            raise ValueError("An MDP must have 0 <= gamma < 1")
        self.gamma = gamma
        self.states = set()
        self.reward = {}

    def R(self, state):
        """Return the reward stored for ``state`` (KeyError if unknown)."""
        return self.reward[state]

    def T(self, state, action):
        """Transition model; not implemented in the base class.

        The overrides in this file return a list of (probability, state)
        pairs, which is the shape ``value_iteration`` iterates over.
        """
        raise NotImplementedError

    def actions(self, state):
        """Return the actions available in ``state``.

        Terminal states expose the single pseudo-action ``None``; every other
        state exposes the full ``actlist``.
        """
        if state in self.terminals:
            return [None]
        return self.actlist
33
class GridMDP(MDP):
    """An MDP laid out on a two-dimensional grid.

    ``grid`` is a list of rows. Each cell holds the reward for that cell, or
    ``None`` for a cell that is not a state (such cells are left out of
    ``states``). States are ``(x, y)`` tuples; the last row passed in becomes
    ``y == 0``.
    """

    def __init__(self, grid, terminals, init=(0, 0), gamma=.9):
        """Build the state set and reward table from ``grid``.

        grid      -- list of equally long rows of rewards / None.
        terminals -- terminal states as (x, y) tuples.
        init      -- initial state, default (0, 0).
        gamma     -- discount factor, validated by MDP.__init__.
        """
        # Flip a copy so that row 0 is the bottom row; the caller's list is
        # left untouched (an in-place reverse would flip it on every call).
        grid = list(reversed(grid))
        # The action list is the set of movement directions from utils.
        MDP.__init__(self, init, actlist=orientations,
                     terminals=terminals, gamma=gamma)
        self.grid = grid
        self.rows = len(grid)
        self.cols = len(grid[0])
        for x in range(self.cols):
            for y in range(self.rows):
                self.reward[x, y] = grid[y][x]
                if grid[y][x] is not None:
                    self.states.add((x, y))

    def T(self, state, action):
        """Return the (probability, next_state) pairs for ``action``.

        With ``action is None`` (the pseudo-action of terminal states) the
        only outcome is staying put with probability 0.0. Otherwise the agent
        moves in the intended direction with probability 0.8 and in the
        turn_right / turn_left directions with probability 0.1 each.
        """
        if action is None:
            return [(0.0, state)]
        return [(0.8, self.go(state, action)),
                (0.1, self.go(state, turn_right(action))),
                (0.1, self.go(state, turn_left(action)))]

    def go(self, state, direction):
        """Return the state reached by moving in ``direction``.

        If the resulting cell is not a valid state, the agent stays where it
        is.
        """
        state1 = vector_add(state, direction)
        return state1 if state1 in self.states else state

    def to_grid(self, mapping):
        """Convert a {(x, y): value} mapping into a list of rows.

        Rows are emitted top row first (highest y first); cells missing from
        ``mapping`` become None.
        """
        return list(reversed([[mapping.get((x, y), None)
                               for x in range(self.cols)]
                              for y in range(self.rows)]))

    def to_arrows(self, policy):
        """Render a {state: action} policy as a grid of arrow characters."""
        chars = {(1, 0): '>', (0, 1): '^', (-1, 0): '<', (0, -1): 'v', None: '.'}
        return self.to_grid({s: chars[a] for (s, a) in policy.items()})
# Example construction of a grid world with 3 rows and 4 columns. The
# instance is not bound to a name, so this statement only exercises the
# constructor and its result is discarded.
GridMDP([[-0.04, -0.04, -0.04, +1],
         [-0.04, None, -0.04, -1],
         [-0.04, -0.04, -0.04, -0.04]],
        terminals=[(3, 2), (3, 1)])
74
75
def value_iteration(mdp, epsilon=0.001):
    """Solve ``mdp`` by value iteration and return a {state: utility} dict.

    mdp     -- object providing ``states``, ``gamma``, ``R(s)``, ``T(s, a)``
               (a list of (probability, state) pairs) and ``actions(s)``.
    epsilon -- tolerance used in the stopping test
               ``delta < epsilon * (1 - gamma) / gamma``.

    The returned dict is the utility estimate held at the start of the sweep
    that satisfied the stopping test.
    """
    R, T, gamma = mdp.R, mdp.T, mdp.gamma

    # MDP accepts gamma == 0, for which the stopping threshold would divide
    # by zero. With no discounted future term the utility of a state is just
    # its immediate reward.
    if gamma == 0:
        return {s: R(s) for s in mdp.states}

    # Loop-invariant, so compute it once.
    threshold = epsilon * (1 - gamma) / gamma

    U1 = {s: 0 for s in mdp.states}
    while True:
        U = U1.copy()
        delta = 0
        for s in mdp.states:
            # Bellman update: reward plus discounted best expected utility.
            U1[s] = R(s) + gamma * max(
                sum(p * U[s1] for (p, s1) in T(s, a))
                for a in mdp.actions(s))
            delta = max(delta, abs(U1[s] - U[s]))
        if delta < threshold:
            return U
88
89
def best_policy(mdp, U):
    """Return the policy that is greedy with respect to the utilities ``U``.

    mdp -- object providing ``states``, ``actions(s)`` and ``T(s, a)``.
    U   -- {state: utility} mapping, e.g. the result of value_iteration.

    The result maps every state to the action with the highest expected
    utility; on ties the first such action in ``mdp.actions(s)`` wins.
    """
    pi = {}
    for s in mdp.states:
        # The builtin max with key= does the arg-max selection; no helper
        # from another module is required.
        pi[s] = max(mdp.actions(s),
                    key=lambda a: expected_utility(a, s, U, mdp))
    return pi
95
96
def expected_utility(a, s, U, mdp):
    """Return the expected utility of doing action ``a`` in state ``s``.

    The value is the sum of ``p * U[s1]`` over the (probability, state)
    pairs returned by ``mdp.T(s, a)``.
    """
    return sum(p * U[s1] for (p, s1) in mdp.T(s, a))
99
100
# Grid world with 3 rows and 4 columns: -0.04 reward per ordinary cell, one
# non-state cell (None) and two terminal cells with rewards +1 and -1.
sequential_decision_environment = GridMDP([[-0.04, -0.04, -0.04, +1],
                                           [-0.04, None, -0.04, -1],
                                           [-0.04, -0.04, -0.04, -0.04]],
                                          terminals=[(3, 2), (3, 1)])

# Compute utilities with epsilon = .01, then derive the greedy policy.
pi = best_policy(sequential_decision_environment,
                 value_iteration(sequential_decision_environment, .01))

# NOTE(review): print_table is not defined in this file; it has to be
# imported (presumably from utils, next to the other helpers) -- confirm.
print_table(sequential_decision_environment.to_arrows(pi))
109
110
111
112
113
114
115
116
117
--utils.py-- 該当箇所（Traceback が指している 647 行目付近）
119
def subexpressions(x):
    """Generate x first, then everything nested inside its arguments.

    When x is an Expr, each element of ``x.args`` is expanded recursively in
    order, so the items come out parent-before-children. Anything that is
    not an Expr yields only itself.
    """
    yield x
    if not isinstance(x, Expr):
        return
    for child in x.args:
        # Explicit delegation loop; for plain iteration this produces the
        # same sequence as ``yield from``, which needs Python 3.3 or newer.
        for nested in subexpressions(child):
            yield nested
126