回答編集履歴
3
bsdfanさんのロジックを取り入れて修正しました。私の環境では、Numba版と比べてCython版の方が数割速くなるようです。
answer
CHANGED
@@ -22,16 +22,24 @@
|
|
22
22
|
result2 = cy.test_20190524_changed(a, b, c, x, y)
|
23
23
|
print('time : ' + str(round((t.time() - start),5)) + ' [sec]')
|
24
24
|
|
25
|
-
|
25
|
+
start = t.time()
|
26
|
+
result3 = cy.test_20190524_changed2(a, b, c, x, y)
|
27
|
+
print('time : ' + str(round((t.time() - start),5)) + ' [sec]')
|
26
28
|
|
29
|
+
|
30
|
+
print((result1 == result2).all() and
|
31
|
+
(result2 == result3).all())
|
32
|
+
|
27
33
|
if __name__ == "__main__":
|
28
34
|
main()
|
29
35
|
""" =>
|
30
|
-
time : 0.
|
36
|
+
time : 0.09572 [sec]
|
37
|
+
time : 0.01765 [sec]
|
31
|
-
time : 0.
|
38
|
+
time : 0.01645 [sec]
|
32
39
|
True
|
33
40
|
"""
|
34
41
|
|
42
|
+
|
35
43
|
```
|
36
44
|
|
37
45
|
**test_20190524.pyx**
|
@@ -66,6 +74,31 @@
|
|
66
74
|
|
67
75
|
@cython.boundscheck(False)
|
68
76
|
@cython.wraparound(False)
|
77
|
+
cpdef np.ndarray[np_float_t, ndim=2] test_20190524_changed2(
|
78
|
+
np.ndarray[np_float_t, ndim=3] a,
|
79
|
+
np.ndarray[np_float_t, ndim=2] b,
|
80
|
+
np.ndarray[np_int_t, ndim=1] c,
|
81
|
+
np.ndarray[np_int_t, ndim=1] x,
|
82
|
+
np.ndarray[np_int_t, ndim=1] y):
|
83
|
+
|
84
|
+
cdef np.ndarray[np_float_t, ndim=3] d
|
85
|
+
cdef np.ndarray[np_float_t, ndim=2] img
|
86
|
+
cdef int i, j, k, num
|
87
|
+
|
88
|
+
num = 10000
|
89
|
+
|
90
|
+
d = a * b[c, 0]
|
91
|
+
|
92
|
+
img = np.zeros((10000, 10000))
|
93
|
+
|
94
|
+
for i in range(num):
|
95
|
+
for j in range(2):
|
96
|
+
for k in range(2):
|
97
|
+
img[x[i] + j , y[i] + k] += d[j, k, i]
|
98
|
+
return img
|
99
|
+
|
100
|
+
@cython.boundscheck(False)
|
101
|
+
@cython.wraparound(False)
|
69
102
|
cpdef np.ndarray[np_int_t, ndim=1] test_20190524_origin(
|
70
103
|
np.ndarray[np_float_t, ndim=3] a,
|
71
104
|
np.ndarray[np_float_t, ndim=2] b,
|
2
改行の編集
answer
CHANGED
@@ -63,6 +63,7 @@
|
|
63
63
|
for k in range(2):
|
64
64
|
img[x[i] + j , y[i] + k] += a[j, k, i] * b[c[i], 0]
|
65
65
|
return img
|
66
|
+
|
66
67
|
@cython.boundscheck(False)
|
67
68
|
@cython.wraparound(False)
|
68
69
|
cpdef np.ndarray[np_int_t, ndim=1] test_20190524_origin(
|
1
デコレータの修正。速度はほとんど変化なし。
answer
CHANGED
@@ -45,7 +45,6 @@
|
|
45
45
|
|
46
46
|
@cython.boundscheck(False)
|
47
47
|
@cython.wraparound(False)
|
48
|
-
|
49
48
|
cpdef np.ndarray[np_float_t, ndim=2] test_20190524_changed(
|
50
49
|
np.ndarray[np_float_t, ndim=3] a,
|
51
50
|
np.ndarray[np_float_t, ndim=2] b,
|
@@ -64,7 +63,8 @@
|
|
64
63
|
for k in range(2):
|
65
64
|
img[x[i] + j , y[i] + k] += a[j, k, i] * b[c[i], 0]
|
66
65
|
return img
|
67
|
-
|
66
|
+
@cython.boundscheck(False)
|
67
|
+
@cython.wraparound(False)
|
68
68
|
cpdef np.ndarray[np_int_t, ndim=1] test_20190524_origin(
|
69
69
|
np.ndarray[np_float_t, ndim=3] a,
|
70
70
|
np.ndarray[np_float_t, ndim=2] b,
|