質問編集履歴
1
追記
test
CHANGED
File without changes
|
test
CHANGED
@@ -25,3 +25,159 @@
|
|
25
25
|
|
26
26
|
|
27
27
|
教えていただけるとありがたいです
|
28
|
+
|
29
|
+
|
30
|
+
|
31
|
+
追記 :
|
32
|
+
|
33
|
+
一応このようなコードです
|
34
|
+
|
35
|
+
|
36
|
+
|
37
|
+
```python
|
38
|
+
|
39
|
+
import gamma as gam
|
40
|
+
|
41
|
+
import numpy as np
|
42
|
+
|
43
|
+
import alpha as alp
|
44
|
+
|
45
|
+
import gaussian as gau
|
46
|
+
|
47
|
+
import delta
|
48
|
+
|
49
|
+
import beta as bet
|
50
|
+
|
51
|
+
import sys
|
52
|
+
|
53
|
+
cimport numpy as np
|
54
|
+
|
55
|
+
ctypedef np.float64_t DTYPE_t
|
56
|
+
|
57
|
+
|
58
|
+
|
59
|
+
def e_step(xs, cor,
           np.ndarray[DTYPE_t, ndim=1] to_all,
           np.ndarray[DTYPE_t, ndim=2] to_user,
           np.ndarray[DTYPE_t, ndim=1] re_all,
           np.ndarray[DTYPE_t, ndim=2] re_user,
           np.ndarray[DTYPE_t, ndim=1] wo_all,
           np.ndarray[DTYPE_t, ndim=2] wo_topic,
           np.ndarray[DTYPE_t, ndim=1] lo_all,
           np.ndarray[DTYPE_t, ndim=2] lo_topic,
           int Region, int Topic, voc, User, location, VOCA,
           np.ndarray[DTYPE_t, ndim=2] mean,
           np.ndarray[DTYPE_t, ndim=3] cov):
    """Pick the best-scoring region and topic index for every record.

    Each record in ``xs`` is a ``":|:"``-delimited string. Fields 0-4 are
    read as user, location, latitude, longitude and document text; field 6
    is parsed as an int and used as a row index into ``lo_topic``. Field 5
    is not read. ``xs``, ``voc`` and ``cor`` are walked in lockstep with
    ``zip`` (so iteration stops at the shortest of the three).

    For each record the function:

    1. scores every ``r`` in ``range(Region)`` as
       ``beta * delta * gaussian(cco, mean[r], cov[r])`` and keeps the
       ``r`` with the largest score;
    2. scores every ``z`` in ``range(Topic)`` as
       ``alpha * gamma * delta * gaussian(cco, mean[rnum], cov[rnum])``
       using the region chosen in step 1, and keeps the ``z`` with the
       largest score.

    If evaluating a score with the Gaussian factor raises, the score is
    recomputed without that factor (best-effort fallback kept from the
    original code). Scores are compared against an initial best of ``0.``,
    so when no score is strictly greater than zero the selected index
    stays ``0``.

    Parameters
    ----------
    xs : iterable of str
        Records in the ``":|:"`` layout described above.
    cor : iterable of 1-D float64 arrays
        One array per record, passed to ``gau.gaussian`` as its first
        argument (presumably the record's coordinates; confirm with caller).
    to_all, to_user, re_all, re_user, wo_all, wo_topic, lo_all, lo_topic
        float64 arrays forwarded to the ``alp`` / ``bet`` / ``gam`` /
        ``delta`` helpers. ``to_user`` and ``re_user`` are indexed by user
        row, ``wo_topic`` and ``lo_topic`` by topic row.
    Region, Topic : int
        Number of region / topic indices to scan.
    voc : iterable
        Per-record collection of words; each word is looked up in ``VOCA``.
    User, location, VOCA
        Sequences supporting ``.index(value)``; used to turn user,
        location and word values into array indices.
    mean, cov
        Indexed by region and passed to ``gau.gaussian``.

    Returns
    -------
    list of str
        One string per record:
        ``user:|:location:|:lat:|:lon:|:document:|:<region>:|:<topic>``.

    Notes
    -----
    ``.index`` lookup failures and malformed records (too few fields, or a
    non-integer field 6) propagate to the caller unchanged.
    """
    cdef str w
    cdef np.ndarray[DTYPE_t, ndim=1] cco
    cdef int r, z

    xdata = []

    for w, v, cco in zip(xs, voc, cor):
        # Split the record once; the original re-split it for every field
        # and again on every region iteration.
        fields = w.split(":|:")
        document = fields[4]
        usr = fields[0]
        loc = fields[1]
        lat = fields[2]  # latitude
        lon = fields[3]  # longitude

        inde = User.index(usr)  # row index of this user
        inde_loc = location.index(loc)
        prev_topic = int(fields[6])

        # Plain array look-ups that do not depend on the loop variables.
        user_regions = re_user[inde]
        user_topics = to_user[inde]
        lo_all_at_loc = lo_all[inde_loc]
        prev_topic_row = lo_topic[prev_topic]
        prev_topic_at_loc = lo_topic[prev_topic, inde_loc]

        # --- Region: keep the r with the highest score for this record ---
        # TODO(review): the original author marked this update formula as
        # "needs verification".
        rnum = 0
        r_b = 0.
        for r in range(Region):
            be = bet.beta(re_all, user_regions, re_user[inde, r], re_all[r])
            # Project helper calls are kept inside the loop as in the
            # original; their purity cannot be confirmed from this file.
            de = delta.delta(lo_all, prev_topic_row, lo_all_at_loc,
                             prev_topic_at_loc)
            try:
                r_a = be * de * gau.gaussian(cco, mean[r], cov[r])
            except Exception:
                # Best-effort fallback: drop the Gaussian factor. Narrowed
                # from a bare ``except`` so KeyboardInterrupt / SystemExit
                # are no longer swallowed.
                r_a = be * de
            if r_a > r_b:
                rnum = r
                r_b = r_a

        # --- Topic: keep the z with the highest score for this record ---
        # Vocabulary indices do not depend on z, so resolve them once per
        # record instead of twice per word on every topic iteration.
        word_ids = [VOCA.index(wrd) for wrd in v]

        # TODO(review): the original author marked this update formula as
        # "needs verification".
        znum = 0
        z_b = 0.
        for z in range(Topic):
            al = alp.alpha(to_all, user_topics, to_user[inde, z], to_all[z])
            if word_ids:
                ga = np.prod([
                    gam.gamma(wo_all, wo_topic[z], wo_all[i], wo_topic[z, i])
                    for i in word_ids
                ])
            else:
                # Record without words: single gamma term with zero counts.
                ga = gam.gamma(wo_all, wo_topic[z], 0, 0)
            de = delta.delta(lo_all, lo_topic[z], lo_all_at_loc,
                             lo_topic[z, inde_loc])
            try:
                z_a = al * ga * de * gau.gaussian(cco, mean[rnum], cov[rnum])
            except Exception:
                # Same best-effort fallback as in the region loop.
                z_a = al * ga * de
            if z_a > z_b:
                znum = z
                z_b = z_a

        xdata.append(":|:".join(
            [usr, loc, lat, lon, document, str(rnum), str(znum)]
        ))

    return xdata
|
180
|
+
|
181
|
+
|
182
|
+
|
183
|
+
```
|