編集履歴

質問編集履歴

追記

2017/08/10 22:23

投稿

kohekoh

スコア140

test CHANGED Viewed

File without changes

test CHANGED Viewed

@@ -25,3 +25,159 @@
 教えていただけるとありがたいです
+追記 ：
+一応このようなコードです
+```python
+import gamma as gam
+import numpy as np
+import alpha as alp
+import gaussian as gau
+import delta
+import beta as bet
+import sys
+cimport numpy as np
+ctypedef np.float64_t DTYPE_t
+def e_step(xs, cor, np.ndarray[DTYPE_t, ndim=1] to_all, np.ndarray[DTYPE_t, ndim=2] to_user, np.ndarray[DTYPE_t, ndim=1] re_all, np.ndarray[DTYPE_t, ndim=2] re_user, np.ndarray[DTYPE_t, ndim=1] wo_all, np.ndarray[DTYPE_t, ndim=2] wo_topic, np.ndarray[DTYPE_t, ndim=1] lo_all, np.ndarray[DTYPE_t, ndim=2] lo_topic, int Region, int Topic,voc,User,location, VOCA, np.ndarray[DTYPE_t, ndim=2] mean, np.ndarray[DTYPE_t, ndim=3] cov):
+    """Pick the highest-scoring region and topic for every record in ``xs``.
+
+    Each record is a ``":|:"``-separated string.  Fields 0-4 are read as
+    user, location, latitude, longitude and document text; field 6 is
+    parsed as an integer row index into ``lo_topic`` (presumably the topic
+    assigned in the previous iteration -- confirm against the caller).
+
+    For each record the region ``r`` with the largest
+    ``beta * delta * gaussian`` score is chosen first, then the topic ``z``
+    with the largest ``alpha * gamma * delta * gaussian`` score, where the
+    Gaussian term of the topic score uses the chosen region.  Comparison is
+    strict and starts from 0, so ties keep the earlier index and index 0 is
+    used when no score is positive.  If the Gaussian term cannot be
+    evaluated, the score falls back to the product of the other terms.
+
+    Args:
+        xs: Iterable of record strings (see above).
+        cor: Per-record 1-D float64 coordinate arrays, iterated in lockstep
+            with ``xs``.
+        to_all, to_user: Arrays handed to ``alp.alpha``; ``to_user`` is
+            indexed ``[user, topic]``.
+        re_all, re_user: Arrays handed to ``bet.beta``; ``re_user`` is
+            indexed ``[user, region]``.
+        wo_all, wo_topic: Arrays handed to ``gam.gamma``; ``wo_topic`` is
+            indexed ``[topic, word]``.
+        lo_all, lo_topic: Arrays handed to ``delta.delta``; ``lo_topic`` is
+            indexed ``[topic, location]``.
+        Region: Number of candidate regions.
+        Topic: Number of candidate topics.
+        voc: Per-record word sequences, iterated in lockstep with ``xs``.
+        User: Sequence of user ids; ``User.index`` gives the user row.
+        location: Sequence of location ids; ``location.index`` gives the
+            location column.
+        VOCA: Sequence of vocabulary words; ``VOCA.index`` gives the word
+            column.
+        mean: Per-region mean, indexed ``mean[r]``.
+        cov: Per-region covariance, indexed ``cov[r]``.
+
+    Returns:
+        A list with one string per record: fields 0-4 of the input followed
+        by the chosen region index and topic index, joined with ``":|:"``.
+
+    Raises:
+        IndexError: If a record has fewer than seven fields.
+        ValueError: If field 6 is not an integer, or a user, location or
+            word is missing from ``User``/``location``/``VOCA`` (assuming
+            these are lists, whose ``index`` raises ``ValueError``).
+    """
+    xdata = []
+    cdef str w
+    cdef np.ndarray[DTYPE_t, ndim=1] cco
+    cdef int r, z
+    for w, v, cco in zip(xs, voc, cor):
+        # Split the record once instead of re-splitting it for every field.
+        fields = w.split(":|:")
+        usr = fields[0]
+        loc = fields[1]
+        lat = fields[2]  # latitude
+        lon = fields[3]  # longitude
+        document = fields[4]
+        prev_topic = int(fields[6])
+        inde = User.index(usr)  # row of this user in the per-user arrays
+        inde_loc = location.index(loc)
+
+        # NOTE(review): the hoisting below assumes delta.delta and
+        # gau.gaussian are pure functions of their arguments -- confirm.
+
+        # --- Region: keep the r with the highest score -------------------
+        # The delta term does not depend on r, so evaluate it once.
+        de = delta.delta(lo_all, lo_topic[prev_topic], lo_all[inde_loc], lo_topic[prev_topic, inde_loc])
+        user_regions = re_user[inde]
+        r_b = 0.
+        rnum = 0
+        for r in range(Region):
+            be = bet.beta(re_all, user_regions, re_user[inde, r], re_all[r])
+            r_a = be * de
+            try:
+                r_a = r_a * gau.gaussian(cco, mean[r], cov[r])
+            except Exception:
+                # Best effort: keep the score without the Gaussian term
+                # when it cannot be evaluated for this region.
+                pass
+            if r_a > r_b:
+                rnum = r
+                r_b = r_a
+
+        # --- Topic: keep the z with the highest score --------------------
+        # Vocabulary lookups are linear scans and independent of z, so
+        # resolve them once per record.
+        word_ids = [VOCA.index(wrd) for wrd in v]
+        user_topics = to_user[inde]
+        # The Gaussian term only depends on the chosen region, not on z.
+        try:
+            gauss_best = gau.gaussian(cco, mean[rnum], cov[rnum])
+        except Exception:
+            gauss_best = None  # fall back to the score without this term
+        z_b = 0.
+        znum = 0
+        for z in range(Topic):
+            al = alp.alpha(to_all, user_topics, to_user[inde, z], to_all[z])
+            topic_words = wo_topic[z]
+            if word_ids:
+                ga = np.prod([gam.gamma(wo_all, topic_words, wo_all[i], wo_topic[z, i]) for i in word_ids])
+            else:
+                ga = gam.gamma(wo_all, topic_words, 0, 0)
+            de = delta.delta(lo_all, lo_topic[z], lo_all[inde_loc], lo_topic[z, inde_loc])
+            z_a = al * ga * de
+            if gauss_best is not None:
+                z_a = z_a * gauss_best
+            if z_a > z_b:
+                znum = z
+                z_b = z_a
+
+        xdata.append(":|:".join([usr, loc, lat, lon, document, str(rnum), str(znum)]))
+    return xdata
+```