質問編集履歴

1

タイトルの変更と該当箇所のソースの貼り付け

2017/12/21 17:45

投稿

benkyouchuu
benkyouchuu

スコア26

test CHANGED
@@ -1 +1 @@
1
- pythonのエラー対処方法について
1
+ gensimとMecabを使った機械学習のエラーがなかなか修正できません。助けてください
test CHANGED
@@ -41,3 +41,89 @@
41
41
 
42
42
 
43
43
  半日以上悩んでいます。誰か助けてください。よろしくお願いします。
44
+
45
+
46
+
47
+
48
+
49
+ 該当箇所も載せておきます。
50
+
51
+
52
+
53
+ ```python
54
+
55
+ def mecab(db,estimator):
56
+
57
+ dates =[]
58
+
59
+ labels = []
60
+
61
+ for age in range(1,7):
62
+
63
+ docs = []
64
+
65
+ descriptions = (data['description'].encode('utf-8') for data in db.profile.find({"age": age*10}))
66
+
67
+ tagger = MeCab.Tagger('-Ochasen')
68
+
69
+ counter = Counter()
70
+
71
+ a = list(descriptions)
72
+
73
+ print a[0],age
74
+
75
+ for description in a:
76
+
77
+ nodes = tagger.parseToNode(description)
78
+
79
+ while nodes:
80
+
81
+ if nodes.feature.split(',')[0] == '名詞':
82
+
83
+ word = nodes.surface.decode('utf-8')
84
+
85
+ counter[word] += 1
86
+
87
+ nodes = nodes.next
88
+
89
+ for word, cnt in counter.most_common():
90
+
91
+ docs.append(json.dumps(word, ensure_ascii=False))
92
+
93
+ labels.append(age)
94
+
95
+ data_train = dictionary(docs,age,estimator)
96
+
97
+ dates.append(data_train)
98
+
99
+ data_train_s, data_test_s, label_train_s, label_test_s = train_test_split(dates, labels, test_size=0.5)
100
+
101
+ print len(data_train_s)
102
+
103
+ print len(label_train_s)
104
+
105
+ estimator.fit(data_train_s, label_train_s)
106
+
107
+ print(estimator.score(data_test_s, label_test_s))
108
+
109
+
110
+
111
+ def dictionary(docs,age,estimator):
112
+
113
+ dictionary = gensim.corpora.Dictionary([docs])
114
+
115
+ data_train=[]
116
+
117
+ for doc in docs:
118
+
119
+ tmp=dictionary.doc2bow([doc])
120
+
121
+ dense = list(gensim.matutils.corpus2dense([tmp], num_terms=len(dictionary)).T[0])
122
+
123
+ age_arr=[age]
124
+
125
+ data_train.append(dense)
126
+
127
+ return data_train
128
+
129
+ ```