回答編集履歴
1
修正コードを追加
answer
CHANGED
@@ -40,4 +40,44 @@
|
|
40
40
|
print (x[0],x[1])
|
41
41
|
```
|
42
42
|
|
43
|
-
のようにするとよいのではないでしょうか。
|
43
|
+
のようにするとよいのではないでしょうか。
|
44
|
+
|
45
|
+
---
|
46
|
+
|
47
|
+
**【修正コード】**
|
48
|
+
```Python
|
49
|
+
from pymongo import MongoClient
|
50
|
+
from bs4 import BeautifulSoup
|
51
|
+
import MeCab
|
52
|
+
from gensim.models import word2vec
|
53
|
+
|
54
|
+
# MeCab.Tagger takes a command-line style option string, so the dictionary
# directory has to be introduced with the -d flag. A bare path is not read as
# a dictionary option and the default dictionary would be used instead.
mecab = MeCab.Tagger('-d /usr/local/lib/mecab/dic/mecab-ipadic-neologd')
|
55
|
+
def main():
    """Train word2vec on nouns from stored recipe pages and print similar words.

    Reads up to 100 documents from the ``cookpad_html`` collection of the
    ``html`` database on the local MongoDB server, parses each document's
    ``"html"`` field, and tokenizes the text of every element with class
    ``step_text`` using the module-level ``mecab`` tagger. The nouns found
    in one step form one training sentence. The collected sentences are
    printed, a model is trained on them, and the words most similar to
    ``チョコ`` are printed together with their similarity scores.
    """
    client = MongoClient('localhost', 27017)
    # client.<database>.<collection>: this object is a collection, not a db.
    pages = client.html.cookpad_html

    # Word2Vec expects an iterable of sentences, each a list of tokens.
    # A flat list of strings would be read as sentences made of single
    # characters, so whole words would never enter the vocabulary.
    recipes = []
    for page in pages.find().limit(100):
        soup = BeautifulSoup(page["html"], 'lxml')
        for step in soup.find_all(attrs={"class": "step_text"}):
            nouns = []
            node = mecab.parseToNode(step.get_text())
            while node:
                features = node.feature.split(",")
                if features[0] == '名詞':
                    # Field 6 is used as the base form, as in the original
                    # code. Assumes the dictionary writes '*' when it has
                    # no base form (e.g. unknown words) -- fall back to the
                    # surface string in that case or when the field is
                    # missing altogether.
                    base = features[6] if len(features) > 6 else '*'
                    nouns.append(node.surface if base == '*' else base)
                node = node.next
            # Keep the nouns of one step together (no de-duplication) so
            # that co-occurrence information survives for training.
            if nouns:
                recipes.append(nouns)
    print(recipes)

    # NOTE(review): `size` is the gensim 3.x keyword; gensim 4 renamed it
    # to `vector_size` -- confirm against the installed version.
    model = word2vec.Word2Vec(recipes, size=200, min_count=1)

    # Query through the KeyedVectors (`model.wv`); calling most_similar on
    # the model object itself is deprecated.
    out = model.wv.most_similar(positive=[u'チョコ'])
    for x in out:
        print(x[0], x[1])
|
79
|
+
|
80
|
+
|
81
|
+
# Run the pipeline only when this file is executed as a script, so that
# importing the module does not trigger database access or training.
if __name__ == "__main__":
    main()
|
83
|
+
```
|