質問するログイン新規登録

回答編集履歴

1

修正コードを追加

2019/10/07 05:13

投稿

magichan
magichan

スコア15898

answer CHANGED
@@ -40,4 +40,44 @@
40
40
  print (x[0],x[1])
41
41
  ```
42
42
 
43
- のようにするとよいのではないでしょうか。
43
+ のようにするとよいのではないでしょうか。
44
+
45
+ ---
46
+
47
+ **【修正コード】**
48
+ ```Python
49
+ from pymongo import MongoClient
50
+ from bs4 import BeautifulSoup
51
+ import MeCab
52
+ from gensim.models import word2vec
53
+
54
# MeCab.Tagger takes a command-line style option string, so the dictionary
# directory has to be introduced with -d; a bare path is not interpreted as
# a dictionary and the default system dictionary would be used instead.
mecab = MeCab.Tagger('-d /usr/local/lib/mecab/dic/mecab-ipadic-neologd')
55
def _extract_nouns(text):
    """Return the nouns MeCab finds in *text*, in order of appearance.

    Each noun is reported in its dictionary base form (feature field 6).
    When that field is missing or is the '*' placeholder, the surface form
    is used instead.
    """
    nouns = []
    node = mecab.parseToNode(text)
    while node:
        features = node.feature.split(",")
        if features[0] == '名詞':
            base = features[6] if len(features) > 6 else '*'
            nouns.append(node.surface if base == '*' else base)
        node = node.next
    return nouns


def main():
    """Train word2vec on recipe-step nouns and print words similar to 'チョコ'.

    Reads up to 100 documents from the ``cookpad_html`` collection of the
    ``html`` database on localhost:27017, parses each document's "html"
    field, and turns every element with class "step_text" into one training
    sentence made of its nouns. The sentences are printed, a Word2Vec model
    is trained on them, and each (word, similarity) pair returned for
    'チョコ' is printed.

    Raises:
        KeyError: if 'チョコ' does not occur in the collected sentences.
    """
    # Word2Vec expects an iterable of sentences, each a list of tokens.
    # A flat list of strings would be read as sentences of single characters,
    # and de-duplicating would discard the word order the model learns from.
    sentences = []

    # The context manager closes the connection once the pages are read.
    with MongoClient('localhost', 27017) as client:
        pages = client.html.cookpad_html
        for page in pages.find().limit(100):
            soup = BeautifulSoup(page["html"], 'lxml')
            for step in soup.find_all(attrs={"class": "step_text"}):
                nouns = _extract_nouns(step.get_text())
                if nouns:
                    sentences.append(nouns)
    print(sentences)

    # NOTE(review): gensim >= 4.0 renamed `size` to `vector_size`; the
    # keyword is kept as written for the gensim 3.x API - confirm the
    # installed version.
    model = word2vec.Word2Vec(sentences, size=200, min_count=1)

    # Similarity queries live on the KeyedVectors object (model.wv);
    # calling most_similar on the model itself is deprecated.
    for word, score in model.wv.most_similar(positive=[u'チョコ']):
        print(word, score)


if __name__ == '__main__':
    main()
83
+ ```