回答編集履歴

修正コードを追加

2019/10/07 05:13

投稿

スコア15898

answer CHANGED Viewed

@@ -40,4 +40,44 @@
     print (x[0],x[1])
 ```
-のようにするとよいのではないでしょうか。
+のようにするとよいのではないでしょうか。
+---
+**【修正コード】**
+```Python
+from pymongo import MongoClient
+from bs4 import BeautifulSoup
+import MeCab
+from gensim.models import word2vec
+mecab = MeCab.Tagger ('/usr/local/lib/mecab/dic/mecab-ipadic-neologd')
+def main():
+    recipes = []
+    client = MongoClient('localhost', 27017)
+    db = client.html.cookpad_html
+    collection = db.test_collection
+    htmls = list(db.find().limit(100))
+    recipes = []
+    for num, html in enumerate(htmls):
+        soup = BeautifulSoup(html["html"], 'lxml')
+        for steps in soup.find_all(attrs={"class": "step_text"}):
+            node = mecab.parseToNode(steps.get_text())
+            while node:
+                if node.feature.split(",")[0] == '名詞':
+                    recipes.append(node.feature.split(",")[6])
+                node = node.next
+                recipes = list(set(recipes))
+    print(recipes)
+    model = word2vec.Word2Vec(recipes, size=200,min_count=1)
+    out = model.most_similar(positive=[u'チョコ'])
+    for x in out:
+        print (x[0],x[1])
+if __name__ == '__main__':
+    main()
+```