トップに関する質問【Django】フォームにURLを入力しsubmitすると、そのURLをスクレイピングして文章を表示する機能を作成したい。

編集履歴

質問編集履歴

修正

2019/07/15 07:57

投稿

退会済みユーザー

スコア0

title CHANGED Viewed

File without changes

body CHANGED Viewed

@@ -48,7 +48,7 @@
     return render(request, 'classifier/index.html',d)
+```
 ```Python
 #index.html
 {% extends "classifier/base.html" %}

修正

2019/07/15 07:57

投稿

退会済みユーザー

スコア0

title CHANGED Viewed

File without changes

body CHANGED Viewed

@@ -19,34 +19,34 @@
     if url == None:
         d = {
-             category = "urlを入力して下さい。"
+             'category' : "urlを入力して下さい。"
         }
     else:
         # 入力されたURLでの本文を取得する。
         r = requests.get(url)
         soup = BeautifulSoup(r.text, 'lxml')
-        text = soup.find_all('p')
+        doc = soup.find_all('p')
         #取得した本文を形態素解析し、名詞のみ抽出
         word_list=''
+        for text in doc:
-        m = MeCab.Tagger()
+            m = MeCab.Tagger()
-        m_text = m.parse(text.text)
+            m_text = m.parse(text.text)
-        for row in m_text.split("\n"):
+            for row in m_text.split("\n"):
-            word =row.split("\t")[0]#タブ区切りになっている１つ目を取り出す。ここには形態素が格納されている
+                word =row.split("\t")[0]#タブ区切りになっている１つ目を取り出す。ここには形態素が格納されている
-            if word == "EOS":
+                if word == "EOS":
-                break
+                    break
-            else:
+                else:
-                pos = row.split("\t")[1]#タブ区切りになっている2つ目を取り出す。ここには品詞が格納されている
+                    pos = row.split("\t")[1]#タブ区切りになっている2つ目を取り出す。ここには品詞が格納されている
-                slice = pos[:2]
+                    slice = pos[:2]
-                if slice == "名詞":
+                    if slice == "名詞":
-                    word_list = word_list +" "+ word
+                        word_list = word_list +" "+ word
         d = {
             'category': word_list
         }
     return render(request, 'classifier/index.html',d)
-```
 ```Python

修正

2019/07/15 07:55

投稿

退会済みユーザー

スコア0

title CHANGED Viewed

File without changes

body CHANGED Viewed

@@ -16,11 +16,12 @@
 def index(request):
     url = request.GET.get('url')
-　　 #追加（1）
     if url == None:
+        d = {
-        category = "urlを入力して下さい。"
+             category = "urlを入力して下さい。"
+        }
     else:
-    #/追加(1)
         # 入力されたURLでの本文を取得する。
         r = requests.get(url)
         soup = BeautifulSoup(r.text, 'lxml')
@@ -40,9 +41,9 @@
                 if slice == "名詞":
                     word_list = word_list +" "+ word
-    d = {
+        d = {
-        'category': word_list
+            'category': word_list
-    }
+        }
     return render(request, 'classifier/index.html',d)
 ```

urlがNoneの場合

2019/07/15 07:44

投稿

退会済みユーザー

スコア0

title CHANGED Viewed

File without changes

body CHANGED Viewed

@@ -6,6 +6,7 @@
 ![エラーの内容](943f2b3f634cb39a2309b19c8758d831.png)
+**・urlがNoneの場合の処理をviews.pyに追加**
 ```Python
 #Views.py
 from django.http.response import HttpResponse
@@ -13,35 +14,40 @@
 from bs4 import BeautifulSoup
 def index(request):
     url = request.GET.get('url')
+　　 #追加（1）
+    if url == None:
+        category = "urlを入力して下さい。"
+    else:
+    #/追加(1)
+        # 入力されたURLでの本文を取得する。
+        r = requests.get(url)
+        soup = BeautifulSoup(r.text, 'lxml')
+        text = soup.find_all('p')
-    # 入力されたURLでの本文を取得する。
+        #取得した本文を形態素解析し、名詞のみ抽出
+        word_list=''
-    r = requests.get(url)
+        m = MeCab.Tagger()
-    soup = BeautifulSoup(r.text, 'lxml')
-    text = soup.find_all('p')
+        m_text = m.parse(text.text)
+        for row in m_text.split("\n"):
+            word =row.split("\t")[0]#タブ区切りになっている１つ目を取り出す。ここには形態素が格納されている
+            if word == "EOS":
+                break
+            else:
+                pos = row.split("\t")[1]#タブ区切りになっている2つ目を取り出す。ここには品詞が格納されている
+                slice = pos[:2]
+                if slice == "名詞":
+                    word_list = word_list +" "+ word
-    #取得した本文を形態素解析し、名詞のみ抽出
-    word_list=''
-    m = MeCab.Tagger()
-    m_text = m.parse(text.text)
-    for row in m_text.split("\n"):
-        word =row.split("\t")[0]#タブ区切りになっている１つ目を取り出す。ここには形態素が格納されている
-        if word == "EOS":
-            break
-        else:
-            pos = row.split("\t")[1]#タブ区切りになっている2つ目を取り出す。ここには品詞が格納されている
-            slice = pos[:2]
-            if slice == "名詞":
-                word_list = word_list +" "+ word
-    return word_list
     d = {
-        'category': wordlist
+        'category': word_list
     }
-    return render(request, 'index.html',d)
+    return render(request, 'classifier/index.html',d)
 ```
 ```Python
 #index.html
 {% extends "classifier/base.html" %}

追加

2019/07/15 07:42

投稿

退会済みユーザー

スコア0

title CHANGED Viewed

File without changes

body CHANGED Viewed

@@ -39,7 +39,7 @@
         'category': wordlist
     }
-    return render(request, 'classifier/index.html',d)
+    return render(request, 'index.html',d)
 ```
 ```Python

追加

2019/07/15 07:00

投稿

退会済みユーザー

スコア0

title CHANGED Viewed

File without changes

body CHANGED Viewed

@@ -2,6 +2,8 @@
 例えば、フォームにyahoo.comと入力した場合、そのURLでの本文を取得し表示。
+下記はローカルでhttp://localhost:8000/index/にアクセスした際に起こるエラーです。
 ![エラーの内容](943f2b3f634cb39a2309b19c8758d831.png)
 ```Python

誤字

2019/07/15 06:53

投稿

退会済みユーザー

スコア0

title CHANGED Viewed

File without changes

body CHANGED Viewed

@@ -57,4 +57,6 @@
 {% endblock %}
 ```
-かれこれ3時間ほど悩んでいるのですあ、解決できません。。よろしくお願いします。
+かれこれ3時間ほど悩んでいるのですが、解決できません。。よろしくお願いします。
+また、エラーの解決に必要なファイルがあれば追加するので、お申し付けください。