
Answer edit history

Revision 1: syntax fix

Posted 2019/06/25 12:38 by a withdrawn user

answer CHANGED
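In this revision, the separate get_href / get_soup / scraping_gh functions are collapsed into a single top-level loop that follows each listing link (h3.shisetsu_name_s, h3.shisetsu_name, a.fa_name) to its detail page and prints the clinic's homepage URL (p.lnk_url) and name (span.name or dd.name) whenever those elements exist.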
````diff
@@ -9,52 +9,63 @@
 from bs4 import BeautifulSoup
 
 
-def get_href():
-    i = 1
-    num = 2
-    while i < 48:
-        for num in range(1, 300):
-            zero_i = str(i).zfill(2)
-            base = 'https://www.judo-ch.jp/sekkotsuinsrch/{}/list/{}/'
-            url = base.format(zero_i,num)
-            res = requests.get(url)
-            if res.status_code == 200:
+i = 1
+num = 2
+while i < 48:
+    for num in range(1, 300):
+        zero_i = str(i).zfill(2)
+        base = 'https://www.judo-ch.jp/sekkotsuinsrch/{}/list/{}/'
+        url = base.format(zero_i,num)
+        res = requests.get(url)
+        if res.status_code == 200:
+            html = requests.get(url)
+            soup = BeautifulSoup(html.content,"html.parser")
+            for tag in soup.find_all("h3","shisetsu_name_s"):
+                link = tag.find("a")
+                url = link.get("href")
                 html = requests.get(url)
-                soup = BeautifulSoup(html.content,"html.parser")
-                for tag in soup.find_all("h3","shisetsu_name_s"):
-                    link = tag.find("a")
-                    print(link)
-                for s_tag in soup.find_all("h3","shisetsu_name"):
-                    s_link = s_tag.find("a")
-                    s_url = s_link.get("href")
-                    print(s_url)
-                links = soup.find_all("a","fa_name")
-                for link in links:
-                    print(link)
-            else:
-                break
-            num += 1
+                get_soup = BeautifulSoup(html.content, "html.parser")
+                res_p = get_soup.find("p", "lnk_url")
+                if res_p is not None:
+                    print(res_p.text)
+                res_p = get_soup.find("span", "name")
+                if res_p is not None:
+                    print(res_p.text)
+                res_p = get_soup.find("dd", "name")
+                if res_p is not None:
+                    print(res_p.text)
+            for s_tag in soup.find_all("h3","shisetsu_name"):
+                s_link = s_tag.find("a")
+                s_url = s_link.get("href")
+                html = requests.get(s_url)
+                get_soup = BeautifulSoup(html.content, "html.parser")
+                res_p = get_soup.find("p", "lnk_url")
+                if res_p is not None:
+                    print(res_p.text)
+                res_p = get_soup.find("span", "name")
+                if res_p is not None:
+                    print(res_p.text)
+                res_p = get_soup.find("dd", "name")
+                if res_p is not None:
+                    print(res_p.text)
+            links = soup.find_all("a","fa_name")
+            for link in links:
+                i_url = link.get("href")
+                html = requests.get(i_url)
+                get_soup = BeautifulSoup(html.content, "html.parser")
+                res_p = get_soup.find("p", "lnk_url")
+                if res_p is not None:
+                    print(res_p.text)
+                res_p = get_soup.find("span", "name")
+                if res_p is not None:
+                    print(res_p.text)
+                res_p = get_soup.find("dd", "name")
+                if res_p is not None:
+                    print(res_p.text)
         else:
             break
-        i += 1
-
-
-def get_soup(url):
-    html = requests.get(url)
-    return BeautifulSoup(html.content, "html.parser")
-
-def scraping_gh():
-
-    soup = get_soup(get_href())
-
-    # Clinic name
-    res_p = soup.find("span", "name")
-    res = res_p.find(text=re.compile(""))
-    print(res.string)
-    # Homepage URL
-    res_p = soup.find("a", "lnk_url")
-    res = res_p.find(text=re.compile(""))
-    print(res.string)
-
-scraping_gh()
+        num += 1
+    else:
+        break
+    i += 1
 ```
````
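Not part of the answer itself: the revised code repeats the same fetch-and-print block for each of the three listing variants, so a natural follow-up is to factor it into one helper. Below is a minimal sketch under the assumption that the judo-ch.jp markup matches the class names used in the answer (shisetsu_name_s, shisetsu_name, fa_name, lnk_url, name); the names print_clinic_info and the loop layout are ours, not the author's.

```python
import requests
from bs4 import BeautifulSoup

def print_clinic_info(url):
    """Fetch one clinic detail page and print homepage URL and name, if present."""
    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    # The same three selectors the revised answer checks on every detail page.
    for tag, cls in (("p", "lnk_url"), ("span", "name"), ("dd", "name")):
        found = soup.find(tag, cls)
        if found is not None:
            print(found.text)

base = 'https://www.judo-ch.jp/sekkotsuinsrch/{}/list/{}/'
for i in range(1, 48):              # prefecture codes 01..47, as in the answer
    for num in range(1, 300):       # paginated list pages per prefecture
        res = requests.get(base.format(str(i).zfill(2), num))
        if res.status_code != 200:  # first missing page: move to next prefecture
            break
        soup = BeautifulSoup(res.content, "html.parser")
        # Gather detail-page links from all three listing layouts.
        anchors = [h3.find("a") for h3 in soup.find_all("h3", "shisetsu_name_s")]
        anchors += [h3.find("a") for h3 in soup.find_all("h3", "shisetsu_name")]
        anchors += soup.find_all("a", "fa_name")
        for a in anchors:
            if a is not None and a.get("href"):
                print_clinic_info(a["href"])
```

This keeps the answer's behavior of moving to the next prefecture code at the first non-200 list page, but it reuses the first response instead of requesting each list URL twice, and it avoids reassigning the loop's url variable inside the inner loop.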