回答編集履歴
1
文法の修正
    
        answer	
    CHANGED
    
    | @@ -8,52 +8,63 @@ | |
| 8 8 | 
             
            from bs4 import BeautifulSoup
         | 
| 9 9 |  | 
| 10 10 |  | 
| 11 | 
            -
            def get_href():
         | 
| 12 | 
            -
             | 
| 11 | 
            +
            i = 1
         | 
| 13 | 
            -
             | 
| 12 | 
            +
            num = 2
         | 
| 14 | 
            -
             | 
| 13 | 
            +
            while i < 48:
         | 
| 15 | 
            -
             | 
| 14 | 
            +
                for num in range(1, 300):
         | 
| 16 | 
            -
             | 
| 15 | 
            +
                    zero_i = str(i).zfill(2)
         | 
| 17 | 
            -
             | 
| 16 | 
            +
                    base = 'https://www.judo-ch.jp/sekkotsuinsrch/{}/list/{}/'
         | 
| 18 | 
            -
             | 
| 17 | 
            +
                    url = base.format(zero_i,num)
         | 
| 19 | 
            -
             | 
| 18 | 
            +
                    res = requests.get(url)
         | 
| 20 | 
            -
             | 
| 19 | 
            +
                    if res.status_code == 200:
         | 
| 20 | 
            +
                        html = requests.get(url)
         | 
| 21 | 
            +
                        soup = BeautifulSoup(html.content,"html.parser")
         | 
| 22 | 
            +
                        for tag in soup.find_all("h3","shisetsu_name_s"):
         | 
| 23 | 
            +
                            link = tag.find("a")
         | 
| 24 | 
            +
                            url = link.get("href")
         | 
| 21 25 | 
             
                            html = requests.get(url)
         | 
| 22 | 
            -
                             | 
| 26 | 
            +
                            get_soup = BeautifulSoup(html.content, "html.parser")
         | 
| 27 | 
            +
                            res_p = get_soup.find("p", "lnk_url")
         | 
| 28 | 
            +
                            if res_p is not None:
         | 
| 29 | 
            +
                                print(res_p.text)
         | 
| 30 | 
            +
                            res_p = get_soup.find("span", "name")
         | 
| 31 | 
            +
                            if res_p is not None:
         | 
| 32 | 
            +
                                print(res_p.text)
         | 
| 23 | 
            -
                             | 
| 33 | 
            +
                            res_p = get_soup.find("dd", "name")
         | 
| 24 | 
            -
             | 
| 34 | 
            +
                            if res_p is not None:
         | 
| 25 | 
            -
                                print( | 
| 35 | 
            +
                                print(res_p.text)
         | 
| 26 | 
            -
             | 
| 36 | 
            +
                        for s_tag in soup.find_all("h3","shisetsu_name"):
         | 
| 27 | 
            -
             | 
| 37 | 
            +
                            s_link = s_tag.find("a")
         | 
| 28 | 
            -
             | 
| 38 | 
            +
                            s_url = s_link.get("href")
         | 
| 39 | 
            +
                            html = requests.get(s_url)
         | 
| 40 | 
            +
                            get_soup = BeautifulSoup(html.content, "html.parser")
         | 
| 41 | 
            +
                            res_p = get_soup.find("p", "lnk_url")
         | 
| 42 | 
            +
                            if res_p is not None:
         | 
| 29 | 
            -
                                print( | 
| 43 | 
            +
                                print(res_p.text)
         | 
| 44 | 
            +
                            res_p = get_soup.find("span", "name")
         | 
| 45 | 
            +
                            if res_p is not None:
         | 
| 46 | 
            +
                                print(res_p.text)
         | 
| 47 | 
            +
                            res_p = get_soup.find("dd", "name")
         | 
| 48 | 
            +
                            if res_p is not None:
         | 
| 49 | 
            +
                                print(res_p.text)
         | 
| 30 | 
            -
             | 
| 50 | 
            +
                        links = soup.find_all("a","fa_name")
         | 
| 31 | 
            -
             | 
| 51 | 
            +
                        for link in links:
         | 
| 52 | 
            +
                            i_url = link.get("href")
         | 
| 53 | 
            +
                            html = requests.get(i_url)
         | 
| 54 | 
            +
                            get_soup = BeautifulSoup(html.content, "html.parser")
         | 
| 55 | 
            +
                            res_p = get_soup.find("p", "lnk_url")
         | 
| 56 | 
            +
                            if res_p is not None:
         | 
| 32 | 
            -
                                print( | 
| 57 | 
            +
                                print(res_p.text)
         | 
| 58 | 
            +
                            res_p = get_soup.find("span", "name")
         | 
| 33 | 
            -
             | 
| 59 | 
            +
                            if res_p is not None:
         | 
| 34 | 
            -
                            break
         | 
| 35 | 
            -
             | 
| 60 | 
            +
                                print(res_p.text)
         | 
| 61 | 
            +
                            res_p = get_soup.find("dd", "name")
         | 
| 62 | 
            +
                            if res_p is not None:
         | 
| 63 | 
            +
                                print(res_p.text)
         | 
| 36 64 | 
             
                    else:
         | 
| 37 65 | 
             
                        break
         | 
| 66 | 
            +
                    num += 1
         | 
| 67 | 
            +
                else:
         | 
| 68 | 
            +
                    break
         | 
| 38 | 
            -
             | 
| 69 | 
            +
                i += 1
         | 
| 39 | 
            -
             | 
| 40 | 
            -
             | 
| 41 | 
            -
            def get_soup(url):
         | 
| 42 | 
            -
                html = requests.get(url)
         | 
| 43 | 
            -
                return BeautifulSoup(html.content, "html.parser")
         | 
| 44 | 
            -
             | 
| 45 | 
            -
            def scraping_gh():
         | 
| 46 | 
            -
             | 
| 47 | 
            -
                soup = get_soup(get_href())
         | 
| 48 | 
            -
             | 
| 49 | 
            -
                # 整骨院の名称
         | 
| 50 | 
            -
                res_p = soup.find("span", "name")
         | 
| 51 | 
            -
                res = res_p.find(text=re.compile(""))
         | 
| 52 | 
            -
                print(res.string)
         | 
| 53 | 
            -
                # ホームページのURL
         | 
| 54 | 
            -
                res_p = soup.find("a", "lnk_url")
         | 
| 55 | 
            -
                res = res_p.find(text=re.compile(""))
         | 
| 56 | 
            -
                print(res.string)
         | 
| 57 | 
            -
             | 
| 58 | 
            -
            scraping_gh()
         | 
| 59 70 | 
             
            ```
         | 
