
Answer edit history

Revision 1: syntax fix

Posted 2019/06/25 12:38 by a withdrawn user

answer CHANGED
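In this revision, the separate get_href / get_soup / scraping_gh functions are collapsed into a single top-level loop that follows each listing link (h3.shisetsu_name_s, h3.shisetsu_name, a.fa_name) to its detail page and prints the clinic's homepage URL (p.lnk_url) and name (span.name or dd.name) whenever those elements exist.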
````diff
@@ -9,52 +9,63 @@
 from bs4 import BeautifulSoup
 
 
-def get_href():
-    i = 1
-    num = 2
-    while i < 48:
-        for num in range(1, 300):
-            zero_i = str(i).zfill(2)
-            base = 'https://www.judo-ch.jp/sekkotsuinsrch/{}/list/{}/'
-            url = base.format(zero_i,num)
-            res = requests.get(url)
-            if res.status_code == 200:
+i = 1
+num = 2
+while i < 48:
+    for num in range(1, 300):
+        zero_i = str(i).zfill(2)
+        base = 'https://www.judo-ch.jp/sekkotsuinsrch/{}/list/{}/'
+        url = base.format(zero_i,num)
+        res = requests.get(url)
+        if res.status_code == 200:
+            html = requests.get(url)
+            soup = BeautifulSoup(html.content,"html.parser")
+            for tag in soup.find_all("h3","shisetsu_name_s"):
+                link = tag.find("a")
+                url = link.get("href")
                 html = requests.get(url)
-                soup = BeautifulSoup(html.content,"html.parser")
-                for tag in soup.find_all("h3","shisetsu_name_s"):
-                    link = tag.find("a")
-                    print(link)
-                for s_tag in soup.find_all("h3","shisetsu_name"):
-                    s_link = s_tag.find("a")
-                    s_url = s_link.get("href")
-                    print(s_url)
-                links = soup.find_all("a","fa_name")
-                for link in links:
-                    print(link)
-            else:
-                break
-            num += 1
+                get_soup = BeautifulSoup(html.content, "html.parser")
+                res_p = get_soup.find("p", "lnk_url")
+                if res_p is not None:
+                    print(res_p.text)
+                res_p = get_soup.find("span", "name")
+                if res_p is not None:
+                    print(res_p.text)
+                res_p = get_soup.find("dd", "name")
+                if res_p is not None:
+                    print(res_p.text)
+            for s_tag in soup.find_all("h3","shisetsu_name"):
+                s_link = s_tag.find("a")
+                s_url = s_link.get("href")
+                html = requests.get(s_url)
+                get_soup = BeautifulSoup(html.content, "html.parser")
+                res_p = get_soup.find("p", "lnk_url")
+                if res_p is not None:
+                    print(res_p.text)
+                res_p = get_soup.find("span", "name")
+                if res_p is not None:
+                    print(res_p.text)
+                res_p = get_soup.find("dd", "name")
+                if res_p is not None:
+                    print(res_p.text)
+            links = soup.find_all("a","fa_name")
+            for link in links:
+                i_url = link.get("href")
+                html = requests.get(i_url)
+                get_soup = BeautifulSoup(html.content, "html.parser")
+                res_p = get_soup.find("p", "lnk_url")
+                if res_p is not None:
+                    print(res_p.text)
+                res_p = get_soup.find("span", "name")
+                if res_p is not None:
+                    print(res_p.text)
+                res_p = get_soup.find("dd", "name")
+                if res_p is not None:
+                    print(res_p.text)
         else:
             break
-        i += 1
-
-
-def get_soup(url):
-    html = requests.get(url)
-    return BeautifulSoup(html.content, "html.parser")
-
-def scraping_gh():
-
-    soup = get_soup(get_href())
-
-    # Clinic name
-    res_p = soup.find("span", "name")
-    res = res_p.find(text=re.compile(""))
-    print(res.string)
-    # Homepage URL
-    res_p = soup.find("a", "lnk_url")
-    res = res_p.find(text=re.compile(""))
-    print(res.string)
-
-scraping_gh()
+        num += 1
+    else:
+        break
+    i += 1
 ```
````
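Not part of the answer itself: the revised code repeats the same fetch-and-print block for each of the three listing variants, so a natural follow-up is to factor it into one helper. Below is a minimal sketch under the assumption that the judo-ch.jp markup matches the class names used in the answer (shisetsu_name_s, shisetsu_name, fa_name, lnk_url, name); the names print_clinic_info and the loop layout are ours, not the author's.

```python
import requests
from bs4 import BeautifulSoup

def print_clinic_info(url):
    """Fetch one clinic detail page and print homepage URL and name, if present."""
    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    # The same three selectors the revised answer checks on every detail page.
    for tag, cls in (("p", "lnk_url"), ("span", "name"), ("dd", "name")):
        found = soup.find(tag, cls)
        if found is not None:
            print(found.text)

base = 'https://www.judo-ch.jp/sekkotsuinsrch/{}/list/{}/'
for i in range(1, 48):              # prefecture codes 01..47, as in the answer
    for num in range(1, 300):       # paginated list pages per prefecture
        res = requests.get(base.format(str(i).zfill(2), num))
        if res.status_code != 200:  # first missing page: move to next prefecture
            break
        soup = BeautifulSoup(res.content, "html.parser")
        # Gather detail-page links from all three listing layouts.
        anchors = [h3.find("a") for h3 in soup.find_all("h3", "shisetsu_name_s")]
        anchors += [h3.find("a") for h3 in soup.find_all("h3", "shisetsu_name")]
        anchors += soup.find_all("a", "fa_name")
        for a in anchors:
            if a is not None and a.get("href"):
                print_clinic_info(a["href"])
```

This keeps the answer's behavior of moving to the next prefecture code at the first non-200 list page, but it reuses the first response instead of requesting each list URL twice, and it avoids reassigning the loop's url variable inside the inner loop.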