
Answer edit history

Revision 1: fixed syntax

Posted 2019/06/25 12:39 by a deactivated user
answer CHANGED
````diff
@@ -8,52 +8,63 @@
 from bs4 import BeautifulSoup
 
 
-def get_href():
-    i = 1
+i = 1
-    num = 2
+num = 2
-    while i < 48:
+while i < 48:
-        for num in range(1, 300):
+    for num in range(1, 300):
-            zero_i = str(i).zfill(2)
+        zero_i = str(i).zfill(2)
-            base = 'https://www.judo-ch.jp/sekkotsuinsrch/{}/list/{}/'
+        base = 'https://www.judo-ch.jp/sekkotsuinsrch/{}/list/{}/'
-            url = base.format(zero_i,num)
+        url = base.format(zero_i,num)
-            res = requests.get(url)
+        res = requests.get(url)
-            if res.status_code == 200:
+        if res.status_code == 200:
+            html = requests.get(url)
+            soup = BeautifulSoup(html.content,"html.parser")
+            for tag in soup.find_all("h3","shisetsu_name_s"):
+                link = tag.find("a")
+                url = link.get("href")
                 html = requests.get(url)
-                soup = BeautifulSoup(html.content,"html.parser")
+                get_soup = BeautifulSoup(html.content, "html.parser")
+                res_p = get_soup.find("p", "lnk_url")
+                if res_p is not None:
+                    print(res_p.text)
+                res_p = get_soup.find("span", "name")
+                if res_p is not None:
+                    print(res_p.text)
-                for tag in soup.find_all("h3","shisetsu_name_s"):
+                res_p = get_soup.find("dd", "name")
-                    link = tag.find("a")
+                if res_p is not None:
-                    print(link)
+                    print(res_p.text)
-                for s_tag in soup.find_all("h3","shisetsu_name"):
+            for s_tag in soup.find_all("h3","shisetsu_name"):
-                    s_link = s_tag.find("a")
+                s_link = s_tag.find("a")
-                    s_url = s_link.get("href")
+                s_url = s_link.get("href")
+                html = requests.get(s_url)
+                get_soup = BeautifulSoup(html.content, "html.parser")
+                res_p = get_soup.find("p", "lnk_url")
+                if res_p is not None:
-                    print(s_url)
+                    print(res_p.text)
+                res_p = get_soup.find("span", "name")
+                if res_p is not None:
+                    print(res_p.text)
+                res_p = get_soup.find("dd", "name")
+                if res_p is not None:
+                    print(res_p.text)
-                links = soup.find_all("a","fa_name")
+            links = soup.find_all("a","fa_name")
-                for link in links:
+            for link in links:
+                i_url = link.get("href")
+                html = requests.get(i_url)
+                get_soup = BeautifulSoup(html.content, "html.parser")
+                res_p = get_soup.find("p", "lnk_url")
+                if res_p is not None:
-                    print(link)
+                    print(res_p.text)
+                res_p = get_soup.find("span", "name")
-            else:
+                if res_p is not None:
-                break
-            num += 1
+                    print(res_p.text)
+                res_p = get_soup.find("dd", "name")
+                if res_p is not None:
+                    print(res_p.text)
         else:
             break
+        num += 1
+    else:
+        break
-        i += 1
+    i += 1
-
-
-def get_soup(url):
-    html = requests.get(url)
-    return BeautifulSoup(html.content, "html.parser")
-
-def scraping_gh():
-
-    soup = get_soup(get_href())
-
-    # Clinic name
-    res_p = soup.find("span", "name")
-    res = res_p.find(text=re.compile(""))
-    print(res.string)
-    # Homepage URL
-    res_p = soup.find("a", "lnk_url")
-    res = res_p.find(text=re.compile(""))
-    print(res.string)
-
-scraping_gh()
 ```
````
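
For reference, the revised script repeats the same fetch-and-print block for each of the three link types, fetches every list page twice, and uses `requests` without importing it in the visible hunk. Below is a minimal consolidated sketch of the same crawl, not the answer's own code. It assumes the page structure implied by the answer (class names `shisetsu_name_s`, `shisetsu_name`, `fa_name`, `lnk_url`, and `name`, and absolute `href` values); the helper name `print_clinic_details` and the one-second delay are additions for illustration.

```python
import time

import requests
from bs4 import BeautifulSoup


def print_clinic_details(url):
    """Fetch one clinic's detail page and print whichever fields it exposes."""
    html = requests.get(url)
    soup = BeautifulSoup(html.content, "html.parser")
    # Detail pages carry the homepage URL and clinic name in different
    # places, so try each candidate and skip any that are missing.
    for name, cls in (("p", "lnk_url"), ("span", "name"), ("dd", "name")):
        found = soup.find(name, class_=cls)
        if found is not None:
            print(found.text)


for i in range(1, 48):  # prefecture codes 01..47
    zero_i = str(i).zfill(2)
    for num in range(1, 300):  # paginated list pages for one prefecture
        url = "https://www.judo-ch.jp/sekkotsuinsrch/{}/list/{}/".format(zero_i, num)
        res = requests.get(url)
        if res.status_code != 200:
            break  # past the last page: move on to the next prefecture
        # Reuse the response already fetched instead of requesting it again.
        soup = BeautifulSoup(res.content, "html.parser")
        # Gather detail-page links from all three listing layouts.
        for h3 in soup.find_all("h3", class_=["shisetsu_name_s", "shisetsu_name"]):
            link = h3.find("a")
            if link is not None:
                print_clinic_details(link.get("href"))
        for link in soup.find_all("a", class_="fa_name"):
            print_clinic_details(link.get("href"))
        time.sleep(1)  # be polite to the server
```

Unlike the revision's `for ... else: break`, which stops the whole crawl once a prefecture exhausts all 299 pages without a non-200 response, this sketch simply advances to the next prefecture.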