回答編集履歴

1

文法の修正

2019/06/25 12:39

投稿

退会済みユーザー
test CHANGED
@@ -18,100 +18,122 @@
18
18
 
19
19
 
20
20
 
21
- def get_href():
21
+ i = 1
22
22
 
23
- i = 1
23
+ num = 2
24
24
 
25
- num = 2
25
+ while i < 48:
26
26
 
27
- while i < 48:
27
+ for num in range(1, 300):
28
28
 
29
- for num in range(1, 300):
29
+ zero_i = str(i).zfill(2)
30
30
 
31
- zero_i = str(i).zfill(2)
31
+ base = 'https://www.judo-ch.jp/sekkotsuinsrch/{}/list/{}/'
32
32
 
33
- base = 'https://www.judo-ch.jp/sekkotsuinsrch/{}/list/{}/'
33
+ url = base.format(zero_i,num)
34
34
 
35
- url = base.format(zero_i,num)
35
+ res = requests.get(url)
36
36
 
37
- res = requests.get(url)
37
+ if res.status_code == 200:
38
38
 
39
+ html = requests.get(url)
40
+
41
+ soup = BeautifulSoup(html.content,"html.parser")
42
+
43
+ for tag in soup.find_all("h3","shisetsu_name_s"):
44
+
39
- if res.status_code == 200:
45
+ link = tag.find("a")
46
+
47
+ url = link.get("href")
40
48
 
41
49
  html = requests.get(url)
42
50
 
43
- soup = BeautifulSoup(html.content,"html.parser")
51
+ get_soup = BeautifulSoup(html.content, "html.parser")
44
52
 
45
- for tag in soup.find_all("h3","shisetsu_name_s"):
53
+ res_p = get_soup.find("p", "lnk_url")
46
54
 
47
- link = tag.find("a")
55
+ if res_p is not None:
48
56
 
49
- print(link)
57
+ print(res_p.text)
50
58
 
51
- for s_tag in soup.find_all("h3","shisetsu_name"):
59
+ res_p = get_soup.find("span", "name")
52
60
 
53
- s_link = s_tag.find("a")
61
+ if res_p is not None:
54
62
 
55
- s_url = s_link.get("href")
63
+ print(res_p.text)
56
64
 
57
- print(s_url)
65
+ res_p = get_soup.find("dd", "name")
58
66
 
59
- links = soup.find_all("a","fa_name")
67
+ if res_p is not None:
60
68
 
61
- for link in links:
69
+ print(res_p.text)
62
70
 
63
- print(link)
71
+ for s_tag in soup.find_all("h3","shisetsu_name"):
64
72
 
65
- else:
73
+ s_link = s_tag.find("a")
66
74
 
67
- break
75
+ s_url = s_link.get("href")
68
76
 
77
+ html = requests.get(s_url)
78
+
79
+ get_soup = BeautifulSoup(html.content, "html.parser")
80
+
81
+ res_p = get_soup.find("p", "lnk_url")
82
+
83
+ if res_p is not None:
84
+
85
+ print(res_p.text)
86
+
87
+ res_p = get_soup.find("span", "name")
88
+
89
+ if res_p is not None:
90
+
91
+ print(res_p.text)
92
+
93
+ res_p = get_soup.find("dd", "name")
94
+
95
+ if res_p is not None:
96
+
97
+ print(res_p.text)
98
+
99
+ links = soup.find_all("a","fa_name")
100
+
69
- num += 1
101
+ for link in links:
102
+
103
+ i_url = link.get("href")
104
+
105
+ html = requests.get(i_url)
106
+
107
+ get_soup = BeautifulSoup(html.content, "html.parser")
108
+
109
+ res_p = get_soup.find("p", "lnk_url")
110
+
111
+ if res_p is not None:
112
+
113
+ print(res_p.text)
114
+
115
+ res_p = get_soup.find("span", "name")
116
+
117
+ if res_p is not None:
118
+
119
+ print(res_p.text)
120
+
121
+ res_p = get_soup.find("dd", "name")
122
+
123
+ if res_p is not None:
124
+
125
+ print(res_p.text)
70
126
 
71
127
  else:
72
128
 
73
129
  break
74
130
 
75
- i += 1
131
+ num += 1
76
132
 
133
+ else:
77
134
 
135
+ break
78
136
 
79
-
80
-
81
- def get_soup(url):
82
-
83
- html = requests.get(url)
84
-
85
- return BeautifulSoup(html.content, "html.parser")
86
-
87
-
88
-
89
- def scraping_gh():
90
-
91
-
92
-
93
- soup = get_soup(get_href())
94
-
95
-
96
-
97
- # 整骨院の名称
137
+ i += 1
98
-
99
- res_p = soup.find("span", "name")
100
-
101
- res = res_p.find(text=re.compile(""))
102
-
103
- print(res.string)
104
-
105
- # ホームページのURL
106
-
107
- res_p = soup.find("a", "lnk_url")
108
-
109
- res = res_p.find(text=re.compile(""))
110
-
111
- print(res.string)
112
-
113
-
114
-
115
- scraping_gh()
116
138
 
117
139
  ```