回答編集履歴

1

文法の修正

2019/06/25 12:38

投稿

退会済みユーザー
test CHANGED
@@ -20,100 +20,122 @@
20
20
 
21
21
 
22
22
 
23
- def get_href():
23
+ i = 1
24
24
 
25
- i = 1
25
+ num = 2
26
26
 
27
- num = 2
27
+ while i < 48:
28
28
 
29
- while i < 48:
29
+ for num in range(1, 300):
30
30
 
31
- for num in range(1, 300):
31
+ zero_i = str(i).zfill(2)
32
32
 
33
- zero_i = str(i).zfill(2)
33
+ base = 'https://www.judo-ch.jp/sekkotsuinsrch/{}/list/{}/'
34
34
 
35
- base = 'https://www.judo-ch.jp/sekkotsuinsrch/{}/list/{}/'
35
+ url = base.format(zero_i,num)
36
36
 
37
- url = base.format(zero_i,num)
37
+ res = requests.get(url)
38
38
 
39
- res = requests.get(url)
39
+ if res.status_code == 200:
40
40
 
41
+ html = requests.get(url)
42
+
43
+ soup = BeautifulSoup(html.content,"html.parser")
44
+
45
+ for tag in soup.find_all("h3","shisetsu_name_s"):
46
+
41
- if res.status_code == 200:
47
+ link = tag.find("a")
48
+
49
+ url = link.get("href")
42
50
 
43
51
  html = requests.get(url)
44
52
 
45
- soup = BeautifulSoup(html.content,"html.parser")
53
+ get_soup = BeautifulSoup(html.content, "html.parser")
46
54
 
47
- for tag in soup.find_all("h3","shisetsu_name_s"):
55
+ res_p = get_soup.find("p", "lnk_url")
48
56
 
49
- link = tag.find("a")
57
+ if res_p is not None:
50
58
 
51
- print(link)
59
+ print(res_p.text)
52
60
 
53
- for s_tag in soup.find_all("h3","shisetsu_name"):
61
+ res_p = get_soup.find("span", "name")
54
62
 
55
- s_link = s_tag.find("a")
63
+ if res_p is not None:
56
64
 
57
- s_url = s_link.get("href")
65
+ print(res_p.text)
58
66
 
59
- print(s_url)
67
+ res_p = get_soup.find("dd", "name")
60
68
 
61
- links = soup.find_all("a","fa_name")
69
+ if res_p is not None:
62
70
 
63
- for link in links:
71
+ print(res_p.text)
64
72
 
65
- print(link)
73
+ for s_tag in soup.find_all("h3","shisetsu_name"):
66
74
 
67
- else:
75
+ s_link = s_tag.find("a")
68
76
 
69
- break
77
+ s_url = s_link.get("href")
70
78
 
79
+ html = requests.get(s_url)
80
+
81
+ get_soup = BeautifulSoup(html.content, "html.parser")
82
+
83
+ res_p = get_soup.find("p", "lnk_url")
84
+
85
+ if res_p is not None:
86
+
87
+ print(res_p.text)
88
+
89
+ res_p = get_soup.find("span", "name")
90
+
91
+ if res_p is not None:
92
+
93
+ print(res_p.text)
94
+
95
+ res_p = get_soup.find("dd", "name")
96
+
97
+ if res_p is not None:
98
+
99
+ print(res_p.text)
100
+
101
+ links = soup.find_all("a","fa_name")
102
+
71
- num += 1
103
+ for link in links:
104
+
105
+ i_url = link.get("href")
106
+
107
+ html = requests.get(i_url)
108
+
109
+ get_soup = BeautifulSoup(html.content, "html.parser")
110
+
111
+ res_p = get_soup.find("p", "lnk_url")
112
+
113
+ if res_p is not None:
114
+
115
+ print(res_p.text)
116
+
117
+ res_p = get_soup.find("span", "name")
118
+
119
+ if res_p is not None:
120
+
121
+ print(res_p.text)
122
+
123
+ res_p = get_soup.find("dd", "name")
124
+
125
+ if res_p is not None:
126
+
127
+ print(res_p.text)
72
128
 
73
129
  else:
74
130
 
75
131
  break
76
132
 
77
- i += 1
133
+ num += 1
78
134
 
135
+ else:
79
136
 
137
+ break
80
138
 
81
-
82
-
83
- def get_soup(url):
84
-
85
- html = requests.get(url)
86
-
87
- return BeautifulSoup(html.content, "html.parser")
88
-
89
-
90
-
91
- def scraping_gh():
92
-
93
-
94
-
95
- soup = get_soup(get_href())
96
-
97
-
98
-
99
- # 整骨院の名称
139
+ i += 1
100
-
101
- res_p = soup.find("span", "name")
102
-
103
- res = res_p.find(text=re.compile(""))
104
-
105
- print(res.string)
106
-
107
- # ホームページのURL
108
-
109
- res_p = soup.find("a", "lnk_url")
110
-
111
- res = res_p.find(text=re.compile(""))
112
-
113
- print(res.string)
114
-
115
-
116
-
117
- scraping_gh()
118
140
 
119
141
  ```