質問編集履歴
7
コード修正
test
CHANGED
File without changes
|
test
CHANGED
@@ -210,7 +210,7 @@
|
|
210
210
|
|
211
211
|
|
212
212
|
|
213
|
-
try: #cが
|
213
|
+
try: #cが8個ない場合に対応
|
214
214
|
|
215
215
|
for i in range(8):
|
216
216
|
|
@@ -276,7 +276,7 @@
|
|
276
276
|
|
277
277
|
|
278
278
|
|
279
|
-
try: #cが
|
279
|
+
try: #cが8個ない場合に対応
|
280
280
|
|
281
281
|
for i in range(10):
|
282
282
|
|
@@ -342,7 +342,7 @@
|
|
342
342
|
|
343
343
|
|
344
344
|
|
345
|
-
try: #cが
|
345
|
+
try: #cが8個ない場合に対応
|
346
346
|
|
347
347
|
for i in range(10):
|
348
348
|
|
6
コード修正
test
CHANGED
File without changes
|
test
CHANGED
@@ -14,6 +14,8 @@
|
|
14
14
|
|
15
15
|
関数の中にtry、exceptを組み込んでいる影響でしょうか。
|
16
16
|
|
17
|
+
※if elseに書き換えても同様のメッセージが発生しましたので更新したコードを記載いたします。
|
18
|
+
|
17
19
|
|
18
20
|
|
19
21
|
### 発生している問題・エラーメッセージ
|
@@ -148,7 +150,9 @@
|
|
148
150
|
|
149
151
|
|
150
152
|
|
151
|
-
#1
|
153
|
+
#page_1
|
154
|
+
|
155
|
+
|
152
156
|
|
153
157
|
result1 = requests.get(url1)
|
154
158
|
|
@@ -158,218 +162,218 @@
|
|
158
162
|
|
159
163
|
|
160
164
|
|
165
|
+
if soup1.find("div", {"class":"main-inner-a"}) is None:
|
166
|
+
|
167
|
+
title_1_a = []
|
168
|
+
|
169
|
+
overview_1_a = []
|
170
|
+
|
171
|
+
link_1_a = []
|
172
|
+
|
173
|
+
else:
|
174
|
+
|
161
|
-
page_1_a = soup1.find("div", {"class":"main-inner-a"})
|
175
|
+
page_1_a = soup1.find("div", {"class":"main-inner-a"})
|
162
|
-
|
163
|
-
|
176
|
+
|
164
|
-
|
165
|
-
title_1_a = page_1_a.find("h3").text.replace("\u3000","").replace("\n","")
|
177
|
+
title_1_a = page_1_a.find("h3").text.replace("\u3000","").replace("\n","")
|
166
|
-
|
178
|
+
|
167
|
-
overview_1_a = page_1_a.find("p").text.replace("\u3000","").replace("\n","")
|
179
|
+
overview_1_a = page_1_a.find("p").text.replace("\u3000","").replace("\n","")
|
168
|
-
|
180
|
+
|
169
|
-
link_1_a = page_1_a.find("a").get("href")
|
181
|
+
link_1_a = page_1_a.find("a").get("href")
|
182
|
+
|
183
|
+
|
184
|
+
|
185
|
+
if soup1.find("div", {"class":"main-inner-b"}) is None:
|
186
|
+
|
187
|
+
title_1_b = []
|
188
|
+
|
189
|
+
overview_1_b = []
|
190
|
+
|
191
|
+
link_1_b = []
|
192
|
+
|
193
|
+
else:
|
194
|
+
|
195
|
+
page_1_b = soup1.find("div", {"class":"main-inner-b"})
|
196
|
+
|
197
|
+
title_1_b = page_1_b.find("h3").text.replace("\u3000","").replace("\n","")
|
198
|
+
|
199
|
+
overview_1_b = page_1_b.find("p").text.replace("\u3000","").replace("\n","")
|
200
|
+
|
201
|
+
link_1_b = page_1_b.find("a").get("href")
|
202
|
+
|
203
|
+
|
204
|
+
|
205
|
+
title_1_c = []
|
206
|
+
|
207
|
+
overview_1_c = []
|
208
|
+
|
209
|
+
link_1_c = []
|
210
|
+
|
211
|
+
|
212
|
+
|
213
|
+
try: #cがある場合
|
214
|
+
|
215
|
+
for i in range(8):
|
216
|
+
|
217
|
+
page_1_c = soup1.find_all("div", {"class":"main-inner-c"})[i]
|
218
|
+
|
219
|
+
|
220
|
+
|
221
|
+
tmp_title = page_1_c.find("h3").text.replace("\u3000","").replace("\n","")
|
222
|
+
|
223
|
+
title_1_c.append(tmp_title)
|
224
|
+
|
225
|
+
|
226
|
+
|
227
|
+
tmp_overview = page_1_c.find_all("p")[2].text.replace("\u3000","").replace("\n","")
|
228
|
+
|
229
|
+
overview_1_c.append(tmp_overview)
|
230
|
+
|
231
|
+
|
232
|
+
|
233
|
+
tmp_link = page_1_c.find("a").get("href")
|
234
|
+
|
235
|
+
link_1_c.append(tmp_link)
|
236
|
+
|
237
|
+
|
238
|
+
|
239
|
+
i += 1
|
170
240
|
|
171
241
|
except:
|
172
242
|
|
243
|
+
pass
|
244
|
+
|
245
|
+
|
246
|
+
|
247
|
+
title_1_all = pd.DataFrame([title_1_a, title_1_b] + title_1_c)
|
248
|
+
|
249
|
+
overview_1_all = pd.DataFrame([overview_1_a, overview_1_b] + overview_1_c)
|
250
|
+
|
251
|
+
link_1_all = pd.DataFrame([link_1_a, link_1_b] + link_1_c)
|
252
|
+
|
253
|
+
|
254
|
+
|
255
|
+
page1_df = pd.concat([title_1_all, overview_1_all, link_1_all], axis=1)
|
256
|
+
|
257
|
+
|
258
|
+
|
259
|
+
#page_2
|
260
|
+
|
261
|
+
|
262
|
+
|
263
|
+
result2 = requests.get(url2)
|
264
|
+
|
265
|
+
c2 = result2.content
|
266
|
+
|
267
|
+
soup2 = BeautifulSoup(c2)
|
268
|
+
|
269
|
+
|
270
|
+
|
173
|
-
|
271
|
+
title_2_c = []
|
174
|
-
|
272
|
+
|
175
|
-
|
273
|
+
overview_2_c = []
|
176
|
-
|
274
|
+
|
177
|
-
|
275
|
+
link_2_c = []
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
276
|
+
|
182
|
-
|
277
|
+
|
278
|
+
|
183
|
-
try: #
|
279
|
+
try: #cがある場合
|
280
|
+
|
184
|
-
|
281
|
+
for i in range(10):
|
282
|
+
|
283
|
+
page_2_c = soup2.find_all("div", {"class":"main-inner-c"})[i]
|
284
|
+
|
285
|
+
|
286
|
+
|
185
|
-
title
|
287
|
+
tmp_title = page_2_c.find("h3").text.replace("\u3000","").replace("\n","")
|
288
|
+
|
186
|
-
|
289
|
+
title_2_c.append(tmp_title)
|
290
|
+
|
291
|
+
|
292
|
+
|
187
|
-
overview
|
293
|
+
tmp_overview = page_2_c.find_all("p")[2].text.replace("\u3000","").replace("\n","")
|
294
|
+
|
188
|
-
|
295
|
+
overview_2_c.append(tmp_overview)
|
296
|
+
|
297
|
+
|
298
|
+
|
189
|
-
link
|
299
|
+
tmp_link = page_2_c.find("a").get("href")
|
300
|
+
|
301
|
+
link_2_c.append(tmp_link)
|
302
|
+
|
303
|
+
|
304
|
+
|
305
|
+
i += 1
|
190
306
|
|
191
307
|
except:
|
192
308
|
|
309
|
+
pass
|
310
|
+
|
311
|
+
|
312
|
+
|
193
|
-
|
313
|
+
title_2_all = pd.DataFrame(title_2_c)
|
194
|
-
|
314
|
+
|
195
|
-
|
315
|
+
overview_2_all = pd.DataFrame(overview_2_c)
|
196
|
-
|
316
|
+
|
197
|
-
|
317
|
+
link_2_all = pd.DataFrame(link_2_c)
|
318
|
+
|
319
|
+
|
320
|
+
|
198
|
-
|
321
|
+
page2_df = pd.concat([title_2_all, overview_2_all, link_2_all], axis=1)
|
322
|
+
|
323
|
+
|
324
|
+
|
199
|
-
|
325
|
+
#page_3
|
326
|
+
|
327
|
+
|
328
|
+
|
200
|
-
|
329
|
+
result3 = requests.get(url3)
|
330
|
+
|
331
|
+
c3 = result3.content
|
332
|
+
|
333
|
+
soup3 = BeautifulSoup(c3)
|
334
|
+
|
335
|
+
|
336
|
+
|
201
|
-
title_
|
337
|
+
title_3_c = []
|
202
|
-
|
338
|
+
|
203
|
-
overview_
|
339
|
+
overview_3_c = []
|
204
|
-
|
340
|
+
|
205
|
-
link_
|
341
|
+
link_3_c = []
|
206
342
|
|
207
343
|
|
208
344
|
|
209
345
|
try: #cがある場合
|
210
346
|
|
211
|
-
|
347
|
+
for i in range(10):
|
212
|
-
|
348
|
+
|
213
|
-
page_
|
349
|
+
page_3_c = soup3.find_all("div", {"class":"main-inner-c"})[i]
|
214
350
|
|
215
351
|
|
216
352
|
|
217
|
-
tmp_title = page_
|
353
|
+
tmp_title = page_3_c.find("h3").text.replace("\u3000","").replace("\n","")
|
218
|
-
|
354
|
+
|
219
|
-
title_
|
355
|
+
title_3_c.append(tmp_title)
|
220
|
-
|
221
|
-
|
222
|
-
|
356
|
+
|
357
|
+
|
358
|
+
|
223
|
-
tmp_overview = page_
|
359
|
+
tmp_overview = page_3_c.find_all("p")[2].text.replace("\u3000","").replace("\n","")
|
224
|
-
|
360
|
+
|
225
|
-
overview_
|
361
|
+
overview_3_c.append(tmp_overview)
|
226
|
-
|
227
|
-
|
228
|
-
|
362
|
+
|
363
|
+
|
364
|
+
|
229
|
-
tmp_link = page_
|
365
|
+
tmp_link = page_3_c.find("a").get("href")
|
230
|
-
|
366
|
+
|
231
|
-
link_
|
367
|
+
link_3_c.append(tmp_link)
|
232
|
-
|
233
|
-
|
234
|
-
|
368
|
+
|
369
|
+
|
370
|
+
|
235
|
-
i += 1
|
371
|
+
i += 1
|
236
372
|
|
237
373
|
except:
|
238
374
|
|
239
|
-
pass
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
title_1_all = pd.DataFrame([title_1_a, title_1_b] + title_1_c)
|
244
|
-
|
245
|
-
overview_1_all = pd.DataFrame([overview_1_a, overview_1_b] + overview_1_c)
|
246
|
-
|
247
|
-
link_1_all = pd.DataFrame([link_1_a, link_1_b] + link_1_c)
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
page1_df = pd.concat([title_1_all, overview_1_all, link_1_all], axis=1)
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
#2ページ目
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
result2 = requests.get(url2)
|
260
|
-
|
261
|
-
c2 = result2.content
|
262
|
-
|
263
|
-
soup2 = BeautifulSoup(c2)
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
title_2_c = []
|
268
|
-
|
269
|
-
overview_2_c = []
|
270
|
-
|
271
|
-
link_2_c = []
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
try: #cがある場合
|
276
|
-
|
277
|
-
for i in range(10):
|
278
|
-
|
279
|
-
page_2_c = soup2.find_all("div", {"class":"main-inner-c"})[i]
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
tmp_title = page_2_c.find("h3").text.replace("\u3000","").replace("\n","")
|
284
|
-
|
285
|
-
title_2_c.append(tmp_title)
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
tmp_overview = page_2_c.find_all("p")[2].text.replace("\u3000","").replace("\n","")
|
290
|
-
|
291
|
-
overview_2_c.append(tmp_overview)
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
tmp_link = page_2_c.find("a").get("href")
|
296
|
-
|
297
|
-
link_2_c.append(tmp_link)
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
i += 1
|
302
|
-
|
303
|
-
except:
|
304
|
-
|
305
375
|
pass
|
306
376
|
|
307
|
-
|
308
|
-
|
309
|
-
title_2_all = pd.DataFrame(title_2_c)
|
310
|
-
|
311
|
-
overview_2_all = pd.DataFrame(overview_2_c)
|
312
|
-
|
313
|
-
link_2_all = pd.DataFrame(link_2_c)
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
page2_df = pd.concat([title_2_all, overview_2_all, link_2_all], axis=1)
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
#3ページ目
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
result3 = requests.get(url3)
|
326
|
-
|
327
|
-
c3 = result3.content
|
328
|
-
|
329
|
-
soup3 = BeautifulSoup(c3)
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
title_3_c = []
|
334
|
-
|
335
|
-
overview_3_c = []
|
336
|
-
|
337
|
-
link_3_c = []
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
try: #cがある場合
|
342
|
-
|
343
|
-
for i in range(10):
|
344
|
-
|
345
|
-
page_3_c = soup3.find_all("div", {"class":"main-inner-c"})[i]
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
tmp_title = page_3_c.find("h3").text.replace("\u3000","").replace("\n","")
|
350
|
-
|
351
|
-
title_3_c.append(tmp_title)
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
tmp_overview = page_3_c.find_all("p")[2].text.replace("\u3000","").replace("\n","")
|
356
|
-
|
357
|
-
overview_3_c.append(tmp_overview)
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
tmp_link = page_3_c.find("a").get("href")
|
362
|
-
|
363
|
-
link_3_c.append(tmp_link)
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
i += 1
|
368
|
-
|
369
|
-
except:
|
370
|
-
|
371
|
-
pass
|
372
|
-
|
373
377
|
|
374
378
|
|
375
379
|
title_3_all = pd.DataFrame(title_3_c)
|
@@ -384,7 +388,7 @@
|
|
384
388
|
|
385
389
|
|
386
390
|
|
387
|
-
#
|
391
|
+
#total
|
388
392
|
|
389
393
|
|
390
394
|
|
@@ -392,6 +396,8 @@
|
|
392
396
|
|
393
397
|
page_df.columns=["Title","Overview","Link"]
|
394
398
|
|
399
|
+
page_df.index = np.arange(1, len(page_df)+1)
|
400
|
+
|
395
401
|
|
396
402
|
|
397
403
|
page_df.to_excel("result.xlsx")
|
5
コード修正
test
CHANGED
File without changes
|
test
CHANGED
@@ -428,7 +428,7 @@
|
|
428
428
|
|
429
429
|
url2 = "https://runda.jp/search/?q=" + keyword + "&page=2&search_type=" + period
|
430
430
|
|
431
|
-
url3 = "https://runda.jp/search/?q=" + keyword + "&page=
|
431
|
+
url3 = "https://runda.jp/search/?q=" + keyword + "&page=3&search_type=" + period
|
432
432
|
|
433
433
|
|
434
434
|
|
4
コード修正
test
CHANGED
File without changes
|
test
CHANGED
@@ -140,8 +140,6 @@
|
|
140
140
|
|
141
141
|
|
142
142
|
|
143
|
-
#関数の定義
|
144
|
-
|
145
143
|
|
146
144
|
|
147
145
|
```python
|
3
コード修正
test
CHANGED
File without changes
|
test
CHANGED
@@ -140,14 +140,18 @@
|
|
140
140
|
|
141
141
|
|
142
142
|
|
143
|
-
|
143
|
+
#関数の定義
|
144
|
+
|
145
|
+
|
146
|
+
|
144
|
-
|
147
|
+
```python
|
145
|
-
|
146
148
|
|
147
149
|
def scrayping():
|
148
150
|
|
149
151
|
|
150
152
|
|
153
|
+
#1ページ目
|
154
|
+
|
151
155
|
result1 = requests.get(url1)
|
152
156
|
|
153
157
|
c1 = result1.content
|
@@ -250,6 +254,8 @@
|
|
250
254
|
|
251
255
|
|
252
256
|
|
257
|
+
#2ページ目
|
258
|
+
|
253
259
|
|
254
260
|
|
255
261
|
result2 = requests.get(url2)
|
@@ -314,6 +320,8 @@
|
|
314
320
|
|
315
321
|
|
316
322
|
|
323
|
+
#3ページ目
|
324
|
+
|
317
325
|
|
318
326
|
|
319
327
|
result3 = requests.get(url3)
|
@@ -378,7 +386,7 @@
|
|
378
386
|
|
379
387
|
|
380
388
|
|
381
|
-
#
|
389
|
+
#1~3ページ統合
|
382
390
|
|
383
391
|
|
384
392
|
|
@@ -442,6 +450,10 @@
|
|
442
450
|
|
443
451
|
|
444
452
|
|
453
|
+
```
|
454
|
+
|
455
|
+
|
456
|
+
|
445
457
|
|
446
458
|
|
447
459
|
|
2
コードの修正
test
CHANGED
File without changes
|
test
CHANGED
@@ -148,10 +148,6 @@
|
|
148
148
|
|
149
149
|
|
150
150
|
|
151
|
-
#page_1
|
152
|
-
|
153
|
-
|
154
|
-
|
155
151
|
result1 = requests.get(url1)
|
156
152
|
|
157
153
|
c1 = result1.content
|
@@ -254,8 +250,6 @@
|
|
254
250
|
|
255
251
|
|
256
252
|
|
257
|
-
#page_2
|
258
|
-
|
259
253
|
|
260
254
|
|
261
255
|
result2 = requests.get(url2)
|
@@ -320,8 +314,6 @@
|
|
320
314
|
|
321
315
|
|
322
316
|
|
323
|
-
#page_3
|
324
|
-
|
325
317
|
|
326
318
|
|
327
319
|
result3 = requests.get(url3)
|
1
誤字修正
test
CHANGED
@@ -1 +1 @@
|
|
1
|
-
Python scheduleモジュール実行時のエラー
|
1
|
+
Python Webスクレイピングにおけるscheduleモジュール実行時のエラー
|
test
CHANGED
File without changes
|