Question edit history

7

Code correction

2021/02/24 06:53

Posted

SMRKW

Score: 2

test CHANGED
File without changes
test CHANGED
@@ -210,7 +210,7 @@



- try: # when c exists
+ try: # handle the case where there are not 8 c items

  for i in range(8):

@@ -276,7 +276,7 @@



- try: # when c exists
+ try: # handle the case where there are not 8 c items

  for i in range(10):

@@ -342,7 +342,7 @@



- try: # when c exists
+ try: # handle the case where there are not 8 c items

  for i in range(10):


6

Code correction

2021/02/24 06:53

Posted

SMRKW

Score: 2

test CHANGED
File without changes
test CHANGED
@@ -14,6 +14,8 @@

  Could this be an effect of the try/except blocks built into the function?

+ Note: rewriting with if/else produced the same message, so I am posting the updated code.
+


  ### Problem / error message encountered
@@ -148,7 +150,9 @@



- # page 1
+ #page_1
+
+

  result1 = requests.get(url1)

@@ -158,218 +162,218 @@



+ if soup1.find("div", {"class":"main-inner-a"}) is None:
+
+ title_1_a = []
+
+ overview_1_a = []
+
+ link_1_a = []
+
+ else:
+
- page_1_a = soup1.find("div", {"class":"main-inner-a"})
+ page_1_a = soup1.find("div", {"class":"main-inner-a"})
-
- try: # when a exists
+
-
- title_1_a = page_1_a.find("h3").text.replace("\u3000","").replace("\n","")
+ title_1_a = page_1_a.find("h3").text.replace("\u3000","").replace("\n","")
-
+
- overview_1_a = page_1_a.find("p").text.replace("\u3000","").replace("\n","")
+ overview_1_a = page_1_a.find("p").text.replace("\u3000","").replace("\n","")
-
+
- link_1_a = page_1_a.find("a").get("href")
+ link_1_a = page_1_a.find("a").get("href")
+
+
+
+ if soup1.find("div", {"class":"main-inner-b"}) is None:
+
+ title_1_b = []
+
+ overview_1_b = []
+
+ link_1_b = []
+
+ else:
+
+ page_1_b = soup1.find("div", {"class":"main-inner-b"})
+
+ title_1_b = page_1_b.find("h3").text.replace("\u3000","").replace("\n","")
+
+ overview_1_b = page_1_b.find("p").text.replace("\u3000","").replace("\n","")
+
+ link_1_b = page_1_b.find("a").get("href")
+
+
+
+ title_1_c = []
+
+ overview_1_c = []
+
+ link_1_c = []
+
+
+
+ try: # when c exists
+
+ for i in range(8):
+
+ page_1_c = soup1.find_all("div", {"class":"main-inner-c"})[i]
+
+
+
+ tmp_title = page_1_c.find("h3").text.replace("\u3000","").replace("\n","")
+
+ title_1_c.append(tmp_title)
+
+
+
+ tmp_overview = page_1_c.find_all("p")[2].text.replace("\u3000","").replace("\n","")
+
+ overview_1_c.append(tmp_overview)
+
+
+
+ tmp_link = page_1_c.find("a").get("href")
+
+ link_1_c.append(tmp_link)
+
+
+
+ i += 1

  except:

+ pass
+
+
+
+ title_1_all = pd.DataFrame([title_1_a, title_1_b] + title_1_c)
+
+ overview_1_all = pd.DataFrame([overview_1_a, overview_1_b] + overview_1_c)
+
+ link_1_all = pd.DataFrame([link_1_a, link_1_b] + link_1_c)
+
+
+
+ page1_df = pd.concat([title_1_all, overview_1_all, link_1_all], axis=1)
+
+
+
+ #page_2
+
+
+
+ result2 = requests.get(url2)
+
+ c2 = result2.content
+
+ soup2 = BeautifulSoup(c2)
+
+
+
- title_1_a = []
+ title_2_c = []
-
+
- overview_1_a = []
+ overview_2_c = []
-
+
- link_1_a = []
+ link_2_c = []
-
-
-
- page_1_b = soup1.find("div", {"class":"main-inner-b"})
+
-
+
+
- try: # when b exists
+ try: # when c exists
+
-
+ for i in range(10):
+
+ page_2_c = soup2.find_all("div", {"class":"main-inner-c"})[i]
+
+
+
- title_1_b = page_1_b.find("h3").text.replace("\u3000","").replace("\n","")
+ tmp_title = page_2_c.find("h3").text.replace("\u3000","").replace("\n","")
+
-
+ title_2_c.append(tmp_title)
+
+
+
- overview_1_b = page_1_b.find("p").text.replace("\u3000","").replace("\n","")
+ tmp_overview = page_2_c.find_all("p")[2].text.replace("\u3000","").replace("\n","")
+
-
+ overview_2_c.append(tmp_overview)
+
+
+
- link_1_b = page_1_b.find("a").get("href")
+ tmp_link = page_2_c.find("a").get("href")
+
+ link_2_c.append(tmp_link)
+
+
+
+ i += 1

  except:

+ pass
+
+
+
- title_1_b = []
+ title_2_all = pd.DataFrame(title_2_c)
-
+
- overview_1_b = []
+ overview_2_all = pd.DataFrame(overview_2_c)
-
+
- link_1_b = []
+ link_2_all = pd.DataFrame(link_2_c)
+
+
+
-
+ page2_df = pd.concat([title_2_all, overview_2_all, link_2_all], axis=1)
+
+
+
-
+ #page_3
+
+
+
-
+ result3 = requests.get(url3)
+
+ c3 = result3.content
+
+ soup3 = BeautifulSoup(c3)
+
+
+
- title_1_c = []
+ title_3_c = []
-
+
- overview_1_c = []
+ overview_3_c = []
-
+
- link_1_c = []
+ link_3_c = []



  try: # when c exists

- for i in range(8):
+ for i in range(10):
-
+
- page_1_c = soup1.find_all("div", {"class":"main-inner-c"})[i]
+ page_3_c = soup3.find_all("div", {"class":"main-inner-c"})[i]



- tmp_title = page_1_c.find("h3").text.replace("\u3000","").replace("\n","")
+ tmp_title = page_3_c.find("h3").text.replace("\u3000","").replace("\n","")
-
+
- title_1_c.append(tmp_title)
+ title_3_c.append(tmp_title)
-
-
-
+
+
+
- tmp_overview = page_1_c.find_all("p")[2].text.replace("\u3000","").replace("\n","")
+ tmp_overview = page_3_c.find_all("p")[2].text.replace("\u3000","").replace("\n","")
-
+
- overview_1_c.append(tmp_overview)
+ overview_3_c.append(tmp_overview)
-
-
-
+
+
+
- tmp_link = page_1_c.find("a").get("href")
+ tmp_link = page_3_c.find("a").get("href")
-
+
- link_1_c.append(tmp_link)
+ link_3_c.append(tmp_link)
-
-
-
+
+
+
- i += 1
+ i += 1

  except:

- pass
-
-
-
- title_1_all = pd.DataFrame([title_1_a, title_1_b] + title_1_c)
-
- overview_1_all = pd.DataFrame([overview_1_a, overview_1_b] + overview_1_c)
-
- link_1_all = pd.DataFrame([link_1_a, link_1_b] + link_1_c)
-
-
-
- page1_df = pd.concat([title_1_all, overview_1_all, link_1_all], axis=1)
-
-
-
- # page 2
-
-
-
- result2 = requests.get(url2)
-
- c2 = result2.content
-
- soup2 = BeautifulSoup(c2)
-
-
-
- title_2_c = []
-
- overview_2_c = []
-
- link_2_c = []
-
-
-
- try: # when c exists
-
- for i in range(10):
-
- page_2_c = soup2.find_all("div", {"class":"main-inner-c"})[i]
-
-
-
- tmp_title = page_2_c.find("h3").text.replace("\u3000","").replace("\n","")
-
- title_2_c.append(tmp_title)
-
-
-
- tmp_overview = page_2_c.find_all("p")[2].text.replace("\u3000","").replace("\n","")
-
- overview_2_c.append(tmp_overview)
-
-
-
- tmp_link = page_2_c.find("a").get("href")
-
- link_2_c.append(tmp_link)
-
-
-
- i += 1
-
- except:
-
  pass

-
-
- title_2_all = pd.DataFrame(title_2_c)
-
- overview_2_all = pd.DataFrame(overview_2_c)
-
- link_2_all = pd.DataFrame(link_2_c)
-
-
-
- page2_df = pd.concat([title_2_all, overview_2_all, link_2_all], axis=1)
-
-
-
- # page 3
-
-
-
- result3 = requests.get(url3)
-
- c3 = result3.content
-
- soup3 = BeautifulSoup(c3)
-
-
-
- title_3_c = []
-
- overview_3_c = []
-
- link_3_c = []
-
-
-
- try: # when c exists
-
- for i in range(10):
-
- page_3_c = soup3.find_all("div", {"class":"main-inner-c"})[i]
-
-
-
- tmp_title = page_3_c.find("h3").text.replace("\u3000","").replace("\n","")
-
- title_3_c.append(tmp_title)
-
-
-
- tmp_overview = page_3_c.find_all("p")[2].text.replace("\u3000","").replace("\n","")
-
- overview_3_c.append(tmp_overview)
-
-
-
- tmp_link = page_3_c.find("a").get("href")
-
- link_3_c.append(tmp_link)
-
-
-
- i += 1
-
- except:
-
- pass
-


  title_3_all = pd.DataFrame(title_3_c)
@@ -384,7 +388,7 @@



- # combine pages 1 to 3
+ #total



@@ -392,6 +396,8 @@

  page_df.columns=["Title","Overview","Link"]

+ page_df.index = np.arange(1, len(page_df)+1)
+


  page_df.to_excel("result.xlsx")
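For readers following the large change above: this revision replaces the broad try/except guards with explicit `is None` checks on the `main-inner-a` / `main-inner-b` blocks, keeping try/except only around the variable number of `main-inner-c` blocks. Below is a minimal, self-contained sketch of that None-check pattern, not the question's exact code: the URL is a placeholder, and iterating directly over `find_all()` instead of indexing a fixed `range` is a suggested variation.

```python
# Minimal sketch of the None-check pattern introduced in the revision above.
# The URL is a placeholder; the real script builds url1..url3 from a search keyword.
import requests
from bs4 import BeautifulSoup

result = requests.get("https://example.com/search?page=1")  # placeholder URL
soup = BeautifulSoup(result.content, "html.parser")

# Single block: fall back to an empty value when the element is missing,
# instead of letting try/except swallow the AttributeError from None.find(...).
block_a = soup.find("div", {"class": "main-inner-a"})
if block_a is None:
    title_a = []
else:
    title_a = block_a.find("h3").text.replace("\u3000", "").replace("\n", "")

# Repeated blocks: iterating over find_all() directly means the loop length
# always matches the number of elements actually on the page, so there is
# no IndexError to catch with a bare except.
titles_c = []
for block_c in soup.find_all("div", {"class": "main-inner-c"}):
    titles_c.append(block_c.find("h3").text.replace("\u3000", "").replace("\n", ""))
```

This keeps the structure of the revised code while avoiding the bare `except:` blocks entirely.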

5

Code correction

2021/02/24 06:52

Posted

SMRKW

Score: 2

test CHANGED
File without changes
test CHANGED
@@ -428,7 +428,7 @@

  url2 = "https://runda.jp/search/?q=" + keyword + "&page=2&search_type=" + period

- url3 = "https://runda.jp/search/?q=" + keyword + "&page=2&search_type=" + period
+ url3 = "https://runda.jp/search/?q=" + keyword + "&page=3&search_type=" + period




4

Code correction

2021/02/24 05:32

Posted

SMRKW

Score: 2

test CHANGED
File without changes
test CHANGED
@@ -140,8 +140,6 @@



- # function definition
-


  ```python

3

Code correction

2021/02/24 05:28

Posted

SMRKW

Score: 2

test CHANGED
File without changes
test CHANGED
@@ -140,14 +140,18 @@



-
+ # function definition
+
+
+
-
+ ```python
-

  def scrayping():



+ # page 1
+
  result1 = requests.get(url1)

  c1 = result1.content
@@ -250,6 +254,8 @@



+ # page 2
+


  result2 = requests.get(url2)
@@ -314,6 +320,8 @@



+ # page 3
+


  result3 = requests.get(url3)
@@ -378,7 +386,7 @@



- # combine page1~3
+ # combine pages 1 to 3



@@ -442,6 +450,10 @@



+ ```
+
+
+




2

Code correction

2021/02/24 05:27

Posted

SMRKW

Score: 2

test CHANGED
File without changes
test CHANGED
@@ -148,10 +148,6 @@



- #page_1
-
-
-
  result1 = requests.get(url1)

  c1 = result1.content
@@ -254,8 +250,6 @@



- #page_2
-


  result2 = requests.get(url2)
@@ -320,8 +314,6 @@



- #page_3
-


  result3 = requests.get(url3)

1

Typo fix

2021/02/24 05:16

Posted

SMRKW

Score: 2

test CHANGED
@@ -1 +1 @@
- Error when running the Python schedule module
+ Error when running the schedule module during Python web scraping
test CHANGED
File without changes