質問編集履歴
3
エラー箇所の追記
title
CHANGED
File without changes
|
body
CHANGED
@@ -13,6 +13,7 @@
|
|
13
13
|
### 該当のソースコード
|
14
14
|
|
15
15
|
```python
|
16
|
+
#モジュールのインポート
|
16
17
|
from selenium import webdriver
|
17
18
|
import os
|
18
19
|
from bs4 import BeautifulSoup
|
@@ -22,35 +23,39 @@
|
|
22
23
|
from selenium.webdriver.support import expected_conditions as EC
|
23
24
|
from selenium.webdriver.common.by import By
|
24
25
|
|
26
|
+
#取得したい要素(職業名や仕事内容)の変数定義
|
25
27
|
job_cards = 'a.catch'
|
26
28
|
job_names = 'h2.job'
|
27
29
|
job_descriptions = 'div.jobCatch'
|
28
30
|
job_requirements_musts = 'div.mustSet'
|
29
31
|
next_bottun = 'a.btn.next'
|
30
32
|
|
33
|
+
#webdriverの設定・URLへ遷移・要素が読み込まれるまでの待機時間の設定
|
31
34
|
driver = webdriver.Chrome(executable_path="C:...")
|
32
35
|
url = 'https://en-ambi.com/'
|
33
36
|
wait = WebDriverWait(driver, 10)
|
34
37
|
driver.get(url)
|
35
38
|
|
36
39
|
def main():
|
37
|
-
while True:
|
40
|
+
while True: #次へボタンがなくなるまで繰り返し
|
38
41
|
if len(driver.find_element_by_css_selector(next_bottun).text) > 0:
|
42
|
+
#ページ内の求人詳細に遷移する要素の全取得
|
39
43
|
cards = driver.find_elements_by_css_selector(job_cards)
|
40
|
-
for card in cards:
|
44
|
+
for card in cards: #↑で取得した要素をクリックし遷移したページで下記処理を行う
|
41
45
|
try:
|
42
46
|
card.click()
|
47
|
+
#恐らくここまでは正常に動いていて下記から上手く処理できずエラーが起こっている
|
43
48
|
wait.until(EC.presence_of_element_located((By.ID, "descBase")))
|
44
|
-
job_name = driver.find_element_by_css_selector(job_names).text
|
49
|
+
job_name = driver.find_element_by_css_selector(job_names).text #職業名をテキストで取得する
|
45
50
|
job_description = driver.find_element_by_css_selector(job_descriptions).text
|
46
51
|
job_requirements_must = driver.find_element_by_css_selector(job_requirements_musts).text
|
47
|
-
se = pd.Series([job_name,job_description,job_requirements_must],['job_name','job_description','job_requirements_must',])
|
52
|
+
se = pd.Series([job_name,job_description,job_requirements_must],['job_name','job_description','job_requirements_must',])
|
48
53
|
df = pd.DataFrame()
|
49
54
|
df = df.append(se,ignore_index=True)
|
50
55
|
driver.back()
|
51
56
|
except Exception as e:# -*- coding: utf-8 -*-
|
52
57
|
print(e)
|
53
|
-
next = driver.find_element_by_css_selector(next_bottun).get_attribute("href")
|
58
|
+
next = driver.find_element_by_css_selector(next_bottun).get_attribute("href") #ページ内の次へボタンのリンク先URL(href属性)を取得
|
54
59
|
driver.get(next)
|
55
60
|
else:
|
56
61
|
print('No page')
|
2
タイトルの編集
title
CHANGED
@@ -1,1 +1,1 @@
|
|
1
|
-
seleniumで情報を取得したい
|
1
|
+
selenium(Python)で情報を取得したいが、要素が読み込まれるのを待機できていない?等の理由で取得できない
|
body
CHANGED
File without changes
|
1
文字の修正
title
CHANGED
File without changes
|
body
CHANGED
@@ -28,8 +28,8 @@
|
|
28
28
|
job_requirements_musts = 'div.mustSet'
|
29
29
|
next_bottun = 'a.btn.next'
|
30
30
|
|
31
|
-
driver = webdriver.Chrome(executable_path="C:
|
31
|
+
driver = webdriver.Chrome(executable_path="C:...")
|
32
|
-
url = 'https://en-ambi.com/
|
32
|
+
url = 'https://en-ambi.com/'
|
33
33
|
wait = WebDriverWait(driver, 10)
|
34
34
|
driver.get(url)
|
35
35
|
|