質問編集履歴

2

URL追加

2019/04/19 08:29

投稿

motz-motz
motz-motz

スコア13

test CHANGED
File without changes
test CHANGED
@@ -82,4 +82,4 @@
82
82
 
83
83
  Python 3.6.8
84
84
 
85
- [エラー発生時のHTML](htps://www.ncbi.nlm.nih.gov/pubmed)
85
+ [エラー発生時のURL](https://www.ncbi.nlm.nih.gov/pubmed/?term=benzene)

1

スタックトレース、エラー発生時のHTML、エラー発生時のソースコードの追加

2019/04/19 08:28

投稿

motz-motz
motz-motz

スコア13

test CHANGED
File without changes
test CHANGED
@@ -16,7 +16,25 @@
16
16
 
17
17
  ```
18
18
 
19
+ .
20
+
21
+ .
22
+
23
+ .
24
+
25
+ File "/anaconda3/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
26
+
27
+ raise exception_class(message, screen, stacktrace)
28
+
29
+
30
+
19
31
  ElementNotVisibleException: element not visible
32
+
33
+ (Session info: chrome=73.0.3683.103)
34
+
35
+ (Driver info: chromedriver=2.38.552518 (183d19265345f54ce39cbb94cf81ba5f15905011),platform=Mac OS X 10.14.4 x86_64)
36
+
37
+
20
38
 
21
39
  ```
22
40
 
@@ -26,147 +44,25 @@
26
44
 
27
45
  ```Python
28
46
 
29
- #abstract(text)でクローリング
47
+ if exception_class == ErrorInResponseException:
30
48
 
31
- import time
49
+ raise exception_class(response, message)
32
50
 
33
- from selenium import webdriver
51
+ elif exception_class == UnexpectedAlertPresentException:
34
52
 
35
- import pandas as pd
53
+ alert_text = None
36
54
 
37
- #from selenium.webdriver.support import expected_conditions as ec
55
+ if 'data' in value:
38
56
 
57
+ alert_text = value['data'].get('text')
39
58
 
59
+ elif 'alert' in value:
40
60
 
41
- def main():
61
+ alert_text = value['alert'].get('text')
42
62
 
43
- #検索キーワード入力
63
+ raise exception_class(message, screen, stacktrace, alert_text)
44
64
 
45
- keyword = "benzene"
46
-
47
-
48
-
49
- data = get_pubmed_info(keyword)
50
-
51
- get_dataframe(data, keyword)
52
-
53
-
54
-
55
- def get_pubmed_info(keyword):
56
-
57
- #webブラウザ指定(Chrome)
58
-
59
- driver = webdriver.Chrome()
60
-
61
- url = "https://www.ncbi.nlm.nih.gov/pubmed/?term="+keyword.replace(' ','+')
62
-
63
- #ページ遷移
64
-
65
- driver.get(url)
66
-
67
- #暗黙の待機
68
-
69
- driver.implicitly_wait(2)
70
-
71
-
72
-
73
- #専用データ作成
74
-
75
- data = []
76
-
77
- #ページ数取得
78
-
79
- try:
80
-
81
- page_num = int(driver.find_element_by_id('pageno').get_attribute('last'))
82
-
83
- #ページ数「1」の時例外扱いになる時があるため、その対策
84
-
85
- except:
86
-
87
- page_num = 1
88
-
89
-
90
-
91
- #全ページ内のクローリング
92
-
93
- for i in range(int(page_num)):
94
-
95
- #abstract(text)方式の論文ページ表示
96
-
97
- driver.find_element_by_name('Display').click()
65
+ raise exception_class(message, screen, stacktrace)
98
-
99
- [tag for tag in driver.find_elements_by_tag_name('input')
100
-
101
- if (tag.get_attribute('value') == 'abstract')][1].click()
102
-
103
- #表示順に上から論文タイトル、要旨、keywords取得
104
-
105
- source = driver.page_source.split('\n\n\n')
106
-
107
-
108
-
109
- for paper in source:
110
-
111
- block = paper.split('\n\n')
112
-
113
- if(len(block) >= 6):
114
-
115
- data.append(block)
116
-
117
- driver.back()
118
-
119
- time.sleep(1)
120
-
121
- if (i < page_num - 1):
122
-
123
- try:
124
-
125
- driver.find_element_by_css_selector('a.active.page_link.next').click()
126
-
127
- print(i)
128
-
129
- time.sleep(3)
130
-
131
- except:
132
-
133
- driver.current_url
134
-
135
- time.sleep(5)
136
-
137
- driver.find_element_by_css_selector('a.active.page_link.next').click()
138
-
139
- print(i)
140
-
141
- time.sleep(3)
142
-
143
- else:
144
-
145
- driver.back()
146
-
147
- time.sleep(3)
148
-
149
-
150
-
151
- """if element not visible
152
-
153
- if element not visible:
154
-
155
- driver.current_url
156
-
157
-
158
-
159
- driver.close()"""
160
-
161
-
162
-
163
- return data
164
-
165
- if __name__ == "__main__":
166
-
167
- main()
168
-
169
-
170
66
 
171
67
  ```
172
68
 
@@ -185,3 +81,5 @@
185
81
 
186
82
 
187
83
  Python 3.6.8
84
+
85
+ [エラー発生時のHTML](htps://www.ncbi.nlm.nih.gov/pubmed)