質問編集履歴
2
URL追加
test
CHANGED
File without changes
|
test
CHANGED
@@ -82,4 +82,4 @@
|
|
82
82
|
|
83
83
|
Python 3.6.8
|
84
84
|
|
85
|
-
[エラー発生時の
|
85
|
+
[エラー発生時のURL](https://www.ncbi.nlm.nih.gov/pubmed//?term=benzene)
|
1
スタックトレース、エラー発生時のHTML、エラー発生時のソースコードの追加
test
CHANGED
File without changes
|
test
CHANGED
@@ -16,7 +16,25 @@
|
|
16
16
|
|
17
17
|
```
|
18
18
|
|
19
|
+
.
|
20
|
+
|
21
|
+
.
|
22
|
+
|
23
|
+
.
|
24
|
+
|
25
|
+
File "/anaconda3/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
|
26
|
+
|
27
|
+
raise exception_class(message, screen, stacktrace)
|
28
|
+
|
29
|
+
|
30
|
+
|
19
31
|
ElementNotVisibleException: element not visible
|
32
|
+
|
33
|
+
(Session info: chrome=73.0.3683.103)
|
34
|
+
|
35
|
+
(Driver info: chromedriver=2.38.552518 (183d19265345f54ce39cbb94cf81ba5f15905011),platform=Mac OS X 10.14.4 x86_64)
|
36
|
+
|
37
|
+
|
20
38
|
|
21
39
|
```
|
22
40
|
|
@@ -26,147 +44,25 @@
|
|
26
44
|
|
27
45
|
```Python
|
28
46
|
|
29
|
-
|
47
|
+
if exception_class == ErrorInResponseException:
|
30
48
|
|
31
|
-
i
|
49
|
+
raise exception_class(response, message)
|
32
50
|
|
33
|
-
|
51
|
+
elif exception_class == UnexpectedAlertPresentException:
|
34
52
|
|
35
|
-
|
53
|
+
alert_text = None
|
36
54
|
|
37
|
-
|
55
|
+
if 'data' in value:
|
38
56
|
|
57
|
+
alert_text = value['data'].get('text')
|
39
58
|
|
59
|
+
elif 'alert' in value:
|
40
60
|
|
41
|
-
|
61
|
+
alert_text = value['alert'].get('text')
|
42
62
|
|
43
|
-
|
63
|
+
raise exception_class(message, screen, stacktrace, alert_text)
|
44
64
|
|
45
|
-
keyword = "benzene"
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
data = get_pubmed_info(keyword)
|
50
|
-
|
51
|
-
get_dataframe(data, keyword)
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
def get_pubmed_info(keyword):
|
56
|
-
|
57
|
-
#webブラウザ指定(Chrome)
|
58
|
-
|
59
|
-
driver = webdriver.Chrome()
|
60
|
-
|
61
|
-
url = "https://www.ncbi.nlm.nih.gov/pubmed/?term="+keyword.replace(' ','+')
|
62
|
-
|
63
|
-
#ページ遷移
|
64
|
-
|
65
|
-
driver.get(url)
|
66
|
-
|
67
|
-
#暗黙の待機
|
68
|
-
|
69
|
-
driver.implicitly_wait(2)
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
#専用データ作成
|
74
|
-
|
75
|
-
data = []
|
76
|
-
|
77
|
-
#ページ数取得
|
78
|
-
|
79
|
-
try:
|
80
|
-
|
81
|
-
page_num = int(driver.find_element_by_id('pageno').get_attribute('last'))
|
82
|
-
|
83
|
-
#ページ数「1」の時例外扱いになる時があるため、その対策
|
84
|
-
|
85
|
-
except:
|
86
|
-
|
87
|
-
page_num = 1
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
#全ページ内のクローリング
|
92
|
-
|
93
|
-
for i in range(int(page_num)):
|
94
|
-
|
95
|
-
#abstract(text)方式の論文ページ表示
|
96
|
-
|
97
|
-
|
65
|
+
raise exception_class(message, screen, stacktrace)
|
98
|
-
|
99
|
-
[tag for tag in driver.find_elements_by_tag_name('input')
|
100
|
-
|
101
|
-
if (tag.get_attribute('value') == 'abstract')][1].click()
|
102
|
-
|
103
|
-
#表示順に上から論文タイトル、要旨、keywords取得
|
104
|
-
|
105
|
-
source = driver.page_source.split('\n\n\n')
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
for paper in source:
|
110
|
-
|
111
|
-
block = paper.split('\n\n')
|
112
|
-
|
113
|
-
if(len(block) >= 6):
|
114
|
-
|
115
|
-
data.append(block)
|
116
|
-
|
117
|
-
driver.back()
|
118
|
-
|
119
|
-
time.sleep(1)
|
120
|
-
|
121
|
-
if (i < page_num - 1):
|
122
|
-
|
123
|
-
try:
|
124
|
-
|
125
|
-
driver.find_element_by_css_selector('a.active.page_link.next').click()
|
126
|
-
|
127
|
-
print(i)
|
128
|
-
|
129
|
-
time.sleep(3)
|
130
|
-
|
131
|
-
except:
|
132
|
-
|
133
|
-
driver.current_url
|
134
|
-
|
135
|
-
time.sleep(5)
|
136
|
-
|
137
|
-
driver.find_element_by_css_selector('a.active.page_link.next').click()
|
138
|
-
|
139
|
-
print(i)
|
140
|
-
|
141
|
-
time.sleep(3)
|
142
|
-
|
143
|
-
else:
|
144
|
-
|
145
|
-
driver.back()
|
146
|
-
|
147
|
-
time.sleep(3)
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
"""if element not visible
|
152
|
-
|
153
|
-
if element not visible:
|
154
|
-
|
155
|
-
driver.current_url
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
driver.close()"""
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
return data
|
164
|
-
|
165
|
-
if __name__ == "__main__":
|
166
|
-
|
167
|
-
main()
|
168
|
-
|
169
|
-
|
170
66
|
|
171
67
|
```
|
172
68
|
|
@@ -185,3 +81,5 @@
|
|
185
81
|
|
186
82
|
|
187
83
|
Python 3.6.8
|
84
|
+
|
85
|
+
[エラー発生時のHTML](https://www.ncbi.nlm.nih.gov/pubmed)
|