質問編集履歴
4
ソースコードの見え方がおかしかったので修正
test
CHANGED
File without changes
|
test
CHANGED
@@ -324,7 +324,7 @@
|
|
324
324
|
|
325
325
|
if __name__ == '__main__':
|
326
326
|
|
327
|
-
main()
|
327
|
+
main()
|
328
328
|
|
329
329
|
```
|
330
330
|
|
3
ソースコードの見え方がおかしかったので修正
test
CHANGED
File without changes
|
test
CHANGED
@@ -182,13 +182,11 @@
|
|
182
182
|
|
183
183
|
### 該当のソースコード
|
184
184
|
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
185
|
+
PYTHON3
|
186
|
+
|
187
|
+
|
188
|
+
|
190
|
-
|
189
|
+
```
|
191
|
-
|
192
190
|
|
193
191
|
from selenium import webdriver
|
194
192
|
|
@@ -328,6 +326,10 @@
|
|
328
326
|
|
329
327
|
main()```
|
330
328
|
|
329
|
+
```
|
330
|
+
|
331
|
+
|
332
|
+
|
331
333
|
|
332
334
|
|
333
335
|
### 試したこと
|
2
ソースコードの表示がおかしかったので修正
test
CHANGED
File without changes
|
test
CHANGED
@@ -184,9 +184,9 @@
|
|
184
184
|
|
185
185
|
|
186
186
|
|
187
|
-
|
187
|
+
|
188
|
-
|
188
|
+
|
189
|
-
|
189
|
+
-*- encoding: utf-8 -*-
|
190
190
|
|
191
191
|
|
192
192
|
|
1
エラー全文を追記、ソースコードを修正、質問に回答
test
CHANGED
File without changes
|
test
CHANGED
@@ -50,8 +50,134 @@
|
|
50
50
|
|
51
51
|
```
|
52
52
|
|
53
|
+
止まる箇所①(検索ボタンを押した先に該当ページが無い場合)のエラー
|
54
|
+
|
55
|
+
DevTools listening on ws://127.0.0.1:12787/devtools/browser/017c4221-d8b9-4b81-9987-3def05eee8b0
|
56
|
+
|
57
|
+
1302
|
58
|
+
|
59
|
+
Traceback (most recent call last):
|
60
|
+
|
61
|
+
File "sample.py", line 41, in scraping
|
62
|
+
|
63
|
+
meigara_name = root.cssselect('#meigaraHeaderMeigaraName')[0].text_content().strip()
|
64
|
+
|
65
|
+
IndexError: list index out of range
|
66
|
+
|
67
|
+
|
68
|
+
|
69
|
+
During handling of the above exception, another exception occurred:
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
Traceback (most recent call last):
|
74
|
+
|
75
|
+
File "sample.py", line 71, in <module>
|
76
|
+
|
77
|
+
main()
|
78
|
+
|
79
|
+
File "sample.py", line 30, in main
|
80
|
+
|
81
|
+
scraping(browser,line.rstrip())
|
82
|
+
|
83
|
+
File "sample.py", line 67, in scraping
|
84
|
+
|
85
|
+
print(traceback.format_exc(sys.exc_info()[2]))
|
86
|
+
|
87
|
+
File "C:\Python\lib\traceback.py", line 163, in format_exc
|
88
|
+
|
89
|
+
return "".join(format_exception(*sys.exc_info(), limit=limit, chain=chain))
|
90
|
+
|
91
|
+
File "C:\Python\lib\traceback.py", line 117, in format_exception
|
92
|
+
|
93
|
+
type(value), value, tb, limit=limit).format(chain=chain))
|
94
|
+
|
95
|
+
File "C:\Python\lib\traceback.py", line 497, in __init__
|
96
|
+
|
97
|
+
capture_locals=capture_locals)
|
98
|
+
|
99
|
+
File "C:\Python\lib\traceback.py", line 332, in extract
|
100
|
+
|
101
|
+
if limit >= 0:
|
102
|
+
|
53
103
|
TypeError: '>=' not supported between instances of 'traceback' and 'int'
|
54
104
|
|
105
|
+
|
106
|
+
|
107
|
+
止まる箇所②(ボタンがクリックできない場合)のエラー
|
108
|
+
|
109
|
+
DevTools listening on ws://127.0.0.1:12410/devtools/browser/94524637-85ee-4c7e-a0c6-0cc2f1c12ad3
|
110
|
+
|
111
|
+
1305
|
112
|
+
|
113
|
+
Traceback (most recent call last):
|
114
|
+
|
115
|
+
File "sample.py", line 43, in scraping
|
116
|
+
|
117
|
+
browser.find_element_by_css_selector('#quarterInfoLink').click()
|
118
|
+
|
119
|
+
File "C:\Python\lib\selenium\webdriver\remote\webelement.py", line 80, in click
|
120
|
+
|
121
|
+
self._execute(Command.CLICK_ELEMENT)
|
122
|
+
|
123
|
+
File "C:\Python\lib\selenium\webdriver\remote\webelement.py", line 628, in _execute
|
124
|
+
|
125
|
+
return self._parent.execute(command, params)
|
126
|
+
|
127
|
+
File "C:\Python\lib\selenium\webdriver\remote\webdriver.py", line 312, in execute
|
128
|
+
|
129
|
+
self.error_handler.check_response(response)
|
130
|
+
|
131
|
+
File "C:\Python\lib\selenium\webdriver\remote\errorhandler.py", line 242, in check_response
|
132
|
+
|
133
|
+
raise exception_class(message, screen, stacktrace)
|
134
|
+
|
135
|
+
selenium.common.exceptions.WebDriverException: Message: unknown error: Element <a id="quarterInfoLink" href="#" class="btn is-disabled">...</a> is not clickable at point (382, 252). Other element would receive the click: <li class="item">...</li>
|
136
|
+
|
137
|
+
(Session info: chrome=67.0.3396.99)
|
138
|
+
|
139
|
+
(Driver info: chromedriver=2.35.528161 (5b82f2d2aae0ca24b877009200ced9065a772e73),platform=Windows NT 10.0.16299 x86_64)
|
140
|
+
|
141
|
+
|
142
|
+
|
143
|
+
|
144
|
+
|
145
|
+
During handling of the above exception, another exception occurred:
|
146
|
+
|
147
|
+
|
148
|
+
|
149
|
+
Traceback (most recent call last):
|
150
|
+
|
151
|
+
File "sample.py", line 71, in <module>
|
152
|
+
|
153
|
+
main()
|
154
|
+
|
155
|
+
File "sample.py", line 30, in main
|
156
|
+
|
157
|
+
scraping(browser,line.rstrip())
|
158
|
+
|
159
|
+
File "sample.py", line 67, in scraping
|
160
|
+
|
161
|
+
print(traceback.format_exc(sys.exc_info()[2]))
|
162
|
+
|
163
|
+
File "C:\Python\lib\traceback.py", line 163, in format_exc
|
164
|
+
|
165
|
+
return "".join(format_exception(*sys.exc_info(), limit=limit, chain=chain))
|
166
|
+
|
167
|
+
File "C:\Python\lib\traceback.py", line 117, in format_exception
|
168
|
+
|
169
|
+
type(value), value, tb, limit=limit).format(chain=chain))
|
170
|
+
|
171
|
+
File "C:\Python\lib\traceback.py", line 497, in __init__
|
172
|
+
|
173
|
+
capture_locals=capture_locals)
|
174
|
+
|
175
|
+
File "C:\Python\lib\traceback.py", line 332, in extract
|
176
|
+
|
177
|
+
if limit >= 0:
|
178
|
+
|
179
|
+
TypeError: '>=' not supported between instances of 'traceback' and 'int'
|
180
|
+
|
55
181
|
```
|
56
182
|
|
57
183
|
### 該当のソースコード
|
@@ -60,6 +186,10 @@
|
|
60
186
|
|
61
187
|
```PYTHON3
|
62
188
|
|
189
|
+
# -*- encoding: utf-8 -*-
|
190
|
+
|
191
|
+
|
192
|
+
|
63
193
|
from selenium import webdriver
|
64
194
|
|
65
195
|
import lxml.html
|
@@ -108,7 +238,9 @@
|
|
108
238
|
|
109
239
|
browser.find_element_by_name('LoginForm').click()
|
110
240
|
|
111
|
-
fw.write('A,B,C,D\n')
|
241
|
+
fw.write('A,B,C,D,E,F,G,H,I,J,K,L,M,N\n')
|
242
|
+
|
243
|
+
|
112
244
|
|
113
245
|
for line in fr.readlines():
|
114
246
|
|
@@ -122,37 +254,65 @@
|
|
122
254
|
|
123
255
|
def scraping(browser,code):
|
124
256
|
|
125
|
-
browser.find_element_by_id('Menu').click()
|
257
|
+
browser.find_element_by_id('kabuMenu').click()
|
126
258
|
|
127
259
|
input_code = browser.find_element_by_id('searchKey')
|
128
260
|
|
129
261
|
input_code.send_keys(code)
|
130
262
|
|
131
|
-
browser.find_element_by_id('Button').click()
|
263
|
+
browser.find_element_by_id('meigaraSearchButton').click()
|
132
264
|
|
133
265
|
try:
|
134
266
|
|
135
267
|
root = lxml.html.fromstring(browser.page_source)
|
136
268
|
|
137
|
-
meigarname = root.cssselect('#MeigaraName')[0].text_content().strip()
|
269
|
+
meigara_name = root.cssselect('#meigaraHeaderMeigaraName')[0].text_content().strip()
|
138
|
-
|
270
|
+
|
139
|
-
price = root.cssselect('#meigaraPrice')[0].text_content().replace(',','').strip()
|
271
|
+
price = root.cssselect('#meigaraHeaderCurrenPrice')[0].text_content().replace(',', '').strip()
|
140
|
-
|
272
|
+
|
141
|
-
browser.find_element_by_css_selector('#InfoLink').click()
|
273
|
+
browser.find_element_by_css_selector('#quarterInfoLink').click()
|
142
274
|
|
143
275
|
root = lxml.html.fromstring(browser.page_source)
|
144
276
|
|
145
|
-
closing = root.cssselect('#fiscalTerm')[0].text_content().replace(',','').strip()
|
277
|
+
closing = root.cssselect('#fiscalTerm')[0].text_content().replace(',', '').strip()
|
278
|
+
|
146
|
-
|
279
|
+
feature = root.cssselect('#tokushoku')[0].text_content().replace(',', '').strip()
|
280
|
+
|
281
|
+
inspect = root.cssselect('#gyosekiMitoshi')[0].text_content().replace(',', '').strip()
|
282
|
+
|
283
|
+
topics = root.cssselect('#topixComment')[0].text_content().replace(',', '').strip()
|
284
|
+
|
285
|
+
fc_ratio = root.cssselect('#gaikokujinMochikabuHiritsu')[0].text_content().replace(',', '').strip()
|
286
|
+
|
287
|
+
fc_ratio = re.sub(r'\<.*\>\s*', '', fc_ratio).strip()
|
288
|
+
|
289
|
+
it_ratio = root.cssselect('#toshinMoshikabuHiritsu')[0].text_content().replace(',', '').strip()
|
290
|
+
|
291
|
+
it_ratio = re.sub(r'\<.*\>\s*', '', it_ratio).strip()
|
292
|
+
|
147
|
-
browser.find_element_by_css_selector(
|
293
|
+
browser.find_element_by_css_selector('#quarterInfoPerformanceLink').click()
|
148
294
|
|
149
295
|
root = lxml.html.fromstring(browser.page_source)
|
150
296
|
|
151
|
-
jikoshihon = root.cssselect('#value_2')[0].text_content().replace(',','').strip()
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
297
|
+
jikoshihon = root.cssselect('#value_2')[0].text_content().replace(',', '').strip()
|
298
|
+
|
299
|
+
jikoshihon_ratio = root.cssselect('#value_3')[0].text_content().replace(',', '').strip()
|
300
|
+
|
301
|
+
rieki_jyouyo = root.cssselect('#value_5')[0].text_content().replace(',', '').strip()
|
302
|
+
|
303
|
+
yurishi_husai = root.cssselect('#value_6')[0].text_content().replace(',', '').strip()
|
304
|
+
|
305
|
+
eigyou_cf = re.sub(r'((\s*\d*))', '', root.cssselect('#eigyoCashFlow')[0].text_content().replace(',', '')).strip()
|
306
|
+
|
307
|
+
eigyou_cf = re.sub(r'((\s*\d*))', '', eigyou_cf).strip()
|
308
|
+
|
309
|
+
genkin_cf = root.cssselect('#genkin')[0].text_content().replace(',', '').strip()
|
310
|
+
|
311
|
+
genkin_cf = re.sub(r'((\s*\d*))', '', genkin_cf).strip()
|
312
|
+
|
313
|
+
fw.write('{0},{1},{2},{3},{4},{5},{6},{7},{8},{9},{10},{11},{12},{13},{14}\n'.format(code, meigara_name, price, closing,jikoshihon, jikoshihon_ratio,rieki_jyouyo, yurishi_husai,eigyou_cf, genkin_cf, fc_ratio,it_ratio, feature, inspect,topics))
|
314
|
+
|
315
|
+
|
156
316
|
|
157
317
|
except:
|
158
318
|
|
@@ -166,9 +326,7 @@
|
|
166
326
|
|
167
327
|
if __name__ == '__main__':
|
168
328
|
|
169
|
-
main()
|
329
|
+
main()```
|
170
|
-
|
171
|
-
```
|
172
330
|
|
173
331
|
|
174
332
|
|
@@ -176,7 +334,7 @@
|
|
176
334
|
|
177
335
|
止まる箇所①(検索ボタンを押した先に該当ページが無い場合)
|
178
336
|
|
179
|
-
browser.find_element_by_id('Button').click()
|
337
|
+
browser.find_element_by_id('meigaraSearchButton').click()
|
180
338
|
|
181
339
|
で移動したあとに'#MeigaraName'があれば下の行に
|
182
340
|
|
@@ -190,7 +348,7 @@
|
|
190
348
|
|
191
349
|
止まる箇所②(ボタンがクリックできない場合)
|
192
350
|
|
193
|
-
browser.find_element_by_css_selector('#InfoLink').click()
|
351
|
+
browser.find_element_by_css_selector('#quarterInfoLink').click()
|
194
352
|
|
195
353
|
でクリックできないと止まるようなので
|
196
354
|
|
@@ -210,6 +368,22 @@
|
|
210
368
|
|
211
369
|
### 補足情報(FW/ツールのバージョンなど)
|
212
370
|
|
371
|
+
質問用に省略していたソースコードを全文記載しました(IP/PASSを除く)
|
372
|
+
|
373
|
+
|
374
|
+
|
375
|
+
Q.seleniumを使っていたのに、途中でlxmlを使った理由は何でしょうか?
|
376
|
+
|
377
|
+
|
378
|
+
|
379
|
+
A.用途に似たソースコードの写経(動かなかったところを色々弄ってみる)をしている段階で
|
380
|
+
|
381
|
+
selenium=ブラウザを操作するもの lxml=HTMLを解析するもの 程度のイメージしかありません
|
382
|
+
|
383
|
+
ご質問の趣旨は「seleniumだけで出来るのに何故?」ということかと思いますが
|
384
|
+
|
385
|
+
私の知識が追い付いておらず申し訳ありません
|
386
|
+
|
213
387
|
|
214
388
|
|
215
389
|
ここにより詳細な情報を記載してください。HP
|