質問編集履歴
4
ソースコードの見え方がおかしかったので修正
title
CHANGED
File without changes
|
body
CHANGED
@@ -161,7 +161,7 @@
|
|
161
161
|
pass
|
162
162
|
|
163
163
|
if __name__ == '__main__':
|
164
|
-
main()
|
164
|
+
main()
|
165
165
|
```
|
166
166
|
|
167
167
|
|
3
ソースコードの見え方がおかしかったものを修正
title
CHANGED
File without changes
|
body
CHANGED
@@ -90,10 +90,9 @@
|
|
90
90
|
TypeError: '>=' not supported between instances of 'traceback' and 'int'
|
91
91
|
```
|
92
92
|
### 該当のソースコード
|
93
|
+
PYTHON3
|
93
94
|
|
94
|
-
|
95
|
-
|
95
|
+
```
|
96
|
-
|
97
96
|
from selenium import webdriver
|
98
97
|
import lxml.html
|
99
98
|
import re
|
@@ -163,7 +162,9 @@
|
|
163
162
|
|
164
163
|
if __name__ == '__main__':
|
165
164
|
main()```
|
165
|
+
```
|
166
166
|
|
167
|
+
|
167
168
|
### 試したこと
|
168
169
|
止まる箇所①(検索ボタンを押した先に該当ページが無い場合)
|
169
170
|
browser.find_element_by_id('meigaraSearchButton').click()
|
2
ソースコードの表示がおかしかったので修正
title
CHANGED
File without changes
|
body
CHANGED
@@ -91,9 +91,9 @@
|
|
91
91
|
```
|
92
92
|
### 該当のソースコード
|
93
93
|
|
94
|
-
```PYTHON3
|
95
|
-
# -*- encoding: utf-8 -*-
|
96
94
|
|
95
|
+
-*- encoding: utf-8 -*-
|
96
|
+
|
97
97
|
from selenium import webdriver
|
98
98
|
import lxml.html
|
99
99
|
import re
|
1
エラー全文を追記、ソースコードを修正、質問に回答
title
CHANGED
File without changes
|
body
CHANGED
@@ -24,11 +24,76 @@
|
|
24
24
|
### 発生している問題・エラーメッセージ
|
25
25
|
|
26
26
|
```
|
27
|
+
止まる箇所①(検索ボタンを押した先に該当ページが無い場合)のエラー
|
28
|
+
DevTools listening on ws://127.0.0.1:12787/devtools/browser/017c4221-d8b9-4b81-9987-3def05eee8b0
|
29
|
+
1302
|
30
|
+
Traceback (most recent call last):
|
31
|
+
File "sample.py", line 41, in scraping
|
32
|
+
meigara_name = root.cssselect('#meigaraHeaderMeigaraName')[0].text_content().strip()
|
33
|
+
IndexError: list index out of range
|
34
|
+
|
35
|
+
During handling of the above exception, another exception occurred:
|
36
|
+
|
37
|
+
Traceback (most recent call last):
|
38
|
+
File "sample.py", line 71, in <module>
|
39
|
+
main()
|
40
|
+
File "sample.py", line 30, in main
|
41
|
+
scraping(browser,line.rstrip())
|
42
|
+
File "sample.py", line 67, in scraping
|
43
|
+
print(traceback.format_exc(sys.exc_info()[2]))
|
44
|
+
File "C:\Python\lib\traceback.py", line 163, in format_exc
|
45
|
+
return "".join(format_exception(*sys.exc_info(), limit=limit, chain=chain))
|
46
|
+
File "C:\Python\lib\traceback.py", line 117, in format_exception
|
47
|
+
type(value), value, tb, limit=limit).format(chain=chain))
|
48
|
+
File "C:\Python\lib\traceback.py", line 497, in __init__
|
49
|
+
capture_locals=capture_locals)
|
50
|
+
File "C:\Python\lib\traceback.py", line 332, in extract
|
51
|
+
if limit >= 0:
|
27
52
|
TypeError: '>=' not supported between instances of 'traceback' and 'int'
|
53
|
+
|
54
|
+
止まる箇所②(ボタンがクリックできない場合)のエラー
|
55
|
+
DevTools listening on ws://127.0.0.1:12410/devtools/browser/94524637-85ee-4c7e-a0c6-0cc2f1c12ad3
|
56
|
+
1305
|
57
|
+
Traceback (most recent call last):
|
58
|
+
File "sample.py", line 43, in scraping
|
59
|
+
browser.find_element_by_css_selector('#quarterInfoLink').click()
|
60
|
+
File "C:\Python\lib\selenium\webdriver\remote\webelement.py", line 80, in click
|
61
|
+
self._execute(Command.CLICK_ELEMENT)
|
62
|
+
File "C:\Python\lib\selenium\webdriver\remote\webelement.py", line 628, in _execute
|
63
|
+
return self._parent.execute(command, params)
|
64
|
+
File "C:\Python\lib\selenium\webdriver\remote\webdriver.py", line 312, in execute
|
65
|
+
self.error_handler.check_response(response)
|
66
|
+
File "C:\Python\lib\selenium\webdriver\remote\errorhandler.py", line 242, in check_response
|
67
|
+
raise exception_class(message, screen, stacktrace)
|
68
|
+
selenium.common.exceptions.WebDriverException: Message: unknown error: Element <a id="quarterInfoLink" href="#" class="btn is-disabled">...</a> is not clickable at point (382, 252). Other element would receive the click: <li class="item">...</li>
|
69
|
+
(Session info: chrome=67.0.3396.99)
|
70
|
+
(Driver info: chromedriver=2.35.528161 (5b82f2d2aae0ca24b877009200ced9065a772e73),platform=Windows NT 10.0.16299 x86_64)
|
71
|
+
|
72
|
+
|
73
|
+
During handling of the above exception, another exception occurred:
|
74
|
+
|
75
|
+
Traceback (most recent call last):
|
76
|
+
File "sample.py", line 71, in <module>
|
77
|
+
main()
|
78
|
+
File "sample.py", line 30, in main
|
79
|
+
scraping(browser,line.rstrip())
|
80
|
+
File "sample.py", line 67, in scraping
|
81
|
+
print(traceback.format_exc(sys.exc_info()[2]))
|
82
|
+
File "C:\Python\lib\traceback.py", line 163, in format_exc
|
83
|
+
return "".join(format_exception(*sys.exc_info(), limit=limit, chain=chain))
|
84
|
+
File "C:\Python\lib\traceback.py", line 117, in format_exception
|
85
|
+
type(value), value, tb, limit=limit).format(chain=chain))
|
86
|
+
File "C:\Python\lib\traceback.py", line 497, in __init__
|
87
|
+
capture_locals=capture_locals)
|
88
|
+
File "C:\Python\lib\traceback.py", line 332, in extract
|
89
|
+
if limit >= 0:
|
90
|
+
TypeError: '>=' not supported between instances of 'traceback' and 'int'
|
28
91
|
```
|
29
92
|
### 該当のソースコード
|
30
93
|
|
31
94
|
```PYTHON3
|
95
|
+
# -*- encoding: utf-8 -*-
|
96
|
+
|
32
97
|
from selenium import webdriver
|
33
98
|
import lxml.html
|
34
99
|
import re
|
@@ -53,48 +118,62 @@
|
|
53
118
|
uid.send_keys(USER_ID)
|
54
119
|
password.send_keys(PASSWORD)
|
55
120
|
browser.find_element_by_name('LoginForm').click()
|
56
|
-
fw.write('A,B,C,D\n')
|
121
|
+
fw.write('A,B,C,D,E,F,G,H,I,J,K,L,M,N\n')
|
122
|
+
|
57
123
|
for line in fr.readlines():
|
58
124
|
scraping(browser,line.rstrip())
|
59
125
|
fr.close()
|
60
126
|
fw.close()
|
61
127
|
|
62
128
|
def scraping(browser,code):
|
63
|
-
browser.find_element_by_id('
|
129
|
+
browser.find_element_by_id('kabuMenu').click()
|
64
130
|
input_code = browser.find_element_by_id('searchKey')
|
65
131
|
input_code.send_keys(code)
|
66
|
-
browser.find_element_by_id('
|
132
|
+
browser.find_element_by_id('meigaraSearchButton').click()
|
67
133
|
try:
|
68
134
|
root = lxml.html.fromstring(browser.page_source)
|
69
|
-
|
135
|
+
meigara_name = root.cssselect('#meigaraHeaderMeigaraName')[0].text_content().strip()
|
70
|
-
price = root.cssselect('#
|
136
|
+
price = root.cssselect('#meigaraHeaderCurrenPrice')[0].text_content().replace(',', '').strip()
|
71
|
-
browser.find_element_by_css_selector('#
|
137
|
+
browser.find_element_by_css_selector('#quarterInfoLink').click()
|
72
138
|
root = lxml.html.fromstring(browser.page_source)
|
73
|
-
closing = root.cssselect('#fiscalTerm')[0].text_content().replace(',','').strip()
|
139
|
+
closing = root.cssselect('#fiscalTerm')[0].text_content().replace(',', '').strip()
|
140
|
+
feature = root.cssselect('#tokushoku')[0].text_content().replace(',', '').strip()
|
141
|
+
inspect = root.cssselect('#gyosekiMitoshi')[0].text_content().replace(',', '').strip()
|
142
|
+
topics = root.cssselect('#topixComment')[0].text_content().replace(',', '').strip()
|
143
|
+
fc_ratio = root.cssselect('#gaikokujinMochikabuHiritsu')[0].text_content().replace(',', '').strip()
|
144
|
+
fc_ratio = re.sub(r'\<.*\>\s*', '', fc_ratio).strip()
|
145
|
+
it_ratio = root.cssselect('#toshinMoshikabuHiritsu')[0].text_content().replace(',', '').strip()
|
146
|
+
it_ratio = re.sub(r'\<.*\>\s*', '', it_ratio).strip()
|
74
|
-
browser.find_element_by_css_selector(
|
147
|
+
browser.find_element_by_css_selector('#quarterInfoPerformanceLink').click()
|
75
148
|
root = lxml.html.fromstring(browser.page_source)
|
76
|
-
jikoshihon = root.cssselect('#value_2')[0].text_content().replace(',','').strip()
|
77
|
-
|
78
|
-
|
149
|
+
jikoshihon = root.cssselect('#value_2')[0].text_content().replace(',', '').strip()
|
150
|
+
jikoshihon_ratio = root.cssselect('#value_3')[0].text_content().replace(',', '').strip()
|
151
|
+
rieki_jyouyo = root.cssselect('#value_5')[0].text_content().replace(',', '').strip()
|
152
|
+
yurishi_husai = root.cssselect('#value_6')[0].text_content().replace(',', '').strip()
|
153
|
+
eigyou_cf = re.sub(r'((\s*\d*))', '', root.cssselect('#eigyoCashFlow')[0].text_content().replace(',', '')).strip()
|
154
|
+
eigyou_cf = re.sub(r'((\s*\d*))', '', eigyou_cf).strip()
|
155
|
+
genkin_cf = root.cssselect('#genkin')[0].text_content().replace(',', '').strip()
|
156
|
+
genkin_cf = re.sub(r'((\s*\d*))', '', genkin_cf).strip()
|
157
|
+
fw.write('{0},{1},{2},{3},{4},{5},{6},{7},{8},{9},{10},{11},{12},{13},{14}\n'.format(code, meigara_name, price, closing,jikoshihon, jikoshihon_ratio,rieki_jyouyo, yurishi_husai,eigyou_cf, genkin_cf, fc_ratio,it_ratio, feature, inspect,topics))
|
158
|
+
|
79
159
|
except:
|
80
160
|
print(code)
|
81
161
|
print(traceback.format_exc(sys.exc_info()[2]))
|
82
162
|
pass
|
83
163
|
|
84
164
|
if __name__ == '__main__':
|
85
|
-
main()
|
165
|
+
main()```
|
86
|
-
```
|
87
166
|
|
88
167
|
### 試したこと
|
89
168
|
止まる箇所①(検索ボタンを押した先に該当ページが無い場合)
|
90
|
-
browser.find_element_by_id('
|
169
|
+
browser.find_element_by_id('meigaraSearchButton').click()
|
91
170
|
で移動したあとに'#meigaraHeaderMeigaraName'があれば下の行に
|
92
171
|
無ければ
|
93
172
|
input_code.send_keys(code)
|
94
173
|
まで戻るような記述がif文で書ければと思って検索しましたが、解決できないでいます
|
95
174
|
|
96
175
|
止まる箇所②(ボタンがクリックできない場合)
|
97
|
-
browser.find_element_by_css_selector('#
|
176
|
+
browser.find_element_by_css_selector('#quarterInfoLink').click()
|
98
177
|
でクリックできないと止まるようなので
|
99
178
|
クリックできるところと出来ないところの違いを見ると
|
100
179
|
出来ない: <a id="quarterInfoLink" href="#" class="btn is-disabled">
|
@@ -104,5 +183,13 @@
|
|
104
183
|
まで戻るような記述がif文で書ければと思って検索しましたが、解決できないでいます
|
105
184
|
|
106
185
|
### 補足情報(FW/ツールのバージョンなど)
|
186
|
+
質問用にソースコードを省略していたものを全文記載しました(IP/PASS以外)
|
107
187
|
|
188
|
+
Q.seleniumを使っていたのに、途中でlxmlを使った理由は何でしょうか?
|
189
|
+
|
190
|
+
A.用途に似たソースコードの写経(動かなかったところを色々弄ってみる)をしている段階で
|
191
|
+
selenium=ブラウザを操作するもの lxml=HTMLを解析するもの 程度のイメージしかありません
|
192
|
+
ご質問の趣旨は「seleniumだけで出来るのに何故?」ということかと思いますが
|
193
|
+
私の知識が追い付いておらず申し訳ありません
|
194
|
+
|
108
195
|
ここにより詳細な情報を記載してください。HP
|