質問編集履歴

4

ソースコードの見え方がおかしかったので修正

2018/07/18 12:11

投稿

psy
psy

スコア4

test CHANGED
File without changes
test CHANGED
@@ -324,7 +324,7 @@
324
324
 
325
325
  if __name__ == '__main__':
326
326
 
327
- main()```
327
+ main()
328
328
 
329
329
  ```
330
330
 

3

ソースコードの見え方がおかしかったものを修正

2018/07/18 12:11

投稿

psy
psy

スコア4

test CHANGED
File without changes
test CHANGED
@@ -182,13 +182,11 @@
182
182
 
183
183
  ### 該当のソースコード
184
184
 
185
-
186
-
187
-
188
-
189
- -*- encoding: utf-8 -*-
185
+ PYTHON3
186
+
187
+
188
+
190
-
189
+ ```
191
-
192
190
 
193
191
  from selenium import webdriver
194
192
 
@@ -328,6 +326,10 @@
328
326
 
329
327
  main()```
330
328
 
329
+ ```
330
+
331
+
332
+
331
333
 
332
334
 
333
335
  ### 試したこと

2

ソースコードの表示がおかしかったので修正

2018/07/18 12:10

投稿

psy
psy

スコア4

test CHANGED
File without changes
test CHANGED
@@ -184,9 +184,9 @@
184
184
 
185
185
 
186
186
 
187
- ```PYTHON3
187
+
188
-
188
+
189
- # -*- encoding: utf-8 -*-
189
+ -*- encoding: utf-8 -*-
190
190
 
191
191
 
192
192
 

1

エラー全文を追記、ソースコードを修正、質問に回答

2018/07/18 12:08

投稿

psy
psy

スコア4

test CHANGED
File without changes
test CHANGED
@@ -50,8 +50,134 @@
50
50
 
51
51
  ```
52
52
 
53
+ 止まる箇所①(検索ボタンを押した先に該当ページが無い場合)のエラー
54
+
55
+ DevTools listening on ws://127.0.0.1:12787/devtools/browser/017c4221-d8b9-4b81-9987-3def05eee8b0
56
+
57
+ 1302
58
+
59
+ Traceback (most recent call last):
60
+
61
+ File "sample.py", line 41, in scraping
62
+
63
+ meigara_name = root.cssselect('#meigaraHeaderMeigaraName')[0].text_content().strip()
64
+
65
+ IndexError: list index out of range
66
+
67
+
68
+
69
+ During handling of the above exception, another exception occurred:
70
+
71
+
72
+
73
+ Traceback (most recent call last):
74
+
75
+ File "sample.py", line 71, in <module>
76
+
77
+ main()
78
+
79
+ File "sample.py", line 30, in main
80
+
81
+ scraping(browser,line.rstrip())
82
+
83
+ File "sample.py", line 67, in scraping
84
+
85
+ print(traceback.format_exc(sys.exc_info()[2]))
86
+
87
+ File "C:\Python\lib\traceback.py", line 163, in format_exc
88
+
89
+ return "".join(format_exception(*sys.exc_info(), limit=limit, chain=chain))
90
+
91
+ File "C:\Python\lib\traceback.py", line 117, in format_exception
92
+
93
+ type(value), value, tb, limit=limit).format(chain=chain))
94
+
95
+ File "C:\Python\lib\traceback.py", line 497, in __init__
96
+
97
+ capture_locals=capture_locals)
98
+
99
+ File "C:\Python\lib\traceback.py", line 332, in extract
100
+
101
+ if limit >= 0:
102
+
53
103
  TypeError: '>=' not supported between instances of 'traceback' and 'int'
54
104
 
105
+
106
+
107
+ 止まる箇所②(ボタンがクリックできない場合)のエラー
108
+
109
+ DevTools listening on ws://127.0.0.1:12410/devtools/browser/94524637-85ee-4c7e-a0c6-0cc2f1c12ad3
110
+
111
+ 1305
112
+
113
+ Traceback (most recent call last):
114
+
115
+ File "sample.py", line 43, in scraping
116
+
117
+ browser.find_element_by_css_selector('#quarterInfoLink').click()
118
+
119
+ File "C:\Python\lib\selenium\webdriver\remote\webelement.py", line 80, in click
120
+
121
+ self._execute(Command.CLICK_ELEMENT)
122
+
123
+ File "C:\Python\lib\selenium\webdriver\remote\webelement.py", line 628, in _execute
124
+
125
+ return self._parent.execute(command, params)
126
+
127
+ File "C:\Python\lib\selenium\webdriver\remote\webdriver.py", line 312, in execute
128
+
129
+ self.error_handler.check_response(response)
130
+
131
+ File "C:\Python\lib\selenium\webdriver\remote\errorhandler.py", line 242, in check_response
132
+
133
+ raise exception_class(message, screen, stacktrace)
134
+
135
+ selenium.common.exceptions.WebDriverException: Message: unknown error: Element <a id="quarterInfoLink" href="#" class="btn is-disabled">...</a> is not clickable at point (382, 252). Other element would receive the click: <li class="item">...</li>
136
+
137
+ (Session info: chrome=67.0.3396.99)
138
+
139
+ (Driver info: chromedriver=2.35.528161 (5b82f2d2aae0ca24b877009200ced9065a772e73),platform=Windows NT 10.0.16299 x86_64)
140
+
141
+
142
+
143
+
144
+
145
+ During handling of the above exception, another exception occurred:
146
+
147
+
148
+
149
+ Traceback (most recent call last):
150
+
151
+ File "sample.py", line 71, in <module>
152
+
153
+ main()
154
+
155
+ File "sample.py", line 30, in main
156
+
157
+ scraping(browser,line.rstrip())
158
+
159
+ File "sample.py", line 67, in scraping
160
+
161
+ print(traceback.format_exc(sys.exc_info()[2]))
162
+
163
+ File "C:\Python\lib\traceback.py", line 163, in format_exc
164
+
165
+ return "".join(format_exception(*sys.exc_info(), limit=limit, chain=chain))
166
+
167
+ File "C:\Python\lib\traceback.py", line 117, in format_exception
168
+
169
+ type(value), value, tb, limit=limit).format(chain=chain))
170
+
171
+ File "C:\Python\lib\traceback.py", line 497, in __init__
172
+
173
+ capture_locals=capture_locals)
174
+
175
+ File "C:\Python\lib\traceback.py", line 332, in extract
176
+
177
+ if limit >= 0:
178
+
179
+ TypeError: '>=' not supported between instances of 'traceback' and 'int'
180
+
55
181
  ```
56
182
 
57
183
  ### 該当のソースコード
@@ -60,6 +186,10 @@
60
186
 
61
187
  ```PYTHON3
62
188
 
189
+ # -*- encoding: utf-8 -*-
190
+
191
+
192
+
63
193
  from selenium import webdriver
64
194
 
65
195
  import lxml.html
@@ -108,7 +238,9 @@
108
238
 
109
239
  browser.find_element_by_name('LoginForm').click()
110
240
 
111
- fw.write('A,B,C,D\n')
241
+ fw.write('A,B,C,D,E,F,G,H,I,J,K,L,M,N\n')
242
+
243
+
112
244
 
113
245
  for line in fr.readlines():
114
246
 
@@ -122,37 +254,65 @@
122
254
 
123
255
  def scraping(browser,code):
124
256
 
125
- browser.find_element_by_id('Menu').click()
257
+ browser.find_element_by_id('kabuMenu').click()
126
258
 
127
259
  input_code = browser.find_element_by_id('searchKey')
128
260
 
129
261
  input_code.send_keys(code)
130
262
 
131
- browser.find_element_by_id('Button').click()
263
+ browser.find_element_by_id('meigaraSearchButton').click()
132
264
 
133
265
  try:
134
266
 
135
267
  root = lxml.html.fromstring(browser.page_source)
136
268
 
137
- meigarname = root.cssselect('#MeigaraName')[0].text_content().strip()
269
+ meigara_name = root.cssselect('#meigaraHeaderMeigaraName')[0].text_content().strip()
138
-
270
+
139
- price = root.cssselect('#meigaraPrice')[0].text_content().replace(',','').strip()
271
+ price = root.cssselect('#meigaraHeaderCurrenPrice')[0].text_content().replace(',', '').strip()
140
-
272
+
141
- browser.find_element_by_css_selector('#InfoLink').click()
273
+ browser.find_element_by_css_selector('#quarterInfoLink').click()
142
274
 
143
275
  root = lxml.html.fromstring(browser.page_source)
144
276
 
145
- closing = root.cssselect('#fiscalTerm')[0].text_content().replace(',','').strip()
277
+ closing = root.cssselect('#fiscalTerm')[0].text_content().replace(',', '').strip()
278
+
146
-
279
+ feature = root.cssselect('#tokushoku')[0].text_content().replace(',', '').strip()
280
+
281
+ inspect = root.cssselect('#gyosekiMitoshi')[0].text_content().replace(',', '').strip()
282
+
283
+ topics = root.cssselect('#topixComment')[0].text_content().replace(',', '').strip()
284
+
285
+ fc_ratio = root.cssselect('#gaikokujinMochikabuHiritsu')[0].text_content().replace(',', '').strip()
286
+
287
+ fc_ratio = re.sub(r'\<.*\>\s*', '', fc_ratio).strip()
288
+
289
+ it_ratio = root.cssselect('#toshinMoshikabuHiritsu')[0].text_content().replace(',', '').strip()
290
+
291
+ it_ratio = re.sub(r'\<.*\>\s*', '', it_ratio).strip()
292
+
147
- browser.find_element_by_css_selector("#InfoPerLink").click()
293
+ browser.find_element_by_css_selector('#quarterInfoPerformanceLink').click()
148
294
 
149
295
  root = lxml.html.fromstring(browser.page_source)
150
296
 
151
- jikoshihon = root.cssselect('#value_2')[0].text_content().replace(',','').strip()
152
-
153
- fw.write('{0},{1},{2},{3}\n'.format(meigaraname,price,closing,jikoshihon))
154
-
155
- time.sleep(2.0)
297
+ jikoshihon = root.cssselect('#value_2')[0].text_content().replace(',', '').strip()
298
+
299
+ jikoshihon_ratio = root.cssselect('#value_3')[0].text_content().replace(',', '').strip()
300
+
301
+ rieki_jyouyo = root.cssselect('#value_5')[0].text_content().replace(',', '').strip()
302
+
303
+ yurishi_husai = root.cssselect('#value_6')[0].text_content().replace(',', '').strip()
304
+
305
+ eigyou_cf = re.sub(r'((\s*\d*))', '', root.cssselect('#eigyoCashFlow')[0].text_content().replace(',', '')).strip()
306
+
307
+ eigyou_cf = re.sub(r'((\s*\d*))', '', eigyou_cf).strip()
308
+
309
+ genkin_cf = root.cssselect('#genkin')[0].text_content().replace(',', '').strip()
310
+
311
+ genkin_cf = re.sub(r'((\s*\d*))', '', genkin_cf).strip()
312
+
313
+ fw.write('{0},{1},{2},{3},{4},{5},{6},{7},{8},{9},{10},{11},{12},{13},{14}\n'.format(code, meigara_name, price, closing,jikoshihon, jikoshihon_ratio,rieki_jyouyo, yurishi_husai,eigyou_cf, genkin_cf, fc_ratio,it_ratio, feature, inspect,topics))
314
+
315
+
156
316
 
157
317
  except:
158
318
 
@@ -166,9 +326,7 @@
166
326
 
167
327
  if __name__ == '__main__':
168
328
 
169
- main()
329
+ main()```
170
-
171
- ```
172
330
 
173
331
 
174
332
 
@@ -176,7 +334,7 @@
176
334
 
177
335
  止まる箇所①(検索ボタンを押した先に該当ページが無い場合)
178
336
 
179
- browser.find_element_by_id('Button').click()
337
+ browser.find_element_by_id('meigaraSearchButton').click()
180
338
 
181
339
    で移動したあとに'#MeigaraName'があれば下の行に
182
340
 
@@ -190,7 +348,7 @@
190
348
 
191
349
  止まる箇所②(ボタンがクリックできない場合)
192
350
 
193
-   browser.find_element_by_css_selector('#InfoLink').click()
351
+   browser.find_element_by_css_selector('#quarterInfoLink').click()
194
352
 
195
353
     でクリックできないと止まるようなので
196
354
 
@@ -210,6 +368,22 @@
210
368
 
211
369
  ### 補足情報(FW/ツールのバージョンなど)
212
370
 
371
+ 質問用にソースコードを省略していたものを全文記載しました(IP/PASS以外)
372
+
373
+
374
+
375
+ Q.seleniumを使っていたのに、途中でlxmlを使った理由は何でしょうか?
376
+
377
+
378
+
379
+ A.用途に似たソースコードの写経(動かなかったところを色々弄ってみる)をしている段階で
380
+
381
+  selenium=ブラウザを操作するもの lxml=HTMLを解析するもの 程度のイメージしかありません
382
+
383
+  ご質問の趣旨は「seleniumだけで出来るのに何故?」ということかと思いますが
384
+
385
+  私の知識が追い付いておらず申し訳ありません 
386
+
213
387
 
214
388
 
215
389
  ここにより詳細な情報を記載してください。HP