teratail header banner
teratail header banner
質問する | ログイン | 新規登録

質問編集履歴

4

ソースコードの見え方がおかしかったので修正

2018/07/18 12:11

投稿

psy
psy

スコア4

title CHANGED
File without changes
body CHANGED
@@ -161,7 +161,7 @@
161
161
  pass
162
162
 
163
163
  if __name__ == '__main__':
164
- main()```
164
+ main()
165
165
  ```
166
166
 
167
167
 

3

ソースコードの見え方がおかしかったものを修正

2018/07/18 12:11

投稿

psy
psy

スコア4

title CHANGED
File without changes
body CHANGED
@@ -90,10 +90,9 @@
90
90
  TypeError: '>=' not supported between instances of 'traceback' and 'int'
91
91
  ```
92
92
  ### 該当のソースコード
93
+ PYTHON3
93
94
 
94
-
95
- -*- encoding: utf-8 -*-
95
+ ```
96
-
97
96
  from selenium import webdriver
98
97
  import lxml.html
99
98
  import re
@@ -163,7 +162,9 @@
163
162
 
164
163
  if __name__ == '__main__':
165
164
  main()```
165
+ ```
166
166
 
167
+
167
168
  ### 試したこと
168
169
  止まる箇所①(検索ボタンを押した先に該当ページが無い場合)
169
170
  browser.find_element_by_id('meigaraSearchButton').click()

2

ソースコードの表示がおかしかったので修正

2018/07/18 12:10

投稿

psy
psy

スコア4

title CHANGED
File without changes
body CHANGED
@@ -91,9 +91,9 @@
91
91
  ```
92
92
  ### 該当のソースコード
93
93
 
94
- ```PYTHON3
95
- # -*- encoding: utf-8 -*-
96
94
 
95
+ -*- encoding: utf-8 -*-
96
+
97
97
  from selenium import webdriver
98
98
  import lxml.html
99
99
  import re

1

エラー全文追記 ソースコードを修正 質問に回答

2018/07/18 12:08

投稿

psy
psy

スコア4

title CHANGED
File without changes
body CHANGED
@@ -24,11 +24,76 @@
24
24
  ### 発生している問題・エラーメッセージ
25
25
 
26
26
  ```
27
+ 止まる箇所①(検索ボタンを押した先に該当ページが無い場合)のエラー
28
+ DevTools listening on ws://127.0.0.1:12787/devtools/browser/017c4221-d8b9-4b81-9987-3def05eee8b0
29
+ 1302
30
+ Traceback (most recent call last):
31
+ File "sample.py", line 41, in scraping
32
+ meigara_name = root.cssselect('#meigaraHeaderMeigaraName')[0].text_content().strip()
33
+ IndexError: list index out of range
34
+
35
+ During handling of the above exception, another exception occurred:
36
+
37
+ Traceback (most recent call last):
38
+ File "sample.py", line 71, in <module>
39
+ main()
40
+ File "sample.py", line 30, in main
41
+ scraping(browser,line.rstrip())
42
+ File "sample.py", line 67, in scraping
43
+ print(traceback.format_exc(sys.exc_info()[2]))
44
+ File "C:\Python\lib\traceback.py", line 163, in format_exc
45
+ return "".join(format_exception(*sys.exc_info(), limit=limit, chain=chain))
46
+ File "C:\Python\lib\traceback.py", line 117, in format_exception
47
+ type(value), value, tb, limit=limit).format(chain=chain))
48
+ File "C:\Python\lib\traceback.py", line 497, in __init__
49
+ capture_locals=capture_locals)
50
+ File "C:\Python\lib\traceback.py", line 332, in extract
51
+ if limit >= 0:
27
52
  TypeError: '>=' not supported between instances of 'traceback' and 'int'
53
+
54
+ 止まる箇所②(ボタンがクリックできない場合)のエラー
55
+ DevTools listening on ws://127.0.0.1:12410/devtools/browser/94524637-85ee-4c7e-a0c6-0cc2f1c12ad3
56
+ 1305
57
+ Traceback (most recent call last):
58
+ File "sample.py", line 43, in scraping
59
+ browser.find_element_by_css_selector('#quarterInfoLink').click()
60
+ File "C:\Python\lib\selenium\webdriver\remote\webelement.py", line 80, in click
61
+ self._execute(Command.CLICK_ELEMENT)
62
+ File "C:\Python\lib\selenium\webdriver\remote\webelement.py", line 628, in _execute
63
+ return self._parent.execute(command, params)
64
+ File "C:\Python\lib\selenium\webdriver\remote\webdriver.py", line 312, in execute
65
+ self.error_handler.check_response(response)
66
+ File "C:\Python\lib\selenium\webdriver\remote\errorhandler.py", line 242, in check_response
67
+ raise exception_class(message, screen, stacktrace)
68
+ selenium.common.exceptions.WebDriverException: Message: unknown error: Element <a id="quarterInfoLink" href="#" class="btn is-disabled">...</a> is not clickable at point (382, 252). Other element would receive the click: <li class="item">...</li>
69
+ (Session info: chrome=67.0.3396.99)
70
+ (Driver info: chromedriver=2.35.528161 (5b82f2d2aae0ca24b877009200ced9065a772e73),platform=Windows NT 10.0.16299 x86_64)
71
+
72
+
73
+ During handling of the above exception, another exception occurred:
74
+
75
+ Traceback (most recent call last):
76
+ File "sample.py", line 71, in <module>
77
+ main()
78
+ File "sample.py", line 30, in main
79
+ scraping(browser,line.rstrip())
80
+ File "sample.py", line 67, in scraping
81
+ print(traceback.format_exc(sys.exc_info()[2]))
82
+ File "C:\Python\lib\traceback.py", line 163, in format_exc
83
+ return "".join(format_exception(*sys.exc_info(), limit=limit, chain=chain))
84
+ File "C:\Python\lib\traceback.py", line 117, in format_exception
85
+ type(value), value, tb, limit=limit).format(chain=chain))
86
+ File "C:\Python\lib\traceback.py", line 497, in __init__
87
+ capture_locals=capture_locals)
88
+ File "C:\Python\lib\traceback.py", line 332, in extract
89
+ if limit >= 0:
90
+ TypeError: '>=' not supported between instances of 'traceback' and 'int'
28
91
  ```
29
92
  ### 該当のソースコード
30
93
 
31
94
  ```PYTHON3
95
+ # -*- encoding: utf-8 -*-
96
+
32
97
  from selenium import webdriver
33
98
  import lxml.html
34
99
  import re
@@ -53,48 +118,62 @@
53
118
  uid.send_keys(USER_ID)
54
119
  password.send_keys(PASSWORD)
55
120
  browser.find_element_by_name('LoginForm').click()
56
- fw.write('A,B,C,D\n')
121
+ fw.write('A,B,C,D,E,F,G,H,I,J,K,L,M,N\n')
122
+
57
123
  for line in fr.readlines():
58
124
  scraping(browser,line.rstrip())
59
125
  fr.close()
60
126
  fw.close()
61
127
 
62
128
  def scraping(browser,code):
63
- browser.find_element_by_id('Menu').click()
129
+ browser.find_element_by_id('kabuMenu').click()
64
130
  input_code = browser.find_element_by_id('searchKey')
65
131
  input_code.send_keys(code)
66
- browser.find_element_by_id('Button').click()
132
+ browser.find_element_by_id('meigaraSearchButton').click()
67
133
  try:
68
134
  root = lxml.html.fromstring(browser.page_source)
69
- meigarname = root.cssselect('#MeigaraName')[0].text_content().strip()
135
+ meigara_name = root.cssselect('#meigaraHeaderMeigaraName')[0].text_content().strip()
70
- price = root.cssselect('#meigaraPrice')[0].text_content().replace(',','').strip()
136
+ price = root.cssselect('#meigaraHeaderCurrenPrice')[0].text_content().replace(',', '').strip()
71
- browser.find_element_by_css_selector('#InfoLink').click()
137
+ browser.find_element_by_css_selector('#quarterInfoLink').click()
72
138
  root = lxml.html.fromstring(browser.page_source)
73
- closing = root.cssselect('#fiscalTerm')[0].text_content().replace(',','').strip()
139
+ closing = root.cssselect('#fiscalTerm')[0].text_content().replace(',', '').strip()
140
+ feature = root.cssselect('#tokushoku')[0].text_content().replace(',', '').strip()
141
+ inspect = root.cssselect('#gyosekiMitoshi')[0].text_content().replace(',', '').strip()
142
+ topics = root.cssselect('#topixComment')[0].text_content().replace(',', '').strip()
143
+ fc_ratio = root.cssselect('#gaikokujinMochikabuHiritsu')[0].text_content().replace(',', '').strip()
144
+ fc_ratio = re.sub(r'\<.*\>\s*', '', fc_ratio).strip()
145
+ it_ratio = root.cssselect('#toshinMoshikabuHiritsu')[0].text_content().replace(',', '').strip()
146
+ it_ratio = re.sub(r'\<.*\>\s*', '', it_ratio).strip()
74
- browser.find_element_by_css_selector("#InfoPerLink").click()
147
+ browser.find_element_by_css_selector('#quarterInfoPerformanceLink').click()
75
148
  root = lxml.html.fromstring(browser.page_source)
76
- jikoshihon = root.cssselect('#value_2')[0].text_content().replace(',','').strip()
77
- fw.write('{0},{1},{2},{3}\n'.format(meigaraname,price,closing,jikoshihon))
78
- time.sleep(2.0)
149
+ jikoshihon = root.cssselect('#value_2')[0].text_content().replace(',', '').strip()
150
+ jikoshihon_ratio = root.cssselect('#value_3')[0].text_content().replace(',', '').strip()
151
+ rieki_jyouyo = root.cssselect('#value_5')[0].text_content().replace(',', '').strip()
152
+ yurishi_husai = root.cssselect('#value_6')[0].text_content().replace(',', '').strip()
153
+ eigyou_cf = re.sub(r'((\s*\d*))', '', root.cssselect('#eigyoCashFlow')[0].text_content().replace(',', '')).strip()
154
+ eigyou_cf = re.sub(r'((\s*\d*))', '', eigyou_cf).strip()
155
+ genkin_cf = root.cssselect('#genkin')[0].text_content().replace(',', '').strip()
156
+ genkin_cf = re.sub(r'((\s*\d*))', '', genkin_cf).strip()
157
+ fw.write('{0},{1},{2},{3},{4},{5},{6},{7},{8},{9},{10},{11},{12},{13},{14}\n'.format(code, meigara_name, price, closing,jikoshihon, jikoshihon_ratio,rieki_jyouyo, yurishi_husai,eigyou_cf, genkin_cf, fc_ratio,it_ratio, feature, inspect,topics))
158
+
79
159
  except:
80
160
  print(code)
81
161
  print(traceback.format_exc(sys.exc_info()[2]))
82
162
  pass
83
163
 
84
164
  if __name__ == '__main__':
85
- main()
165
+ main()```
86
- ```
87
166
 
88
167
  ### 試したこと
89
168
  止まる箇所①(検索ボタンを押した先に該当ページが無い場合)
90
- browser.find_element_by_id('Button').click()
169
+ browser.find_element_by_id('meigaraSearchButton').click()
91
170
    で移動したあとに'#MeigaraName'があれば下の行に
92
171
    無ければ
93
172
  input_code.send_keys(code)
94
173
    まで戻るような記述がIF式で書ければと思って検索するも解決できないでいます
95
174
 
96
175
  止まる箇所②(ボタンがクリックできない場合)
97
-   browser.find_element_by_css_selector('#InfoLink').click()
176
+   browser.find_element_by_css_selector('#quarterInfoLink').click()
98
177
     でクリックできないと止まるようなので
99
178
     クリックできるところと出来ないところの違いを見ると
100
179
   出来ない: <a id="InfoLink" href="#" class="btn is-disabled">
@@ -104,5 +183,13 @@
104
183
    まで戻るような記述がIF式で書ければと思って検索するも解決できないでいます
105
184
 
106
185
  ### 補足情報(FW/ツールのバージョンなど)
186
+ 質問用にソースコードを省略していたものを全文記載しました(IP/PASS以外)
107
187
 
188
+ Q.seleniumを使っていたのに、途中でlxmlを使った理由は何でしょうか?
189
+
190
+ A.用途に似たソースコードの写経(動かなかったところを色々弄ってみる)をしている段階で
191
+  selenium=ブラウザを操作するもの lxml=HTMLを解析するもの 程度のイメージしかありません
192
+  ご質問の趣旨は「seleniumだけで出来るのに何故?」ということかと思いますが
193
+  私の知識が追い付いておらず申し訳ありません 
194
+
108
195
  ここにより詳細な情報を記載してください。HP