teratail header banner
teratail header banner
質問するログイン新規登録

回答編集履歴

1

edit

2018/01/09 11:51

投稿

mkgrei
mkgrei

スコア8562

answer CHANGED
@@ -4,4 +4,73 @@
4
4
  for a in args.jan:
5
5
  products[a] = {}
6
6
  v = products[a]
7
+ ```
8
+
9
+ ---
10
+
11
+ Selenium の webdriver 側に不具合があるようで、JANコードが登録されていても、たまに取得に失敗することがあるみたいですね。
12
+
13
+ ```python
14
+ #!/usr/bin/env python
15
+ # -*- coding: utf-8 -*-
16
+ from selenium import webdriver
17
+ from selenium.common.exceptions import NoSuchElementException
18
+ from selenium.webdriver.common.keys import Keys
19
+ import argparse
20
+
21
+ DELAY_SLEEP = 1
22
+ JANJAN_URL = "https://antlion.xsrv.jp/"
23
+
24
def get_input(fmimic=True, argv=None):
    """Return a namespace whose ``jan`` attribute lists the codes to look up.

    Args:
        fmimic: When true (the default) the command line is ignored and a
            fixed list of sample codes is returned, so the script can be
            run without arguments.
        argv: Argument list to parse when ``fmimic`` is false. ``None``
            makes argparse read ``sys.argv[1:]``.

    Returns:
        An ``argparse.Namespace`` with a ``jan`` list. The sample codes are
        ints; codes parsed from the command line are strings.

    Raises:
        SystemExit: Raised by argparse when ``fmimic`` is false and
            ``--jan`` is missing or the arguments are otherwise invalid.
    """
    if fmimic:
        return argparse.Namespace(
            jan=[4976219091275, 4873116953, 4873116950000],
        )

    parser = argparse.ArgumentParser(description='Get asin from JANJAN_URL')
    # required=True: without it a missing --jan yields jan=None, which only
    # fails later when the caller tries to iterate over it.
    parser.add_argument('--jan', nargs='+', required=True)
    return parser.parse_args(argv)
35
+
36
def main():
    """Look up each JAN code on JANJAN_URL and print the title and ASIN found.

    For every code returned by ``get_input()`` the search form is submitted
    in a Chrome session driven by Selenium. Results are collected in a dict
    keyed by product title (a later code with the same title overwrites an
    earlier one) and printed once all codes have been processed. Codes whose
    result page lacks the expected elements are reported as "not available"
    and skipped.
    """
    args = get_input()

    driver = webdriver.Chrome()
    products = {}
    try:
        for jan in args.jan:
            try:
                driver.get(JANJAN_URL)
                jan_input_box = driver.find_element_by_xpath(
                    '//*[@id="content"]/div[1]/div/form/input[2]')
                # The sample codes are ints; send the digits as text.
                jan_input_box.send_keys(str(jan))
                jan_input_button = driver.find_element_by_xpath(
                    '//*[@id="content"]/div[1]/div/form/input[3]')
                jan_input_button.click()

                content = driver.find_elements_by_xpath(
                    '//*[@id="content"]/div[2]/div[5]/ul/li')
                if not content:
                    raise NoSuchElementException

                asin = None
                for item in content:
                    # textContent may be missing or start with whitespace;
                    # normalise it before testing the prefix.
                    desc = (item.get_attribute('textContent') or '').strip()
                    if desc.startswith("ASIN"):
                        asin = desc.split()[-1]
                if asin is None:
                    print('ASIN not found')

                # find_element_* raises NoSuchElementException when the title
                # node is absent, so the handler below covers that case too
                # (indexing an empty find_elements_* list raised IndexError).
                title = driver.find_element_by_xpath(
                    '//*[@id="content"]/div[2]/div[3]/h3/a').text
            except NoSuchElementException:
                print("not available")
                print(jan)
                continue

            products[title] = {'ASIN': asin, 'JAN': jan}

        for title, info in products.items():
            print(title, info)
    finally:
        # quit() ends the whole browser session, whereas close() only closes
        # the current window; running it in finally releases the driver even
        # when an unexpected exception escapes the loop.
        driver.quit()
73
+
74
+ if __name__ == "__main__":
75
+ main()
7
76
  ```