質問編集履歴

3

コード修正

2022/11/02 04:55

投稿

k.nakazono
k.nakazono

スコア4

test CHANGED
File without changes
test CHANGED
@@ -41,8 +41,8 @@
41
41
 
42
42
 
43
43
  # メイン処理
44
- DOMAIN = "https://www.xxxxxxx.co.jp/"
44
+ DOMAIN = "https://www.janpara.co.jp/"
45
- START_URL = "https://www.xxxxxx.co.jp/buy/search/result/?KEYWORDS=&OUTCLSCODE=46&CLSCODE=&LINE=24"
45
+ START_URL = "https://www.janpara.co.jp/buy/search/result/?KEYWORDS=&OUTCLSCODE=46&CLSCODE=&LINE=24"
46
46
  PROXY = 'localhost:9050'
47
47
 
48
48
  if __name__ == '__main__':

2

ソースコードの簡易化

2022/11/02 04:54

投稿

k.nakazono
k.nakazono

スコア4

test CHANGED
File without changes
test CHANGED
@@ -67,56 +67,13 @@
67
67
 
68
68
  for detail_url in detail_urls:
69
69
  try:
70
- print(detail_url)
71
- driver.get(detail_url)
72
- time.sleep(1)
73
-
74
- if driver.find_elements(By.CSS_SELECTOR, ".items > label[for*='used']"):
75
- unused_label = driver.find_element(By.CSS_SELECTOR, ".items > label[for='used-2']")
76
- unused_label.click()
77
-
78
- results.append({
70
+ # ここで要素の情報を取得する
79
- "data_source": "JANPARA",
80
- "title": title,
81
- "url": driver.current_url,
82
- "color": None,
83
- "condition": unused_label.text,
84
- "accessories": None,
85
- "is_sim_free": None,
86
- "price": int(re.sub("\D", "", driver.find_elements(By.CLASS_NAME, 'detail_item_money')[-1].text)),
87
- "description": None
88
-
89
- })
90
-
91
- used_label = driver.find_element(By.CSS_SELECTOR, ".items > label[for='used-1']")
92
- used_label.click()
93
-
94
- title = driver.find_element(By.CSS_SELECTOR, '.sub_cont>h3').text
95
-
96
- conditions = driver.find_elements(By.CSS_SELECTOR, ".items > label[for*='状態']")
97
- options = driver.find_elements(By.CSS_SELECTOR, ".items > label[for*='付属品']")
98
- for condition, option in itertools.product(conditions, options): #Xの10通り、Yの10通りの全組み合わせ
99
- condition.click()
100
- option.click()
101
-
102
- price = int(re.sub("\D", "", driver.find_element(By.ID, 'satei_amt').text))
103
- result = {
104
- "data_source": "JANPARA",
105
- "title": title,
106
- "url": driver.current_url,
107
- "color": None,
108
- "condition": condition.text,
109
- "accessories": option.text,
110
- "is_sim_free": None,
111
- "price": price,
112
- "description": None
113
- }
114
- results.append(result)
115
71
 
116
72
  except Exception as e:
117
73
  print(e)
118
74
  continue
119
75
 
76
+ # 次ページへ
120
77
  nextlink = get_nextlink("/buy/search/result/" ,soup)
121
78
  if not nextlink:
122
79
  break

1

ソースコードの修正

2022/11/02 04:53

投稿

k.nakazono
k.nakazono

スコア4

test CHANGED
File without changes
test CHANGED
@@ -38,6 +38,8 @@
38
38
  ### 該当のソースコード
39
39
 
40
40
  ```python
41
+
42
+
41
43
  # メイン処理
42
44
  DOMAIN = "https://www.xxxxxxx.co.jp/"
43
45
  START_URL = "https://www.xxxxxx.co.jp/buy/search/result/?KEYWORDS=&OUTCLSCODE=46&CLSCODE=&LINE=24"
@@ -119,6 +121,79 @@
119
121
  if not nextlink:
120
122
  break
121
123
 
124
+ class TorControlPortClient:
125
+ control_address: str
126
+ control_port: int
127
+ control_password: Optional[str]
128
+
129
+ def __init__(
130
+ self,
131
+ control_address: str,
132
+ control_port: int,
133
+ control_password: Optional[str] = None
134
+ ):
135
+ self.control_address = control_address
136
+ self.control_port = control_port
137
+ self.control_password = control_password
138
+
139
+ def change_connection_ip(self, seconds_wait: int = 5) -> bool:
140
+ time.sleep(seconds_wait)
141
+ try:
142
+ tor_connection = socket.create_connection((self.control_address, self.control_port))
143
+ password_value = self.control_password if self.control_password is not None else ''
144
+ message = f'AUTHENTICATE "{password_value}"\r\nSIGNAL NEWNYM\r\n'
145
+ tor_connection.send(message.encode('utf-8'))
146
+ response = tor_connection.recv(1024)
147
+ if response != b'250 OK\r\n250 OK\r\n':
148
+ sys.stderr.write('Unexpected response from Tor control port: {}\n'.format(response))
149
+ return False
150
+ return True
151
+ except Exception as e:
152
+ print(e)
153
+ sys.stderr.write('Error connecting to Tor control port: {}\n'.format(repr(e)))
154
+ return False
155
+
156
+ @retry(wait=wait_exponential(multiplier=1, min=3, max=50))
157
+ def get_html(url):
158
+ """
159
+ HTTPリクエストしてBeautifulSoupオブジェクトに変換する
160
+ """
161
+ headers = {
162
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36"
163
+ }
164
+
165
+ proxies = {
166
+ 'http': 'socks5://localhost:9050',
167
+ 'https': 'socks5://localhost:9050',
168
+ }
169
+
170
+ time.sleep(2)
171
+
172
+ res = requests.get(url, headers=headers, proxies=proxies)
173
+ print(res.content)
174
+ if 300 <= res.status_code <= 599:
175
+ tor_control_port_client = TorControlPortClient('localhost', 9051, 'test1234')
176
+ tor_control_port_client.change_connection_ip(seconds_wait=3)
177
+ print("IP Address is Changed")
178
+ raise Exception("IPチェンジ!!!")
179
+
180
+ soup = BeautifulSoup(res.content, 'html.parser')
181
+ return soup
182
+
183
+ def get_detail_urls(soup):
184
+ items = soup.select(".search_item > h3 > a[href]")
185
+ for item in items:
186
+ yield item.get("href")
187
+
188
+ def get_nextlink(path, soup):
189
+ try:
190
+ nextlink = DOMAIN + path + soup.select_one(".pageLink[title='次ページ']").get("href")
191
+ except Exception as e:
192
+ print("Last Page")
193
+ nextlink = None
194
+
195
+ return nextlink
196
+
122
197
  ```
123
198
 
124
199
  ### 試したこと