質問編集履歴
3
コード修正
test
CHANGED
File without changes
|
test
CHANGED
@@ -41,8 +41,8 @@
|
|
41
41
|
|
42
42
|
|
43
43
|
# メイン処理
|
44
|
-
DOMAIN = "https://www.
|
44
|
+
DOMAIN = "https://www.janpara.co.jp/"
|
45
|
-
START_URL = "https://www.
|
45
|
+
START_URL = "https://www.janpara.co.jp/buy/search/result/?KEYWORDS=&OUTCLSCODE=46&CLSCODE=&LINE=24"
|
46
46
|
PROXY = 'localhost:9050'
|
47
47
|
|
48
48
|
if __name__ == '__main__':
|
2
ソースコードの簡易化
test
CHANGED
File without changes
|
test
CHANGED
@@ -67,56 +67,13 @@
|
|
67
67
|
|
68
68
|
for detail_url in detail_urls:
|
69
69
|
try:
|
70
|
-
print(detail_url)
|
71
|
-
driver.get(detail_url)
|
72
|
-
time.sleep(1)
|
73
|
-
|
74
|
-
if driver.find_elements(By.CSS_SELECTOR, ".items > label[for*='used']"):
|
75
|
-
unused_label = driver.find_element(By.CSS_SELECTOR, ".items > label[for='used-2']")
|
76
|
-
unused_label.click()
|
77
|
-
|
78
|
-
|
70
|
+
# ここで要素の情報を取得する
|
79
|
-
"data_source": "JANPARA",
|
80
|
-
"title": title,
|
81
|
-
"url": driver.current_url,
|
82
|
-
"color": None,
|
83
|
-
"condition": unused_label.text,
|
84
|
-
"accessories": None,
|
85
|
-
"is_sim_free": None,
|
86
|
-
"price": int(re.sub("\D", "", driver.find_elements(By.CLASS_NAME, 'detail_item_money')[-1].text)),
|
87
|
-
"description": None
|
88
|
-
|
89
|
-
})
|
90
|
-
|
91
|
-
used_label = driver.find_element(By.CSS_SELECTOR, ".items > label[for='used-1']")
|
92
|
-
used_label.click()
|
93
|
-
|
94
|
-
title = driver.find_element(By.CSS_SELECTOR, '.sub_cont>h3').text
|
95
|
-
|
96
|
-
conditions = driver.find_elements(By.CSS_SELECTOR, ".items > label[for*='状態']")
|
97
|
-
options = driver.find_elements(By.CSS_SELECTOR, ".items > label[for*='付属品']")
|
98
|
-
for condition, option in itertools.product(conditions, options): #Xの10通り、Yの10通りの全組み合わせ
|
99
|
-
condition.click()
|
100
|
-
option.click()
|
101
|
-
|
102
|
-
price = int(re.sub("\D", "", driver.find_element(By.ID, 'satei_amt').text))
|
103
|
-
result = {
|
104
|
-
"data_source": "JANPARA",
|
105
|
-
"title": title,
|
106
|
-
"url": driver.current_url,
|
107
|
-
"color": None,
|
108
|
-
"condition": condition.text,
|
109
|
-
"accessories": option.text,
|
110
|
-
"is_sim_free": None,
|
111
|
-
"price": price,
|
112
|
-
"description": None
|
113
|
-
}
|
114
|
-
results.append(result)
|
115
71
|
|
116
72
|
except Exception as e:
|
117
73
|
print(e)
|
118
74
|
continue
|
119
75
|
|
76
|
+
# 次ページへ
|
120
77
|
nextlink = get_nextlink("/buy/search/result/" ,soup)
|
121
78
|
if not nextlink:
|
122
79
|
break
|
1
ソースコードの修正
test
CHANGED
File without changes
|
test
CHANGED
@@ -38,6 +38,8 @@
|
|
38
38
|
### 該当のソースコード
|
39
39
|
|
40
40
|
```python
|
41
|
+
|
42
|
+
|
41
43
|
# メイン処理
|
42
44
|
DOMAIN = "https://www.xxxxxxx.co.jp/"
|
43
45
|
START_URL = "https://www.xxxxxx.co.jp/buy/search/result/?KEYWORDS=&OUTCLSCODE=46&CLSCODE=&LINE=24"
|
@@ -119,6 +121,79 @@
|
|
119
121
|
if not nextlink:
|
120
122
|
break
|
121
123
|
|
124
|
+
class TorControlPortClient:
    """Small client that asks a Tor control port for a new identity.

    The client opens a TCP connection to the control port, authenticates
    with the configured password and sends ``SIGNAL NEWNYM``.
    """

    # Host name or IP address of the Tor control port.
    control_address: str
    # TCP port of the Tor control port.
    control_port: int
    # Control-port password; ``None`` is sent as an empty password.
    control_password: Optional[str]

    def __init__(
        self,
        control_address: str,
        control_port: int,
        control_password: Optional[str] = None
    ):
        """Store the connection settings; no connection is opened here."""
        self.control_address = control_address
        self.control_port = control_port
        self.control_password = control_password

    def change_connection_ip(self, seconds_wait: int = 5) -> bool:
        """Authenticate and send ``SIGNAL NEWNYM`` to the control port.

        Args:
            seconds_wait: Seconds to sleep before contacting the control port.

        Returns:
            ``True`` when both commands were answered with ``250 OK``,
            ``False`` on any other reply or on any error. Failures are
            reported on stderr; this method does not raise.
        """
        time.sleep(seconds_wait)
        expected_response = b'250 OK\r\n250 OK\r\n'
        try:
            password_value = self.control_password if self.control_password is not None else ''
            # The password travels inside a double-quoted string, so backslashes
            # and double quotes must be escaped to keep the command well-formed.
            escaped_password = password_value.replace('\\', '\\\\').replace('"', '\\"')
            message = f'AUTHENTICATE "{escaped_password}"\r\nSIGNAL NEWNYM\r\n'

            # The context manager closes the socket on every path; the timeout
            # keeps a silent control port from blocking the caller forever.
            with socket.create_connection(
                (self.control_address, self.control_port), timeout=10
            ) as tor_connection:
                tor_connection.sendall(message.encode('utf-8'))

                # The two reply lines may arrive in separate TCP segments, so
                # keep reading until both lines are in or the peer closes.
                response = b''
                while response.count(b'\r\n') < 2:
                    chunk = tor_connection.recv(1024)
                    if not chunk:
                        break
                    response += chunk

            if response != expected_response:
                sys.stderr.write('Unexpected response from Tor control port: {}\n'.format(response))
                return False
            return True
        except Exception as e:
            # Deliberately broad: callers rely on a boolean result, not on exceptions.
            sys.stderr.write('Error connecting to Tor control port: {}\n'.format(repr(e)))
            return False
|
155
|
+
|
156
|
+
@retry(wait=wait_exponential(multiplier=1, min=3, max=50))
def get_html(url):
    """
    Fetch ``url`` through the local Tor SOCKS proxy and parse it.

    Args:
        url: Address of the page to download.

    Returns:
        A BeautifulSoup object built from the response body with the
        ``html.parser`` backend.

    Raises:
        Exception: when the response status is in the 300-599 range. Before
            raising, a new Tor identity is requested so that the retry
            decorator can repeat the request.

    NOTE(review): the decorator is given only a wait policy and no stop
    condition, so a permanently failing URL is presumably retried without
    limit -- confirm against the retry library's defaults.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36"
    }

    # "socks5h" makes the proxy resolve host names; plain "socks5" resolves
    # them locally, which sends DNS lookups outside of Tor.
    proxies = {
        'http': 'socks5h://localhost:9050',
        'https': 'socks5h://localhost:9050',
    }

    # Fixed pause before every request.
    time.sleep(2)

    # Without a timeout a stalled circuit would block this call forever;
    # with one, the stall raises and the retry decorator takes over.
    res = requests.get(url, headers=headers, proxies=proxies, timeout=30)
    if 300 <= res.status_code <= 599:
        # NOTE(review): the control-port password is hard-coded here.
        tor_control_port_client = TorControlPortClient('localhost', 9051, 'test1234')
        # Only report success when the control port actually accepted the request.
        if tor_control_port_client.change_connection_ip(seconds_wait=3):
            print("IP Address is Changed")
        raise Exception("IPチェンジ!!!")

    return BeautifulSoup(res.content, 'html.parser')
|
182
|
+
|
183
|
+
def get_detail_urls(soup):
    """Lazily yield the ``href`` of every item link on a search-result page.

    ``soup`` is the parsed result page; links are matched with the CSS
    selector ``.search_item > h3 > a[href]``.
    """
    yield from (
        anchor.get("href")
        for anchor in soup.select(".search_item > h3 > a[href]")
    )
|
187
|
+
|
188
|
+
def get_nextlink(path, soup):
    """Build the absolute URL of the next result page.

    Args:
        path: Path of the listing page the pager link is relative to.
        soup: Parsed page in which the pager link is looked up.

    Returns:
        The absolute URL of the next page, or ``None`` (after printing
        "Last Page") when the page has no next-page link or the link
        carries no ``href``.
    """
    from urllib.parse import urljoin

    next_anchor = soup.select_one(".pageLink[title='次ページ']")
    href = next_anchor.get("href") if next_anchor is not None else None
    if not href:
        print("Last Page")
        return None

    # urljoin avoids the doubled slash that plain concatenation produces when
    # DOMAIN ends with "/" and path starts with "/".
    return urljoin(urljoin(DOMAIN, path), href)
|
196
|
+
|
122
197
|
```
|
123
198
|
|
124
199
|
### 試したこと
|