前提・実現したいこと
Pythonのrequests.get()関数でWebページをダウンロードしたいのですが,
エラーになるわけでもなくアクセスできないサイトがあります.
具体的にはこちらの通販サイトから,
https://www.asos.com/fila/fila-mini-dress-with-drawstring-waist-and-logo-front/prd/13843263
以下にある商品価格を取得したいと思っております.
<span data-id="current-price" data-bind="text: priceText(), css: {'product-price-discounted' : isDiscountedPrice }, markAndMeasure: 'pdp:price_displayed'" class="current-price product-price-discounted">£36.00</span>
どのようにすればよいでしょうか?
また,requests以外で使えるモジュールはありますか?
発生している問題・エラーメッセージ
timeoutを設定した時のみ以下のエラーが出ますが,それ以外の時はずっと実行中になってしまいます.
Error
1 2--------------------------------------------------------------------------- 3timeout Traceback (most recent call last) 4C:\Anaconda\lib\site-packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw) 5 383 # otherwise it looks like a programming error was the cause. 6--> 384 six.raise_from(e, None) 7 385 except (SocketTimeout, BaseSSLError, SocketError) as e: 8 9C:\Anaconda\lib\site-packages\urllib3\packages\six.py in raise_from(value, from_value) 10 11C:\Anaconda\lib\site-packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw) 12 379 try: 13--> 380 httplib_response = conn.getresponse() 14 381 except Exception as e: 15 16C:\Anaconda\lib\http\client.py in getresponse(self) 17 1320 try: 18-> 1321 response.begin() 19 1322 except ConnectionError: 20 21C:\Anaconda\lib\http\client.py in begin(self) 22 295 while True: 23--> 296 version, status, reason = self._read_status() 24 297 if status != CONTINUE: 25 26C:\Anaconda\lib\http\client.py in _read_status(self) 27 256 def _read_status(self): 28--> 257 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1") 29 258 if len(line) > _MAXLINE: 30 31C:\Anaconda\lib\socket.py in readinto(self, b) 32 588 try: 33--> 589 return self._sock.recv_into(b) 34 590 except timeout: 35 36C:\Anaconda\lib\site-packages\urllib3\contrib\pyopenssl.py in recv_into(self, *args, **kwargs) 37 308 else: 38--> 309 return self.recv_into(*args, **kwargs) 39 310 40 41C:\Anaconda\lib\site-packages\urllib3\contrib\pyopenssl.py in recv_into(self, *args, **kwargs) 42 306 if not util.wait_for_read(self.socket, self.socket.gettimeout()): 43--> 307 raise timeout('The read operation timed out') 44 308 else: 45 46timeout: The read operation timed out 47 48During handling of the above exception, another exception occurred: 49 50ReadTimeoutError Traceback (most recent call last) 51C:\Anaconda\lib\site-packages\requests\adapters.py in send(self, 
request, stream, timeout, verify, cert, proxies) 52 448 retries=self.max_retries, 53--> 449 timeout=timeout 54 450 ) 55 56C:\Anaconda\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw) 57 637 retries = retries.increment(method, url, error=e, _pool=self, 58--> 638 _stacktrace=sys.exc_info()[2]) 59 639 retries.sleep() 60 61C:\Anaconda\lib\site-packages\urllib3\util\retry.py in increment(self, method, url, response, error, _pool, _stacktrace) 62 366 if read is False or not self._is_method_retryable(method): 63--> 367 raise six.reraise(type(error), error, _stacktrace) 64 368 elif read is not None: 65 66C:\Anaconda\lib\site-packages\urllib3\packages\six.py in reraise(tp, value, tb) 67 685 raise value.with_traceback(tb) 68--> 686 raise value 69 687 70 71C:\Anaconda\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw) 72 599 body=body, headers=headers, 73--> 600 chunked=chunked) 74 601 75 76C:\Anaconda\lib\site-packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw) 77 385 except (SocketTimeout, BaseSSLError, SocketError) as e: 78--> 386 self._raise_timeout(err=e, url=url, timeout_value=read_timeout) 79 387 raise 80 81C:\Anaconda\lib\site-packages\urllib3\connectionpool.py in _raise_timeout(self, err, url, timeout_value) 82 305 if isinstance(err, SocketTimeout): 83--> 306 raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value) 84 307 85 86ReadTimeoutError: HTTPSConnectionPool(host='www.asos.com', port=443): Read timed out. 
(read timeout=10) 87 88During handling of the above exception, another exception occurred: 89 90ReadTimeout Traceback (most recent call last) 91<ipython-input-16-e3d95d8c78cb> in <module> 92 1 url = 'https://www.asos.com/fila/fila-mini-dress-with-drawstring-waist-and-logo-front/prd/13843263' 93----> 2 res = requests.get(url, timeout = 10) 94 95C:\Anaconda\lib\site-packages\requests\api.py in get(url, params, **kwargs) 96 73 97 74 kwargs.setdefault('allow_redirects', True) 98---> 75 return request('get', url, params=params, **kwargs) 99 76 100 77 101 102C:\Anaconda\lib\site-packages\requests\api.py in request(method, url, **kwargs) 103 58 # cases, and look like a memory leak in others. 104 59 with sessions.Session() as session: 105---> 60 return session.request(method=method, url=url, **kwargs) 106 61 107 62 108 109C:\Anaconda\lib\site-packages\requests\sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json) 110 531 } 111 532 send_kwargs.update(settings) 112--> 533 resp = self.send(prep, **send_kwargs) 113 534 114 535 return resp 115 116C:\Anaconda\lib\site-packages\requests\sessions.py in send(self, request, **kwargs) 117 644 118 645 # Send the request 119--> 646 r = adapter.send(request, **kwargs) 120 647 121 648 # Total elapsed time of the request (approximately) 122 123C:\Anaconda\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies) 124 527 raise SSLError(e, request=request) 125 528 elif isinstance(e, ReadTimeoutError): 126--> 529 raise ReadTimeout(e, request=request) 127 530 else: 128 531 raise 129 130ReadTimeout: HTTPSConnectionPool(host='www.asos.com', port=443): Read timed out. (read timeout=10)
該当のソースコード
Python
1url = 'https://www.asos.com/fila/fila-mini-dress-with-drawstring-waist-and-logo-front/prd/13843263' 2res = requests.get(url, timeout = 10)
試したこと
申し訳ないのですが,初心者のため試行錯誤の手がかりがなく,これといって試せたことはありません.
urllib.request.urlopen()も同様に使えませんでした.
補足情報(FW/ツールのバージョンなど)
Python 3を使用し,ChromeでJupyterLabを用いています.
同じウィンドウ上の別のタブでは当該URLを開くことは可能でした.
回答2件
あなたの回答
tips
プレビュー