画像スクレイピングの勉強をしています。
いくつかのWebサイトを参考に、requests.get() と urllib.request.urlopen() を用いています。
しかし、どちらの方法でも以下のようなタイムアウトエラーが発生してしまいます。
なお、タイムアウトを設定しない場合は処理が終了しません。
どなたか解決方法または調べるポイントなどをご教示いただければと存じます。
sample1.py
Python3
# sample1.py: request `url` through requests with a browser-like User-Agent.
# NOTE: `url` is not assigned anywhere in this snippet; it has to be defined
# before the request below runs.
import ssl

import requests

# Swap the ssl module's default HTTPS context factory for the unverified one.
# NOTE(review): requests presumably builds its TLS context via urllib3, so
# this assignment may have no effect on the call below -- confirm.
ssl._create_default_https_context = ssl._create_unverified_context

# Only the User-Agent header is customised; the literal is split across lines
# purely for readability and concatenates to the original string.
headers = {}
headers["User-Agent"] = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/36.0.1944.0 Safari/537.36"
)

# 10-second timeout; this is the call that raises ReadTimeout in the report.
req = requests.get(url, timeout=10, stream=True, headers=headers)
sample2.py
Python3
# sample2.py: request `url` through urllib with a browser-like User-Agent.
# NOTE: `url` is not assigned anywhere in this snippet; it has to be defined
# before the Request object below is built.
import ssl
import urllib.error
import urllib.parse
import urllib.request

# Swap the ssl module's default HTTPS context factory for the unverified one.
# NOTE(review): this presumably turns off certificate verification for the
# urlopen() call below -- confirm that this is intended.
ssl._create_default_https_context = ssl._create_unverified_context

# Only the User-Agent header is customised; the literal is split across lines
# purely for readability and concatenates to the original string.
headers = {}
headers["User-Agent"] = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/36.0.1944.0 Safari/537.36"
)

# Build the request first, then open it with a 10-second timeout; the
# urlopen() call is the one that raises socket.timeout in the report.
req = urllib.request.Request(url, headers=headers)
res = urllib.request.urlopen(req, timeout=10)
### requests.get() エラー
Traceback (most recent call last):
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/site-packages/urllib3/connectionpool.py", line 445, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/site-packages/urllib3/connectionpool.py", line 440, in _make_request
    httplib_response = conn.getresponse()
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/http/client.py", line 1347, in getresponse
    response.begin()
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/http/client.py", line 307, in begin
    version, status, reason = self._read_status()
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/http/client.py", line 268, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/socket.py", line 704, in readinto
    return self._sock.recv_into(b)
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/ssl.py", line 1241, in recv_into
    return self.read(nbytes, buffer)
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/ssl.py", line 1099, in read
    return self._sslobj.read(len, buffer)
socket.timeout: The read operation timed out

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/site-packages/requests/adapters.py", line 439, in send
    resp = conn.urlopen(
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/site-packages/urllib3/connectionpool.py", line 755, in urlopen
    retries = retries.increment(
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/site-packages/urllib3/util/retry.py", line 532, in increment
    raise six.reraise(type(error), error, _stacktrace)
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/site-packages/urllib3/packages/six.py", line 770, in reraise
    raise value
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/site-packages/urllib3/connectionpool.py", line 699, in urlopen
    httplib_response = self._make_request(
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/site-packages/urllib3/connectionpool.py", line 447, in _make_request
    self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/site-packages/urllib3/connectionpool.py", line 336, in _raise_timeout
    raise ReadTimeoutError(
urllib3.exceptions.ReadTimeoutError: HTTPSConnectionPool(host='jp.louisvuitton.com', port=443): Read timed out. (read timeout=10)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/username/Desktop/scraping/study/scrayping_file/trim.py", line 31, in <module>
    req = requests.get(url, headers=headers, stream=True, timeout=10)
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/site-packages/requests/api.py", line 76, in get
    return request('get', url, params=params, **kwargs)
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/site-packages/requests/api.py", line 61, in request
    return session.request(method=method, url=url, **kwargs)
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/site-packages/requests/sessions.py", line 542, in request
    resp = self.send(prep, **send_kwargs)
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/site-packages/requests/sessions.py", line 655, in send
    r = adapter.send(request, **kwargs)
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/site-packages/requests/adapters.py", line 529, in send
    raise ReadTimeout(e, request=request)
requests.exceptions.ReadTimeout: HTTPSConnectionPool(host='jp.louisvuitton.com', port=443): Read timed out. (read timeout=10)
### urllib.request.urlopen() エラー
Traceback (most recent call last):
  File "/Users/username/Desktop/scraping/study/scrayping_file/sample.py", line 59, in <module>
    res = urllib.request.urlopen(url=req, timeout=10)
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/urllib/request.py", line 214, in urlopen
    return opener.open(url, data, timeout)
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/urllib/request.py", line 517, in open
    response = self._open(req, data)
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/urllib/request.py", line 534, in _open
    result = self._call_chain(self.handle_open, protocol, protocol +
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/urllib/request.py", line 494, in _call_chain
    result = func(*args)
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/urllib/request.py", line 1385, in https_open
    return self.do_open(http.client.HTTPSConnection, req,
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/urllib/request.py", line 1346, in do_open
    r = h.getresponse()
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/http/client.py", line 1347, in getresponse
    response.begin()
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/http/client.py", line 307, in begin
    version, status, reason = self._read_status()
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/http/client.py", line 268, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/socket.py", line 704, in readinto
    return self._sock.recv_into(b)
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/ssl.py", line 1241, in recv_into
    return self.read(nbytes, buffer)
  File "/Users/username/.pyenv/versions/3.9.0/lib/python3.9/ssl.py", line 1099, in read
    return self._sslobj.read(len, buffer)
socket.timeout: The read operation timed out
### 補足情報
python 3.9.0
あなたの回答
tips
プレビュー