現在、以下のサイトを参考に画像の収集を行おうと考えています。
参考リンク
コードを利用させていただいたところ、AttributeErrorが出てしまいました。ネットで調べたところ同様の質問を見つけたため、コードを書き換えてみました。参考にさせていただいたURLは以下の通りです。
質問URL
最終的なコードが以下の通りです。
python
import os
import sys
import traceback
from mimetypes import guess_extension
from time import time, sleep
from urllib.request import urlopen, Request
from urllib.parse import quote

from bs4 import BeautifulSoup

# Sent verbatim as the User-Agent header of every request (empty by default).
MY_EMAIL_ADDR = ''


class Fetcher:
    """Minimal HTTP GET helper that sends a fixed User-Agent header."""

    def __init__(self, ua=''):
        self.ua = ua

    def fetch(self, url):
        """Download *url* and return ``(body_bytes, content_type)``.

        Failures are best-effort: the traceback is written to stderr and
        ``(None, None)`` is returned, so callers must check for ``None``.
        """
        req = Request(url, headers={'User-Agent': self.ua})
        try:
            with urlopen(req, timeout=3) as p:
                b_content = p.read()
                mime = p.getheader('Content-Type')
        except Exception:
            # `except Exception` rather than a bare `except:` so that Ctrl-C
            # (KeyboardInterrupt) and SystemExit still stop the script.
            #
            # NOTE(review): an SSL "CERTIFICATE_VERIFY_FAILED ... unable to get
            # local issuer certificate" error here usually means this Python
            # installation has no CA bundle configured (with the python.org
            # macOS build, running the bundled "Install Certificates.command"
            # is the usual remedy -- confirm for your setup). Certificate
            # verification is intentionally left enabled.
            sys.stderr.write('Error in fetching {}\n'.format(url))
            sys.stderr.write(traceback.format_exc())
            return None, None
        return b_content, mime


fetcher = Fetcher(MY_EMAIL_ADDR)


def fetch_and_save_img(word):
    """Download every image URL found for *word* into the ``data/`` directory.

    Files are named ``<index><extension>``, where the index is the position
    of the URL in the list returned by ``img_url_list`` and the extension is
    derived from the response's Content-Type. URLs that cannot be fetched or
    whose type has no known extension are skipped.
    """
    data_dir = 'data/'
    os.makedirs(data_dir, exist_ok=True)

    for i, img_url in enumerate(img_url_list(word)):
        sleep(0.1)  # small pause between requests to go easy on the servers
        img, mime = fetcher.fetch(img_url)
        if not mime or not img:
            continue
        # Drop parameters such as "; charset=..." before the lookup.
        ext = guess_extension(mime.split(';')[0].strip())
        if ext in ('.jpe', '.jpeg'):
            ext = '.jpg'
        if not ext:
            continue
        result_file = os.path.join(data_dir, str(i) + ext)
        with open(result_file, mode='wb') as f:
            f.write(img)
        print('fetched', img_url)


def img_url_list(word):
    """Return the de-duplicated image URLs scraped from a Yahoo! Japan search.

    Returns an empty list when the search page cannot be fetched, instead of
    failing with ``AttributeError`` on a ``None`` body.
    """
    # NOTE(review): the search term in `p=` is hard-coded and *word* is
    # substituted into `b=` (presumably the result offset). That looks
    # unintended for a parameter named `word`; kept as-is to preserve current
    # behaviour -- confirm and move the placeholder to `p=` if a keyword
    # search is what is wanted.
    url = 'https://search.yahoo.co.jp/image/search?p=%E3%82%A8%E3%83%B3%E3%82%B8%E3%82%A7%E3%83%AB%E3%83%AD%E3%83%BC%E3%83%89&oq=&ei=UTF-8&xargs=2&b={}'.format(quote(word))
    byte_content, _ = fetcher.fetch(url)
    if byte_content is None:
        # fetch() has already reported the error on stderr.
        return []

    structured_page = BeautifulSoup(
        byte_content.decode('UTF-8', errors='replace'), 'html.parser')
    img_link_elems = structured_page.find_all('a', attrs={'target': 'imagewin'})

    img_urls = []
    for elem in img_link_elems:
        href = elem.get('href')
        # Anchors without an href yield None; skip them instead of crashing.
        if href and href.startswith('http'):
            img_urls.append(href)

    # De-duplicate while keeping first-seen order so that the numbered file
    # names produced by fetch_and_save_img are stable between runs.
    return list(dict.fromkeys(img_urls))


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit('usage: {} WORD'.format(sys.argv[0]))
    fetch_and_save_img(sys.argv[1])
しかし、上記のコードに書き直して実行しても同じエラーが出力され、先に進めない状態です。アドバイスしていただけると助かります。よろしくお願いします。
エラー内容
Error in fetching https://search.yahoo.co.jp/image/search?p=%E3%82%A8%E3%83%B3%E3%82%B8%E3%82%A7%E3%83%AB%E3%83%AD%E3%83%BC%E3%83%89&oq=&ei=UTF-8&xargs=2&b=1
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/urllib/request.py", line 1317, in do_open
    encode_chunked=req.has_header('Transfer-encoding'))
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/http/client.py", line 1229, in request
    self._send_request(method, url, body, headers, encode_chunked)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/http/client.py", line 1275, in _send_request
    self.endheaders(body, encode_chunked=encode_chunked)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/http/client.py", line 1224, in endheaders
    self._send_output(message_body, encode_chunked=encode_chunked)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/http/client.py", line 1016, in _send_output
    self.send(msg)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/http/client.py", line 956, in send
    self.connect()
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/http/client.py", line 1392, in connect
    server_hostname=server_hostname)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/ssl.py", line 412, in wrap_socket
    session=session
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/ssl.py", line 853, in _create
    self.do_handshake()
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/ssl.py", line 1117, in do_handshake
    self._sslobj.do_handshake()
ssl.SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1056)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "scriping.py", line 21, in fetch
    with urlopen(req, timeout=3) as p:
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/urllib/request.py", line 222, in urlopen
    return opener.open(url, data, timeout)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/urllib/request.py", line 525, in open
    response = self._open(req, data)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/urllib/request.py", line 543, in _open
    '_open', req)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/urllib/request.py", line 503, in _call_chain
    result = func(*args)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/urllib/request.py", line 1360, in https_open
    context=self._context, check_hostname=self._check_hostname)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/urllib/request.py", line 1319, in do_open
    raise URLError(err)
urllib.error.URLError: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1056)>
Traceback (most recent call last):
  File "scriping.py", line 79, in <module>
    fetch_and_save_img(num)
  File "scriping.py", line 39, in fetch_and_save_img
    img_urls = img_url_list(num_self)[0]
  File "scriping.py", line 65, in img_url_list
    soup = BeautifulSoup(byte_content.decode('UTF-8'), 'html.parser')
AttributeError: 'NoneType' object has no attribute 'decode'
開発環境
python3.7.3
mac OS
回答2件
あなたの回答
tips
プレビュー
バッドをするには、ログインかつ
こちらの条件を満たす必要があります。