python3でGoogleの画像検索APIを使って画像を大量に収集するプログラムを作っています。
以下がソースなのですが、これをターミナルで実行すると
下記のようなエラーが出てしまいます。
「サーバー内部のエラー」のようなのですが、python3初心者でして、原因と解決策をアドバイス頂けると
幸いです。
「エラー」
Reading page number: 1
<HttpError 500 when requesting https://www.googleapis.com/customsearch/xxxxxxxxxxx&alt=json returned "">
Reading page number: 2
<HttpError 500 when requesting https://www.googleapis.com/customsearch/xxxxxxxxxxx&alt=json returned "">
Reading page number: 3
<HttpError 500 when requesting https://www.googleapis.com/customsearch/xxxxxxxxxxx&alt=json returned "">
Reading page number: 4
<HttpError 500 when requesting https://www.googleapis.com/customsearch/xxxxxxxxxxx&alt=json returned "">
Reading page number: 5
<HttpError 500 when requesting https://www.googleapis.com/customsearch/xxxxxxxxxxx&alt=json returned "">
Reading page number: 6
<HttpError 500 when requesting https://www.googleapis.com/customsearch/xxxxxxxxxxx&alt=json returned "">
Reading page number: 7
<HttpError 500 when requesting https://www.googleapis.com/customsearch/v1?q=%E3%83%A2%E3%83%BC%E3%83%AA%E3%83%BC%E3%83%AD%E3%83%90%E3%83%BC%E3%83%88%E3%82%BD%E3%83%B3&cx=008704579318889713571%3Auc3qiq5oulk&lr=lang_ja&num=10&start=1&searchType=image&key=xxxxxxxxxxx&alt=json returned "">
Reading page number: 8
<HttpError 500 when requesting https://www.googleapis.com/customsearch/xxxxxxxxxxx&alt=json returned "">
Reading page number: 9
<HttpError 500 when requesting https://www.googleapis.com/customsearch/xxxxxxxxxxx&alt=json returned "">
Reading page number: 10
<HttpError 500 when requesting https://www.googleapis.com/customsearch/xxxxxxxxxxx&alt=json returned "">
「ソース」
# -*- coding: utf-8 -*-
"""Collect images for a search word through the Google Custom Search API.

The script queries the API page by page for image results, pickles the raw
API responses, downloads every result whose URL path ends in a known image
extension, and writes a JSON table mapping each downloaded URL to the
SHA3-256 hash used as its file name.
"""
import hashlib
import json
import os
import pickle
import urllib.parse
import urllib.request

import httplib2
from googleapiclient.discovery import build

try:
    # pysha3 adds hashlib.sha3_* on Python < 3.6. From 3.6 on hashlib
    # provides it natively, so a missing package is not an error there.
    import sha3  # noqa: F401
except ImportError:
    pass

# File extensions (lower case, with dot) that are treated as images.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.gif', '.png', '.bmp')


def make_dir(path):
    """Create directory *path*, including missing parents, if needed."""
    os.makedirs(path, exist_ok=True)


def make_correspondence_table(correspondence_table, original_url, hashed_url):
    """Store ``original_url -> hashed_url`` in *correspondence_table*."""
    correspondence_table[original_url] = hashed_url


def getImageUrl(api_key, cse_key, search_word, page_limit, save_dir_path):
    """Query the Custom Search API and return the image links it reports.

    Args:
        api_key: Developer key passed to the API client.
        cse_key: Custom search engine id (the ``cx`` request parameter).
        search_word: Query string.
        page_limit: Maximum number of requests to issue (10 results each).
        save_dir_path: Directory under which ``api_response_file/
            api_response.pickle`` is written with the raw responses.

    Returns:
        A list with the ``link`` of every item in every successful response.
        The list is empty when all requests failed or returned no items.
    """
    service = build("customsearch", "v1", developerKey=api_key)
    make_dir(save_dir_path)
    save_res_path = os.path.join(save_dir_path, 'api_response_file')
    make_dir(save_res_path)

    responses = []
    start_index = 1
    for n_page in range(page_limit):
        print("Reading page number:", n_page + 1)
        try:
            page = service.cse().list(
                q=search_word,       # Search words
                cx=cse_key,          # custom search engine key
                lr='lang_ja',        # Search language
                num=10,              # Results per request (Max 10)
                start=start_index,
                searchType='image',  # search for images
            ).execute()
        except Exception as e:
            # Best effort: report the failure and try the same start index
            # again on the next iteration.
            print(e)
            continue

        responses.append(page)
        # Read the cursor from the page just fetched, not from
        # responses[n_page]: the two drift apart after any failed request.
        next_page = page.get("queries", {}).get("nextPage")
        if not next_page:
            # No further page is advertised; requesting again would only
            # return the same results.
            break
        start_index = next_page[0].get("startIndex")

    with open(os.path.join(save_res_path, 'api_response.pickle'), mode='wb') as f:
        pickle.dump(responses, f)

    img_list = []
    for page in responses:
        # 'items' is looked up defensively so a page without it is skipped
        # instead of raising KeyError.
        for item in page.get('items', []):
            img_list.append(item['link'])
    return img_list


def getImage(save_dir_path, img_list, correspondence_table=None):
    """Download the images in *img_list* into ``<save_dir_path>/imgs``.

    Each file is named ``sha3_256(url).hexdigest() + extension``. URLs whose
    path does not end in one of ``IMAGE_EXTENSIONS`` are skipped. A failed
    download is reported and does not stop the remaining downloads.

    Args:
        save_dir_path: Base output directory.
        img_list: Iterable of image URLs.
        correspondence_table: Optional dict that receives ``url -> hash``
            for every saved image. A new dict is used when omitted.

    Returns:
        The correspondence table (the dict passed in, or the new one).
    """
    if correspondence_table is None:
        correspondence_table = {}

    make_dir(save_dir_path)
    save_img_path = os.path.join(save_dir_path, 'imgs')
    make_dir(save_img_path)

    http = httplib2.Http(".cache")
    for url in img_list:
        # Take the extension from the URL path only, so a query string or
        # fragment ("photo.jpg?w=640") does not hide it.
        url_path = urllib.parse.urlsplit(url).path
        extension = os.path.splitext(url_path)[-1].lower()
        if extension not in IMAGE_EXTENSIONS:
            continue

        hashed_url = hashlib.sha3_256(url.encode('utf-8')).hexdigest()
        full_path = os.path.join(save_img_path, hashed_url + extension)
        try:
            response, content = http.request(url)
            if response.status != 200:
                # Do not store an error page under an image file name.
                print("failed to download images.", url,
                      "HTTP status", response.status)
                continue
            with open(full_path, 'wb') as f:
                f.write(content)
        except Exception as e:
            # Best-effort boundary: report the reason and keep going.
            print("failed to download images.", url, e)
            continue

        print('saved image... {}'.format(url))
        make_correspondence_table(correspondence_table, url, hashed_url)

    return correspondence_table


if __name__ == '__main__':
    # -------------- Parameter and Path Settings -------------- #
    API_KEY = 'XXXXX'
    CUSTOM_SEARCH_ENGINE = 'XXXXX:XXXXX'
    page_limit = 10
    search_word = 'モーリーロバートソン'
    save_dir_path = '/Users/xxxxxxxxx/Desktop/MyData/python/img/'

    correspondence_table = {}

    img_list = getImageUrl(API_KEY, CUSTOM_SEARCH_ENGINE, search_word,
                           page_limit, save_dir_path)
    getImage(save_dir_path, img_list, correspondence_table)

    correspondence_table_path = os.path.join(save_dir_path, 'corr_table')
    make_dir(correspondence_table_path)

    with open(os.path.join(correspondence_table_path, 'corr_table.json'),
              mode='w', encoding='utf-8') as f:
        json.dump(correspondence_table, f)
回答1件
あなたの回答
tips
プレビュー
バッドをするには、ログインかつ
こちらの条件を満たす必要があります。
退会済みユーザー
2018/08/26 04:53 編集
退会済みユーザー
2018/08/26 06:57
2018/08/26 07:02
2018/08/26 07:05
退会済みユーザー
2018/08/26 07:14 編集
2018/08/26 07:14 編集
退会済みユーザー
2018/08/26 07:15
退会済みユーザー
2018/08/26 07:20