前提・実現したいこと
python学習中のプログラミング初心者です。
@mijamija1995さんの
Google APIを使わずにGoogle画像検索で画像収集
の記事の中にあるコードをお借りして実行したところこのようなメッセージが出ました。
発生している問題・エラーメッセージ
python
Words 0 : 5x"apple juice"
Search : apple juice ; number : 5; first_position : 1 ; scrolls : 1
Less images found:Message: Unable to locate element: //input[@value='Show more results']

Total images:0

Total skipped : 0; Total downloaded : 0/0
また、この実行結果は画像が取得されていないと思うのですが、画像が取得されたときはどのファイルに保存されるのでしょうか。是非教えていただきたいです。
該当のソースコード
python
"""Collect images from Google Image search by driving Firefox through Selenium.

For every entry in ``words_to_search`` the script opens a Google Image search
page, scrolls it to load results, and downloads the requested number of
images.  Files are written to ``<download_path>/<search words with spaces
replaced by underscores>/<index>.<extension>``, relative to the current
working directory (with the configuration below: ``果物/apple_juice/``).
"""
import json
import os
import sys
import time
import urllib
import urllib.parse
import urllib.request

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# adding path to geckodriver to the OS environment variable
os.environ["PATH"] += os.pathsep + os.getcwd()

# Configuration
download_path = "果物/"
# Images
# The three lists are parallel: entry i of each list describes one search.
words_to_search = ['apple juice']
nb_to_download = [5]
first_image_position = [1]


def main():
    """Run one search-and-download pass for every configured search term.

    Raises:
        ValueError: if the three configuration lists differ in length.
    """
    if len(words_to_search) != len(nb_to_download) or len(nb_to_download) != len(first_image_position):
        raise ValueError('You may have forgotten to configure one of the lists (length is different)')
    # For each word in the list, we download the number of images requested
    searches = zip(words_to_search, nb_to_download, first_image_position)
    for i, (words, count, first_position) in enumerate(searches):
        print(f'Words {i} : {count}x"{words}"')
        if count > 0:
            search_and_save(words, count, first_position)


def _load_results(driver, number_of_scrolls):
    """Scroll the result page and press "Show more results" to load more images."""
    for _ in range(number_of_scrolls):
        for __ in range(10):
            # Multiple scrolls needed to show all 400 images
            driver.execute_script("window.scrollBy(0, 1000000)")
            time.sleep(0.2)
        # to load next 400 images
        time.sleep(2.5)
        try:
            driver.find_element(By.XPATH, "//input[@value='Show more results']").click()
            time.sleep(2.5)
        except Exception as e:
            # Deliberately best-effort: when the button is missing (or cannot
            # be clicked) we stop loading and work with what is on the page.
            print("Less images found:" + str(e))
            break


def _download_image(img_url, destination, headers):
    """Fetch ``img_url`` with the given request headers and write it to ``destination``."""
    req = urllib.request.Request(img_url, headers=headers)
    with urllib.request.urlopen(req) as response:
        raw_img = response.read()
    # ``with`` guarantees the file is closed; the previous ``f.close`` (without
    # parentheses) never actually called close().
    with open(destination, "wb") as f:
        f.write(raw_img)


def search_and_save(text, number, first_position):
    """Search Google Images for ``text`` and save up to ``number`` images.

    Args:
        text: Search words. Also used (spaces replaced by ``_``) as the name
            of the sub-directory of ``download_path`` that receives the files.
        number: Maximum number of images to download.
        first_position: Number of leading results to skip before downloading.

    Images are saved as ``<index>.<extension>`` inside the target directory,
    where ``<index>`` is the number of skipped results plus the number of
    images downloaded so far.
    """
    # Number_of_scrolls * 400 images will be opened in the browser
    number_of_scrolls = int((number + first_position) / 400 + 1)
    print(f"Search : {text} ; number : {number}; first_position : {first_position} ; scrolls : {number_of_scrolls}")

    # Create directories to save images
    target_dir = os.path.join(download_path, text.replace(" ", "_"))
    os.makedirs(target_dir, exist_ok=True)

    # Connect to Google Image (the search words are URL-encoded so that
    # spaces, '&', '#', non-ASCII characters etc. form a valid query string)
    url = "https://www.google.co.in/search?q=" + urllib.parse.quote_plus(text) + "&source=lnms&tbm=isch"
    headers = {
        'User-Agent': "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
    }
    extensions = {"jpg", "jpeg", "png", "gif"}

    img_count = 0
    downloaded_img_count = 0
    img_skip = 0

    driver = webdriver.Firefox()
    try:
        driver.get(url)

        # Prepare Google Page
        _load_results(driver, number_of_scrolls)

        # Process (download) images
        # NOTE(review): the reported run printed "Total images:0", i.e. no
        # element matched this XPath; presumably the page markup no longer
        # contains "rg_meta" elements - verify against the current page.
        imges = driver.find_elements(By.XPATH, '//div[contains(@class,"rg_meta")]')
        print("Total images:" + str(len(imges)) + "\n")
        for img in imges:
            if img_skip < first_position:
                # Skip first images if asked to
                img_skip += 1
                continue

            # Get image (parse the metadata JSON once and read both fields)
            img_count += 1
            meta = json.loads(img.get_attribute('innerHTML'))
            img_url = meta["ou"]
            img_type = meta["ity"]
            print("Downloading image " + str(img_count) + ": " + img_url)
            try:
                if img_type not in extensions:
                    img_type = "jpg"
                # Download image and save it
                file_name = str(img_skip + downloaded_img_count) + "." + img_type
                _download_image(img_url, os.path.join(target_dir, file_name), headers)
                downloaded_img_count += 1
            except Exception as e:
                # Best-effort per image: report the failure and move on.
                print("Download failed:" + str(e))
            finally:
                print("")
            if downloaded_img_count >= number:
                break

        print("Total skipped : " + str(img_skip) + "; Total downloaded : " + str(downloaded_img_count) + "/" + str(img_count))
    finally:
        # Always close the browser, even when something above raised.
        driver.quit()


if __name__ == "__main__":
    main()
補足情報(FW/ツールのバージョンなど)
実行環境:Python 3.8.8、Spyder
回答1件
あなたの回答
tips
プレビュー
バッドをするには、ログインかつ
こちらの条件を満たす必要があります。
2021/11/25 20:58