以下のコードをpython3系で動かせるようにしたい。

前提・実現したいこと

ァイルに書かれた、単語リストを読み取って、各単語を google イメージ検索で検索し、そのブラウザの画面毎キャプチャしファイルに保存するスクリプト
のコード

# -*- coding: utf-8 -*-

import sys

# START: Frawned approach to change default encoding
# But I intentionally take this approach since it's easy and believe it non-problematic in this limited program.
# See discussion detail here.
# https://stackoverflow.com/questions/3828723/why-should-we-not-use-sys-setdefaultencodingutf-8-in-a-py-script
reload(sys)
sys.setdefaultencoding('UTF8')
# END: Frawned approach to change default encoding

import StringIO
import os
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from PIL import Image
import time
import errno
from optparse import OptionParser
import re
import hashlib

def mkdir_p(path):
    try:
        os.makedirs(path)
    except OSError as exc:  # Python >2.5
        if exc.errno == errno.EEXIST and os.path.isdir(path):
            pass
        else:
            raise

def get_filename(text):
    if re.search('[^\w.\-_]', text):
        return hashlib.sha256(text.encode('utf-8')).hexdigest()
    else:
        return text

def save_snapshot(driver, word, idx):
    fname = os.path.join(Options.dir, "%s.jpg" % get_filename(Options.prefix + word))
    idx = "%03d" % (idx + 1)

    if os.path.isfile(fname) and (not Options.force_save):
        print("  ! %s: %s exists!" % (idx, fname))
        return

    url_template = Engines[Options.engine]
    driver.get(url_template % word)

    if Options.js_before_save:
        with open(Options.js_before_save) as f:
            driver.execute_script(f.read())

    driver.execute_script("document.body.style.overflow = 'hidden';")

    # See: https://gist.github.com/jsok/9502024
    screen = driver.get_screenshot_as_png()
    image = Image.open(StringIO.StringIO(screen))
    image.convert("RGB").save(fname, 'JPEG', optimize=True)
    print("  %s %s: %s" % (u'\u2713', idx, fname))
    time.sleep(Options.sleep)

def get_words_from_file(fname):
    with open(fname) as f:
        content = f.readlines()
    content = [x.split("\t")[0].rstrip() for x in content]
    return content

def retrieve_snapshot_for_words(driver, words):
    for idx, word in enumerate(words):
        save_snapshot(driver, word, idx)

Options = {}
Engines = {
    "google": 'https://www.google.com/search?gl=us&hl=en&pws=0&gws_rd=cr&tbm=isch&safe=active&q=%s',
    "google_unsafe": 'https://www.google.com/search?gl=us&hl=en&pws=0&gws_rd=cr&tbm=isch&q=%s',
    "bing": 'https://www.bing.com/images/search?safeSearch=Moderate&mkt=en-US&q=%s',
    "bing_unsafe": 'https://www.bing.com/images/search?safeSearch=Off&mkt=en-US&q=%s',
}

def main():
    global Options

    usage = "usage: %prog [options] word-list"
    scroll_to_first_image_of_google = "document.getElementById('islmp').scrollIntoView(true)"
    scroll_to_first_carousel_of_google = "document.getElementsByTagName('scrolling-carousel')[0].scrollIntoView()"

    parser = OptionParser(usage=usage)
    parser.add_option("-d", "--dir", dest="dir", help="Directory to write captured images.", default="slideshow/imgs")
    parser.add_option("-j", "--js-before-save", dest="js_before_save", help="Eval js file before save mainly to scroll to element", default="")
    parser.add_option("-f", "--force-save", action="store_true", dest="force_save", help="Overwrite existing file if exists", default=False)
    parser.add_option("-p", "--prefix", dest="prefix", help="Prefix for filename", default="")
    parser.add_option("-w", "--window", dest="window", help="Window size. 1280x720 by default.", default="1280x720")
    parser.add_option("-e", "--engine", dest="engine", help="Image search engine to use one of %s" % Engines.keys(), default="google")
    parser.add_option("-s", "--show", action="store_true", dest="show", help="Do not hide chrome browser", default=False)
    parser.add_option("--sleep", dest="sleep", type="float", help="Sleep duration on each take", default=1.0)
    (Options, args) = parser.parse_args()

    if Options.engine not in Engines:
        print("Engine must be one of %s" % Engines.keys())
        exit(1)

    chrome_options = webdriver.ChromeOptions()
    if not Options.show:
        chrome_options.add_argument('--headless')
    chrome_options.add_argument('--hide-scrollbars')
    driver = webdriver.Chrome(options=chrome_options)

    (screen_width, screen_height) = Options.window.split("x")
    driver.set_window_size(screen_width, screen_height)
    print(Options)

    mkdir_p(Options.dir)

    for file in args:
        print(file + ': start')
        retrieve_snapshot_for_words(driver, get_words_from_file(file))
    driver.quit()

main()

をpython3で動くように修正したいです。

発生している問題・エラーメッセージ

記載されているコード

python bulk-screen-capture.py -d collection.media -p "google-img--" -j before_scroll.js -w 720x720 sample.tsv

を実行したところ

Traceback (most recent call last):
  File "bulk-screen-capture.py", line 9, in <module>
    reload(sys)
NameError: name 'reload' is not defined

が出てうまく実行できませんでした。

試したこと

python3系ではデフォルトエンコードがUTF8のため、
sys.setdefaultencoding('UTF8')は推奨されていないとのことで、

import sys
reload(sys)
sys.setdefaultencoding('UTF8')

は削除しました。

StringIOもpython3系では対応していないため、
import StringIOも
import io as cStringIOに書き換え、
58行目の
image = Image.open(cStringIO.StringIO(screen))
を
image = Image.open(cStringIO.StringIO(screen))
に書き換えたら、以下のエラーが表示されました。

{'dir': 'collection.media', 'js_before_save': 'before_scroll.js', 'force_save': False, 'prefix': 'google-img--', 'window': '720x720', 'engine': 'google', 'show': False, 'sleep': 1.0}
sample.tsv: start
Traceback (most recent call last):
  File "bulk-screen-capture.py", line 120, in <module>
    main()
  File "bulk-screen-capture.py", line 117, in main
    retrieve_snapshot_for_words(driver, get_words_from_file(file))
  File "bulk-screen-capture.py", line 71, in retrieve_snapshot_for_words
    save_snapshot(driver, word, idx)
  File "bulk-screen-capture.py", line 58, in save_snapshot
    image = Image.open(cStringIO.StringIO(screen))
TypeError: initial_value must be str or None, not bytes

ここにより詳細な情報を記載してください。
マシン環境はm1macbookです。
pythonのversionは3.8.2です
pip listは
Package Version

Pillow 8.1.0
pip 21.0
selenium 3.141.0
setuptools 41.2.0
six 1.15.0
urllib3 1.26.2
wheel 0.33.1
です。

meg_

2021/01/25 08:03

python2系で動かすのでは駄目なのでしょうか？

68user

2021/01/25 08:10

修正したけれども、まだエラーが出ているのですか?

Daregada

2021/01/25 08:13

「reload(sys)」や「sys.setdefaultencoding('UTF8')」を削除したのであれば、提示されているエラーは出なくなるので、その後は何が問題になっているのか書いてください。

apollo_

2021/01/25 09:17

{'dir': 'collection.media', 'js_before_save': 'before_scroll.js', 'force_save': False, 'prefix': 'google-img--', 'window': '720x720', 'engine': 'google', 'show': False, 'sleep': 1.0} sample.tsv: start Traceback (most recent call last): File "bulk-screen-capture.py", line 120, in <module> main() File "bulk-screen-capture.py", line 117, in main retrieve_snapshot_for_words(driver, get_words_from_file(file)) File "bulk-screen-capture.py", line 71, in retrieve_snapshot_for_words save_snapshot(driver, word, idx) File "bulk-screen-capture.py", line 58, in save_snapshot image = Image.open(cStringIO.StringIO(screen)) TypeError: initial_value must be str or None, not bytes となり、エラーが出ています。