【Python】Seleniumでページ全体のスクリーンショットを撮りたい（スクロールはするがページ下部が切れてしまう）

Seleniumで複数のurlでページ全体のスクリーンショットを撮りたい

のですが、ページ最下部までスクロールしているはずなのに、画像のようにページが途中で切れてしまいます。

以下コードです

python
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Start the Chrome driver in headless mode.
options = Options()
options.add_argument('--headless')
driver = webdriver.Chrome(executable_path='/Users/○○○○/Desktop/WebDriver/chromedriver', options=options)

urls = [
    "https://carmo-kun.jp/lp/35/",
    "https://carmo-kun.jp/lp/36/",
]

# try/finally so the browser process is shut down even if a page fails.
try:
    for url in urls:
        driver.get(url)
        # Fixed pause after navigation before the page is measured.
        time.sleep(3)

        page_height = driver.execute_script("return document.body.scrollHeight;")

        # Resize the window to the measured page height before the screenshot.
        driver.set_window_size(1300, page_height)

        # Drop only the scheme. str.lstrip() treats its argument as a SET of
        # characters, so it would also eat leading 'h', 't', 'p', 's', ':' or
        # '/' characters of the host name (e.g. "https://spam.example").
        url_name = url.split("://", 1)[-1].replace("/", "_").replace("?", "_")

        driver.save_screenshot("/Users/○○○○○/Desktop/WebDriver/image_" + url_name + ".png")
finally:
    driver.quit()

どのように修正すれば、最下部までスクリーンショットが取れるのでしょうか？
よろしくお願いします。

行動規範の内容に同意します

回答3件

参考:Selenium でページ全体のスクリーンショットを撮る (Python)

Python
1 # -*- coding: utf-8 -*-
2"""
3This code provides the ability to take a screenshot of the entire page with Selenium WebDriver.
4"""
5
6import io
7from time import sleep
8from typing import List, Tuple, Optional
9from PIL import Image
10
11from selenium import webdriver
12from selenium.webdriver.chrome.options import Options
13
def get_full_screenshot_image(driver, reverse=False, driverss_contains_scrollbar=None):
    # type: (selenium.webdriver.remote.webdriver.WebDriver, bool, Optional[bool]) -> Image.Image
    """
    Take a screenshot of the entire page and return it as a Pillow image.

    The page is scrolled in steps of ``document.documentElement.clientHeight``,
    a viewport screenshot is taken at every step, and the screenshots are
    pasted onto one canvas whose size is derived from
    ``document.body.scrollHeight`` and ``document.documentElement.clientWidth``.

    :param driver: Selenium WebDriver with the target page already loaded.
    :param reverse: Paste from bottom direction when combining images. The default is False.
    :param driverss_contains_scrollbar: Set to True if the screenshot taken by WebDriver
        contains a horizontal scroll bar. When None, it is set to True only for
        ``webdriver.Chrome`` instances.
    :return: A new RGBA Pillow image holding the stitched page.
    :raises ValueError: If the page reports no scrollable height or the viewport
        reports no client height, so no screenshot could be taken.
    """
    if driverss_contains_scrollbar is None:
        driverss_contains_scrollbar = isinstance(driver, webdriver.Chrome)
    # Scroll to the bottom of the page once
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    sleep(0.5)
    scroll_height, document_client_width, document_client_height, inner_width, inner_height = driver.execute_script("return [document.body.scrollHeight, document.documentElement.clientWidth, document.documentElement.clientHeight, window.innerWidth, window.innerHeight]")
    if scroll_height <= 0 or document_client_height <= 0:
        # Without this guard the loop below takes no screenshot (or range()
        # rejects a zero step) and the scale computation has nothing to read.
        raise ValueError("page reports no scrollable height or viewport height")
    # The browser cannot scroll past this offset. Clamp at 0 so a page shorter
    # than the viewport is pasted at the top instead of at a negative offset,
    # which would cut off the top of the page.
    max_scroll_y = max(0, scroll_height - inner_height)
    streams_to_be_closed = []   # type: List[io.BytesIO]
    images = [] # type: List[Tuple[Image.Image, int]]
    try:
        # open
        for y_coord in range(0, scroll_height, document_client_height):
            driver.execute_script("window.scrollTo(0, arguments[0]);", y_coord)
            stream = io.BytesIO(driver.get_screenshot_as_png())
            streams_to_be_closed.append(stream)
            img = Image.open(stream)
            images.append((img, min(y_coord, max_scroll_y)))  # Image, y_coord
        # load
        # Ratio between screenshot pixels and CSS pixels, taken from the width
        # of the first screenshot.
        screenshot_width = images[0][0].size[0]
        scale = float(screenshot_width) / (inner_width if driverss_contains_scrollbar else document_client_width)
        img_dst = Image.new(mode='RGBA', size=(int(document_client_width * scale), int(scroll_height * scale)))
        for img, y_coord in (reversed(images) if reverse else images):
            img_dst.paste(img, (0, int(y_coord * scale)))
        return img_dst
    finally:
        # close
        for stream in streams_to_be_closed:
            stream.close()
        for img, y_coord in images:
            img.close()
50
# Start the Chrome driver in headless mode.
options = Options()
options.add_argument('--headless')
driver = webdriver.Chrome(executable_path='/Users/○○○○/Desktop/WebDriver/chromedriver', options=options)

urls = [
    "https://carmo-kun.jp/lp/35/",
    "https://carmo-kun.jp/lp/36/",
]

# try/finally so the browser process is shut down even if a page fails.
try:
    for url in urls:
        driver.get(url)
        # Fixed pause after navigation before capturing.
        sleep(3)

        img = get_full_screenshot_image(driver)

        # Drop only the scheme. str.lstrip() treats its argument as a SET of
        # characters and could also strip leading letters of the host name.
        url_name = url.split("://", 1)[-1].replace("/", "_").replace("?", "_")
        img_name = "/Users/○○○○○/Desktop/WebDriver/image_" + url_name + ".png"
        img.save(img_name)
finally:
    driver.quit()

投稿2020/10/15 20:02

編集2020/10/15 20:23

Reach

総合スコア735

teratail969

2020/10/21 05:46 編集

ありがとうございます！ただ、撮れているスクショは横幅が足りておらず、切れてしまっています。こちらの質問ですが、スクロールしながらスクショを撮っていき、全体を一枚の画像にするコードで自分の方で課題解決できましたので、大丈夫です！

行動規範の内容に同意します

自己解決

私の方で、スクロールしてスクショとっていきつなげるコードで
切れずに最後までページ全体のスクリーンショットが撮れたので
この質問はしめさせていただきます。
いろいろご回答いただきありがとうございました。

![]

以下コードです

python
1from PIL import Image
2import io
3
def save_fullscreenshot(driver, filename):
    """Capture a full-page screenshot by stitching viewport-sized tiles.

    The page is walked row by row and column by column in steps of
    ``window.innerWidth`` / ``window.innerHeight``. Each viewport screenshot
    is cropped to the part not yet covered and pasted onto a canvas of
    ``document.body.scrollWidth`` x ``document.body.scrollHeight`` pixels,
    which is then written to ``filename``.

    :param driver: Selenium WebDriver with the target page already loaded.
    :param filename: Path the stitched image is saved to.
    :return: Always True once the image has been saved.
    :raises ValueError: If the browser reports a viewport without area, since
        the tiling loops could never make progress.
    """
    orig_overflow = driver.execute_script("return document.body.style.overflow;")
    # Hide the scrollbars while capturing.
    driver.execute_script("document.body.style.overflow = 'hidden';")
    try:
        total_height = driver.execute_script("return document.body.scrollHeight;")
        total_width = driver.execute_script("return document.body.scrollWidth;")
        view_width = driver.execute_script("return window.innerWidth;")
        view_height = driver.execute_script("return window.innerHeight;")
        if view_width <= 0 or view_height <= 0:
            raise ValueError("viewport has no area; cannot capture tiles")

        # Furthest offsets the tiling assumes the browser can scroll to.
        # Clamped at 0 so a page smaller than the viewport is cropped from
        # the top-left of the screenshot instead of its bottom-right.
        max_scroll_x = max(0, total_width - view_width)
        max_scroll_y = max(0, total_height - view_height)

        stitched_image = Image.new("RGB", (total_width, total_height))

        scroll_height = 0
        while scroll_height < total_height:
            # Rows/columns that run past the page end only contribute the
            # remaining strip; the scroll position is assumed to stop at
            # max_scroll_*, so that strip sits at an offset inside the tile.
            new_height = min(view_height, total_height - scroll_height)
            crop_top = scroll_height - min(scroll_height, max_scroll_y)

            scroll_width = 0
            driver.execute_script("window.scrollTo(arguments[0], arguments[1]);", scroll_width, scroll_height)

            while scroll_width < total_width:
                if scroll_width > 0:
                    driver.execute_script("window.scrollBy(arguments[0], 0);", view_width)

                new_width = min(view_width, total_width - scroll_width)
                crop_left = scroll_width - min(scroll_width, max_scroll_x)

                with Image.open(io.BytesIO(driver.get_screenshot_as_png())) as shot:
                    # NOTE(review): some setups (e.g. HiDPI displays) presumably
                    # return screenshots larger than the CSS viewport; bring
                    # them back to viewport size so the offsets line up.
                    if shot.size != (view_width, view_height):
                        tile = shot.resize((view_width, view_height))
                    else:
                        tile = shot
                    stitched_image.paste(
                        tile.crop((crop_left, crop_top,
                                   crop_left + new_width, crop_top + new_height)),
                        (scroll_width, scroll_height)
                    )
                scroll_width += new_width

            scroll_height += view_height

        stitched_image.save(filename)
    finally:
        # Restore the page's own overflow style even when capturing failed;
        # the value is passed as an argument rather than spliced into the JS.
        driver.execute_script("document.body.style.overflow = arguments[0];", orig_overflow or "")

    return True
54
55
56
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time

driver = webdriver.Chrome(executable_path='/Users/○○○○/Desktop/WebDriver/chromedriver')

urls = [
    "https://carmo-kun.jp/lp/35/",
    "https://carmo-kun.jp/lp/36/",
]

# try/finally so the browser process is shut down even if a page fails.
try:
    for url in urls:
        driver.get(url)
        # Fixed pause after navigation before capturing.
        time.sleep(1)

        # Drop only the scheme. str.lstrip() treats its argument as a SET of
        # characters and could also strip leading letters of the host name.
        url_name = url.split("://", 1)[-1].replace("/", "_").replace("?", "_")

        save_fullscreenshot(driver, "/Users/○○○○/Desktop/WebDriver/" + url_name + ".png")
finally:
    driver.quit()