Closed2024/05/11にクローズ2

Seleniumで画面キャプチャ（Colaboratory）

ColabでのSeleniumは、google-colab-seleniumにお任せしている。多分今だとSeleniumだけでいける気がするんだけど、まあ楽なので。

!pip install google-colab-selenium

キャプチャ時に日本語フォントがないと化けるのでインストールしておく。

!apt-get install fonts-ipaexfont-gothic fonts-ipaexfont-mincho

Zennの自分の記事をキャプチャしてみる。

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time


# Chrome command-line switches passed to every driver this script creates.
options = [
    "--headless",
    "--disable-dev-shm-usage",
    "--no-sandbox",
    "--incognito",
    "--lang=ja",
    # Chrome expects the size as "width,height"; the "1200x675" spelling is
    # not parsed, so the requested initial window size was not applied.
    "--window-size=1200,675",
]


def initialize_driver(options):
    """Create a Chrome WebDriver configured with the given switches.

    Args:
        options: Iterable of Chrome command-line argument strings; each one
            is added to the driver's ``Options`` in iteration order.

    Returns:
        The ``webdriver.Chrome`` instance built from those options.
    """
    chrome_options = Options()
    for switch in options:
        chrome_options.add_argument(switch)
    return webdriver.Chrome(options=chrome_options)


def capture_page(driver, url, filename, *, wait_seconds=3, max_resizes=20):
    """Load ``url`` and save a screenshot of the whole page to ``filename``.

    The window is resized to the document's width and height, then the page
    is scrolled to the bottom. This repeats until the reported height stops
    changing, so content that appears on scroll is included in the capture.

    Args:
        driver: Selenium WebDriver used to load, resize and capture the page.
        url: Address of the page to capture.
        filename: Path the screenshot is written to.
        wait_seconds: Pause (seconds) after loading, after each resize and
            after each scroll, giving the page time to settle.
        max_resizes: Upper bound on resize/scroll rounds, so a page that
            keeps growing on scroll cannot loop forever.

    Raises:
        Exception: Any error raised while driving the browser is printed and
            then re-raised unchanged.
    """
    try:
        driver.get(url)
        time.sleep(wait_seconds)

        last_height = driver.execute_script("return document.body.scrollHeight")
        # The width must come from scrollWidth; reading scrollHeight here made
        # the window as wide as the page is tall.
        width = driver.execute_script("return document.body.scrollWidth")

        for _ in range(max_resizes):
            driver.set_window_size(width, last_height)
            time.sleep(wait_seconds)

            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(wait_seconds)

            new_height = driver.execute_script("return document.body.offsetHeight")
            if new_height == last_height:
                # Height is stable: the window already matches the page.
                break

            last_height = new_height

        driver.save_screenshot(filename)
        print(f"Captured as {filename}")
    except Exception as e:
        print(f"Error: {e}")
        raise

def main():
    """Capture one fixed Zenn article to ``capture.png``.

    A failure during capture is printed rather than propagated, and the
    browser is always shut down afterwards.
    """
    target_url = "https://zenn.dev/kun432/articles/20230921-vector-databases-jp-part-1"
    output_path = "capture.png"

    browser = initialize_driver(options)
    try:
        capture_page(browser, target_url, output_path)
    except Exception as err:
        print(f"Error: {err}")
    finally:
        # Quit even after a failure so the Chrome process is not left running.
        browser.quit()


# Run the capture only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

こんな感じで取れる。

サイズの調整は難しいね・・・

kun432

参考

このスクラップは2024/05/11にクローズされました