Closed2
Seleniumで画面キャプチャ(Colaboratory)
ColabでのSeleniumは、google-colab-seleniumにお任せしている。多分今だとSeleniumだけでいける気がするんだけど、まあ楽なので。
!pip install google-colab-selenium
キャプチャ時に日本語フォントがないと化けるのでインストールしておく。
!apt-get install fonts-ipaexfont-gothic fonts-ipaexfont-mincho
Zennの自分の記事をキャプチャしてみる。
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
# Command-line switches passed to Chrome when the driver is created.
options = [
    "--headless",
    "--disable-dev-shm-usage",
    "--no-sandbox",
    "--incognito",
    "--lang=ja",
    # Chrome parses this switch as "width,height"; the previous "1200x675"
    # form is not a valid value, so the requested size was not applied.
    "--window-size=1200,675",
]
def initialize_driver(options):
    """Create a Chrome WebDriver configured with the given switches.

    Args:
        options: Iterable of Chrome command-line argument strings; each one
            is added to a fresh ``Options`` object in iteration order.

    Returns:
        The ``webdriver.Chrome`` instance built from those options.
    """
    chrome_opts = Options()
    for flag in options:
        chrome_opts.add_argument(flag)
    return webdriver.Chrome(options=chrome_opts)
def capture_page(driver, url, filename, wait=3):
    """Load *url* and save a screenshot of the whole page to *filename*.

    The window is resized to the dimensions the page reports, the page is
    scrolled to the bottom, and the height is measured again. This repeats
    until two consecutive measurements agree, after which the screenshot is
    taken.

    Args:
        driver: WebDriver-like object providing ``get``, ``execute_script``,
            ``set_window_size`` and ``save_screenshot``.
        url: Address of the page to capture.
        filename: Path the screenshot is written to.
        wait: Seconds to pause after loading, after resizing and after
            scrolling. Defaults to 3, the delay that used to be hard-coded.

    Raises:
        Exception: Any error raised while driving the browser is printed
            and then re-raised unchanged.
    """
    try:
        driver.get(url)
        time.sleep(wait)
        last_height = driver.execute_script("return document.body.scrollHeight")
        # The width must come from scrollWidth. It was previously read from
        # scrollHeight, which made the window as wide as the page is tall.
        width = driver.execute_script("return document.body.scrollWidth;")
        while True:
            driver.set_window_size(width, last_height)
            time.sleep(wait)
            # Scroll to the bottom before re-measuring the page height.
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(wait)
            # NOTE(review): the first measurement uses scrollHeight and this
            # one uses offsetHeight; kept as in the original -- confirm which
            # metric is intended.
            new_height = driver.execute_script("return document.body.offsetHeight")
            if new_height == last_height:
                break
            last_height = new_height
        driver.save_screenshot(filename)
        print(f"Captured as {filename}")
    except Exception as e:
        print(f"Error: {e}")
        raise
def main():
    """Capture one fixed Zenn article to ``capture.png``.

    A driver is created from the module-level ``options``. Any exception
    raised during the capture is printed rather than propagated, and the
    driver is quit whether or not the capture succeeded.
    """
    browser = initialize_driver(options)
    try:
        capture_page(
            browser,
            "https://zenn.dev/kun432/articles/20230921-vector-databases-jp-part-1",
            "capture.png",
        )
    except Exception as err:
        print(f"Error: {err}")
    finally:
        # Always release the browser process, even after a failed capture.
        browser.quit()


# Run the capture only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
こんな感じで取れる。
サイズの調整は難しいね・・・
参考
このスクラップは2024/05/11にクローズされました