🍣
サクッと試すときにおすすめ・BeautifulSoupでスクレイピングのテンプレート
from urllib.request import urlopen
from bs4 import BeautifulSoup
import ssl
# Quick-start BeautifulSoup scraping template: fetch one page, then print its
# title, its first link, the number of div.demo elements, and every cell of
# the first table.tablesorter.

# NOTE(security): this swaps the default HTTPS context factory for the
# unverified one, so urllib stops checking TLS certificates for the whole
# process. Keep it only for quick local experiments; remove it before
# scraping anything where the authenticity of the response matters.
ssl._create_default_https_context = ssl._create_unverified_context

# Specify the URL to scrape. The response is used as a context manager so the
# connection is closed as soon as the document has been parsed.
with urlopen("https://xxxxxxxxxxxxxxx") as html:
    soup = BeautifulSoup(html, "html.parser")

# Get the page title (None when the page has no <title> element).
title_elm = soup.find('title')
title = title_elm.get_text(strip=True) if title_elm is not None else None
print(title)

# Get the URL of the first link. href=True skips anchors without an href
# attribute, which would otherwise raise KeyError on the subscript.
link_elm = soup.find('a', href=True)
url = link_elm['href'] if link_elm is not None else None
print(url)

# Get every div element whose class is "demo".
demo_elms = soup.find_all('div', {'class': 'demo'})
print(len(demo_elms))

# Select the first table with class "tablesorter" and print each cell.
# find() returns None instead of raising IndexError when no table matches,
# in which case there are simply no rows to print.
table = soup.find("table", {"class": "tablesorter"})
rows = table.find_all("tr") if table is not None else []
for row in rows:
    for cell in row.find_all(['td', 'th']):
        print(cell.get_text())
Discussion