💻
【BigQuery】SQLチートシート
内容
データ分析や定期的なデータ更新作業において、BigQueryでよく使うSQLをまとめました。
自分用の備忘録なので、見にくいと思われますが、ご容赦ください。
日付型の整理
4つの型
TIMESTAMP型(唯一Timezoneを持つ)
SELECT
TIMESTAMP('2024-01-01 12:00:00', 'Asia/Tokyo')
-- 結果
-- 2024-01-01 03:00:00 UTC
DATETIME型
SELECT
DATETIME('2024-01-01 12:00:00');
-- 結果
-- 2024-01-01T12:00:00
DATE型
SELECT
DATE('2024-01-01');
-- 結果
-- 2024-01-01
TIME型
SELECT
TIME("2022-12-01 09:00:00");
-- 結果
-- 09:00:00
対 TIMESTAMP型
-- UTCでは23:45、これをJSTの翌日8:45に変換する
WITH table AS (
SELECT TIMESTAMP('2024-07-31 23:45:00') as timestamp_
)
SELECT
timestamp_
-- JSTのDATETIME型へ変換
, DATETIME(timestamp_, "Asia/Tokyo") as jst_datetime
-- JSTのDATE型へ変換
, DATE(timestamp_, 'Asia/Tokyo') as jst_date
-- 時刻を抽出
, EXTRACT(HOUR FROM DATETIME(timestamp_, "Asia/Tokyo")) as jst_hour
, EXTRACT(MINUTE FROM DATETIME(timestamp_, "Asia/Tokyo")) as jst_minute
FROM
table
WHERE
DATE(timestamp_, 'Asia/Tokyo') BETWEEN '2024-08-01' AND '2024-08-02'
;
対 DATE型
WITH table AS (
SELECT DATE(2024,8,31) as date_
)
SELECT
date_
# 各期間へ丸める
, DATE_TRUNC(date_, DAY) as date_trunc_
, DATE_TRUNC(date_, WEEK(MONDAY)) as week_trunc
, DATE_TRUNC(date_, MONTH) as month_trunc
, DATE_TRUNC(date_, QUARTER) as quarter_trunc
, DATE_TRUNC(date_, YEAR) as year_trunc
# 抽出
, EXTRACT(DAYOFWEEK FROM date_) as day_of_week
, EXTRACT(DAY FROM date_) as day_
, EXTRACT(MONTH FROM date_) as month
, EXTRACT(QUARTER FROM date_) as quarter
, EXTRACT(YEAR FROM date_) as year
FROM
table
WHERE
date_ BETWEEN DATE(2024,8,30) AND DATE(2024,8,31)
;
対 DATETIME型
WITH table AS (
SELECT DATETIME('2024-08-31 12:45:00') as datetime_
)
SELECT
datetime_
# 各期間へ丸める
, datetime_TRUNC(datetime_, DAY) as datetime_trunc_
, datetime_TRUNC(datetime_, WEEK(MONDAY)) as week_trunc
, datetime_TRUNC(datetime_, MONTH) as month_trunc
, datetime_TRUNC(datetime_, QUARTER) as quarter_trunc
, datetime_TRUNC(datetime_, YEAR) as year_trunc
# 抽出
, EXTRACT(DAYOFWEEK FROM datetime_) as day_of_week
, EXTRACT(DAY FROM datetime_) as day_
, EXTRACT(MONTH FROM datetime_) as month
, EXTRACT(QUARTER FROM datetime_) as quarter
, EXTRACT(YEAR FROM datetime_) as year
FROM
table
WHERE
datetime_ BETWEEN DATETIME('2024-08-31 12:00:00') AND DATETIME('2024-08-31 18:00:00')
# DATE(datetime_) BETWEEN DATE(2024,8,30) AND DATE(2024,8,31)
;
INTからの日付への変換
WITH table AS (
SELECT
2024 as year_
, 8 as month_
, 31 as day_
, 18 as hour_
, 45 as minute_
, 20240831 as yyyymmdd
)
SELECT
# 個々の数値をDATE型へ
DATE(year_, month_, day_)
# yyyymmddをDATE型へ
, PARSE_DATE('%Y%m%d', CAST(yyyymmdd AS STRING))
# 個々の数値をDATETIME型へ
, DATETIME(year_, month_, day_, hour_, minute_, 00)
FROM
table
;
CREATE TABLE
データセットはコンソール上で作成可能(こちら を参照)
通常のテーブル作成
SET @@dataset_project_id = 'test_project';
CREATE TABLE `test_dataset.customer`(
customer_id INT64 NOT NULL
, location STRING OPTIONS(description = "都道府県")
, name STRING
, weight FLOAT64
, log_date DATE
)
PARTITION BY log_date
OPTIONS(description = '顧客マスタ') -- テーブルの説明
;
外部ファイルからのテーブル作成
SET @@dataset_project_id = 'test_project';
CREATE OR REPLACE EXTERNAL TABLE `test_dataset.purchase` (
fruit STRING
, customer_id INT64
, log_date DATE
)
OPTIONS (
format = 'CSV'
, uris = ['gs://test_bucket_20241002/test_data.csv']
, skip_leading_rows = 1
);
既存テーブルのコピー
SET @@dataset_project_id = 'test_project';
CREATE TABLE `test_dataset.customer_copy` -- 新規作成テーブル
COPY `test_dataset.customer` -- コピー元
OPTIONS(description = '20241002時点でのバックアップ') -- テーブルの説明
;
INSERT
INSERT INTO
SET @@dataset_project_id = 'test_project';
INSERT INTO `test_dataset.customer`
WITH t1 AS(
SELECT
10000 as customer_id
, '神奈川県' as location
, '田中 太郎' as name
, 68.2 as weight
, DATE(2024,10,1) as log_date
UNION ALL
SELECT
555 as customer_id
, '長野県' as location
, '山本 二郎' as name
, 54.2 as weight
, DATE(2024,10,2) as log_date
)
SELECT *
FROM t1
;
定期実行をイメージした、PARTITION ごとの INSERT OVERWIRTE
SET @@dataset_project_id = 'test_project';
BEGIN
-- デフォルトでは直近日曜日
DECLARE target_date DATE
DEFAULT DATE_TRUNC(CURRENT_DATE(), WEEK(SUNDAY));
BEGIN TRANSACTION;
------------------------------------------------------------
------------------------------------------------------------
-- 過去分を再集計する場合は log_date を日曜に指定する
-- 例:2024-09-08(日) ~ 2024-09-14(月)を再集計するなら target_date = 2024-09-08
-- SET target_date = DATE('2024-08-11');
------------------------------------------------------------
------------------------------------------------------------
-- target_date が日曜以外だった場合はエラーを出して終了させる
IF EXTRACT(DAYOFWEEK FROM DATE(target_date)) != 1 THEN
SELECT ERROR('The target date is not a Sunday!');
END IF;
-- 集計週のデータがあれば削除
DELETE FROM test_dataset.customer
WHERE log_date = target_date;
-- 集計週のデータを挿入
INSERT INTO test_dataset.customer
WITH t1 AS(
SELECT
9999 as customer_id
, '大阪府' as location
, '若山 麟太郎' as name
, 70.3 as weight
, target_date as log_date
)
SELECT *
FROM t1
;
COMMIT TRANSACTION;
END;
For文
SET @@dataset_project_id = 'test_project';
BEGIN
DECLARE prefectures ARRAY<STRING>;
SET prefectures = ['北海道', '山梨県', '長崎県', '栃木県'];
FOR i IN (SELECT * FROM UNNEST(prefectures) AS prefecture)
DO
INSERT INTO `test_dataset.customer_copy`
SELECT *
FROM `test_dataset.customer`
WHERE location = i.prefecture;
END FOR;
END;
Discussion