💻

【BigQuery】SQLチートシート

2024/10/11に公開

内容

データ分析や定期的なデータ更新作業において、BigQueryでよく使うSQLをまとめました。
自分用の備忘録なので、見にくいと思われますが、ご容赦ください。

日付型の整理

4つの型

TIMESTAMP型(唯一Timezoneを持つ)

SELECT
    TIMESTAMP('2024-01-01 12:00:00', 'Asia/Tokyo')

-- 結果
-- 2024-01-01 03:00:00 UTC

DATETIME型

SELECT
    DATETIME('2024-01-01 12:00:00');

-- 結果
-- 2024-01-01T12:00:00

DATE型

SELECT
    DATE('2024-01-01');

-- 結果
-- 2024-01-01

TIME型

SELECT
    TIME("2022-12-01 09:00:00");

-- 結果
-- 09:00:00

対 TIMESTAMP型

-- UTCでは23:45、これをJSTの翌日8:45に変換する

WITH table AS (
  SELECT TIMESTAMP('2024-07-31 23:45:00') as timestamp_
)

SELECT
  timestamp_

  -- JSTのDATETIME型へ変換
  , DATETIME(timestamp_, "Asia/Tokyo") as jst_datetime
  -- JSTのDATE型へ変換
  , DATE(timestamp_, 'Asia/Tokyo') as jst_date

  -- 時刻を抽出
  , EXTRACT(HOUR FROM DATETIME(timestamp_, "Asia/Tokyo")) as jst_hour
  , EXTRACT(MINUTE FROM DATETIME(timestamp_, "Asia/Tokyo")) as jst_minute

FROM
  table
WHERE
  DATE(timestamp_, 'Asia/Tokyo') BETWEEN '2024-08-01' AND '2024-08-02'
;

対 DATE型

WITH table AS (
  SELECT DATE(2024,8,31) as date_
)

SELECT
    date_

    # 各期間へ丸める
    , DATE_TRUNC(date_, DAY) as date_trunc_
    , DATE_TRUNC(date_, WEEK(MONDAY)) as week_trunc
    , DATE_TRUNC(date_, MONTH) as month_trunc
    , DATE_TRUNC(date_, QUARTER) as quarter_trunc
    , DATE_TRUNC(date_, YEAR) as year_trunc

    # 抽出 
    , EXTRACT(DAYOFWEEK FROM date_) as day_of_week
    , EXTRACT(DAY FROM date_) as day_
    , EXTRACT(MONTH FROM date_) as month
    , EXTRACT(QUARTER FROM date_) as quarter
    , EXTRACT(YEAR FROM date_) as year

FROM
    table
WHERE
    date_ BETWEEN DATE(2024,8,30) AND DATE(2024,8,31)
;

対 DATETIME型

WITH table AS (
  SELECT DATETIME('2024-08-31 12:45:00') as datetime_
)

SELECT
    datetime_

    # 各期間へ丸める
    , datetime_TRUNC(datetime_, DAY) as datetime_trunc_
    , datetime_TRUNC(datetime_, WEEK(MONDAY)) as week_trunc
    , datetime_TRUNC(datetime_, MONTH) as month_trunc
    , datetime_TRUNC(datetime_, QUARTER) as quarter_trunc
    , datetime_TRUNC(datetime_, YEAR) as year_trunc

    # 抽出 
    , EXTRACT(DAYOFWEEK FROM datetime_) as day_of_week
    , EXTRACT(DAY FROM datetime_) as day_
    , EXTRACT(MONTH FROM datetime_) as month
    , EXTRACT(QUARTER FROM datetime_) as quarter
    , EXTRACT(YEAR FROM datetime_) as year

FROM
    table
WHERE
    datetime_ BETWEEN DATETIME('2024-08-31 12:00:00') AND DATETIME('2024-08-31 18:00:00')
    # DATE(datetime_) BETWEEN DATE(2024,8,30) AND DATE(2024,8,31)
;

INTからの日付への変換

WITH table AS (
    SELECT
        2024 as year_
        , 8 as month_
        , 31 as day_
        , 18 as hour_
        , 45 as minute_
        , 20240831 as yyyymmdd
)

SELECT
    # 個々の数値をDATE型へ
    DATE(year_, month_, day_)

    # yyyymmddをDATE型へ
    , PARSE_DATE('%Y%m%d', CAST(yyyymmdd AS STRING))
    
    # 個々の数値をDATETIME型へ
    , DATETIME(year_, month_, day_, hour_, minute_, 00) 
FROM
    table
;

CREATE TABLE

データセットはコンソール上で作成可能(こちら を参照)

通常のテーブル作成

SET @@dataset_project_id = 'test_project';

CREATE TABLE `test_dataset.customer`( 
    customer_id INT64 NOT NULL
    , location STRING OPTIONS(description = "都道府県")
    , name STRING
    , weight FLOAT64
    , log_date DATE
)
PARTITION BY log_date
OPTIONS(description = '顧客マスタ') -- テーブルの説明
;

外部ファイルからのテーブル作成

SET @@dataset_project_id = 'test_project';

CREATE OR REPLACE EXTERNAL TABLE `test_dataset.purchase` (
  fruit STRING
  , customer_id INT64
  , log_date DATE
)
OPTIONS (
    format = 'CSV'
    , uris = ['gs://test_bucket_20241002/test_data.csv']
    , skip_leading_rows = 1
);

既存テーブルのコピー

SET @@dataset_project_id = 'test_project';

CREATE TABLE `test_dataset.customer_copy` -- 新規作成テーブル
COPY `test_dataset.customer` -- コピー元
OPTIONS(description = '20241002時点でのバックアップ') -- テーブルの説明
; 

INSERT

INSERT INTO

SET @@dataset_project_id = 'test_project';

INSERT INTO `test_dataset.customer`

WITH t1 AS(
    SELECT
        10000 as customer_id
        ,  '神奈川県' as location
        , '田中 太郎' as name
        , 68.2 as weight
        , DATE(2024,10,1) as log_date
    UNION ALL
    SELECT
        555 as customer_id
        ,  '長野県' as location
        , '山本 二郎' as name
        , 54.2 as weight
        , DATE(2024,10,2) as log_date
)
SELECT *
FROM t1
;

定期実行をイメージした、PARTITION ごとの INSERT OVERWIRTE

SET @@dataset_project_id = 'test_project';

BEGIN

    -- デフォルトでは直近日曜日
    DECLARE target_date DATE 
        DEFAULT DATE_TRUNC(CURRENT_DATE(), WEEK(SUNDAY));

    BEGIN TRANSACTION;

    ------------------------------------------------------------
    ------------------------------------------------------------
    -- 過去分を再集計する場合は log_date を日曜に指定する
    -- 例:2024-09-08(日) ~ 2024-09-14(月)を再集計するなら target_date = 2024-09-08

    -- SET target_date = DATE('2024-08-11');
    ------------------------------------------------------------
    ------------------------------------------------------------

    -- target_date が日曜以外だった場合はエラーを出して終了させる
    IF EXTRACT(DAYOFWEEK FROM DATE(target_date)) != 1 THEN
        SELECT ERROR('The target date is not a Sunday!');
    END IF;

    -- 集計週のデータがあれば削除
    DELETE FROM test_dataset.customer
    WHERE log_date = target_date; 

    -- 集計週のデータを挿入
    INSERT INTO test_dataset.customer
    WITH t1 AS(
        SELECT
            9999 as customer_id
            , '大阪府' as location
            , '若山 麟太郎' as name
            , 70.3 as weight
            , target_date as log_date
    )
    SELECT *
    FROM t1
    ;

    COMMIT TRANSACTION;

END;

For文

SET @@dataset_project_id = 'test_project';

BEGIN

    DECLARE prefectures ARRAY<STRING>;
    SET prefectures = ['北海道', '山梨県', '長崎県', '栃木県'];

    FOR i IN (SELECT * FROM UNNEST(prefectures) AS prefecture)
    DO
    INSERT INTO `test_dataset.customer_copy`
    SELECT *
    FROM `test_dataset.customer`
    WHERE location = i.prefecture;
    END FOR;

END;

Discussion