Open7

ネストされたJSONをduckdbで解析する

とりとり

読み込むJSONファイルの形式

{
  "orders": [
    {
      "orderId": 80,
      "createdAt": "2024-11-03T03:07:32.271Z", // ISO 8601
      "readyAt": "2024-11-03T03:16:38.556Z",
      "servedAt": "2024-11-03T03:16:38.556Z",
      "items": [
        {
          "id": "02_cafeore_brend",
          "name": "珈琲・俺ブレンド",
          "price": 300,
          "type": "hot", // ENUM hot, ice, hotOre, iceOre, milk, others
          "assignee": "5th" // nullable
        },
        {
          "id": "02_cafeore_brend",
          "name": "珈琲・俺ブレンド",
          "price": 300,
          "type": "hot",
          "assignee": "5th"
        }
      ],
      ...,
    },
    {
      "id": "0HUULSYJdO8y3QpxqF7c",
      "createdAt": "2024-11-03T03:07:32.271Z",
      "readyAt": "2024-11-03T03:16:38.556Z",
      ...,
    },
    ...,
  ]
}
とりとり

ISO 8601 形式の文字列をタイムスタンプ型として読み込む

-- タイムスタンプを扱うextensionをロードする
load icu;

-- JSONがUTCなので、UTCにセットしておく
set TimeZone = 'UTC';

create table
    cafeore2024
as select
    -- JSONのプロパティを全て読み込む
    j.*,
    -- `D`付きは timestampz 型のデータを格納
    -- UTCとして読込
    strptime(j.createdAt, '%xT%X.%gZ')::TIMESTAMPTZ as createdAtD,
    strptime(j.servedAt, '%xT%X.%gZ')::TIMESTAMPTZ as servedAtD,
    strptime(j.readyAt, '%xT%X.%gZ')::TIMESTAMPTZ as readyAtD
from (
    select
        -- "orders"プロパティの中にリストが入っているのでこうなる
        unnest(orders) as j
    from
        -- JSONファイル名
        'orders_day12.json'
);
とりとり

提供時間の短い順にソート

select
    orderId,
    -- timestampz 型を使う
    age(servedAtD, createdAtD) as duration
from
    cafeore2024
order by
    duration
limit
    5
;
とりとり

アイテムを3つ以上含む注文のみ抽出

select
    orderId,
    age(servedAtD, createdAtD) as duration,
    count(cups.unnest) as number_of_cups
from
    cafeore2024,
    unnest(items) as cups
group by
    orderId,
    servedAtD,
    createdAtD
having
    number_of_cups > 2
order by
    duration asc
limit
    3
;
とりとり

特定の種類のアイテムが含まれる注文のみを抽出

select
	orderId
from (
    select
    	orderId,
    	cups.unnest as cup,
    from
    	cafeore2024,
    	unnest(items) as cups
)
where
    -- struct_extract で特定のプロパティにアクセス
    -- hot, ice, hotOre, iceOre のみを絞り込み
    struct_extract(cup, 'type') != 'milk'
    and
    struct_extract(cup, 'type') != 'others'
;
とりとり

UTC の timestampz を Asia/Tokyo で表示する

特定の種類のアイテムが含まれる注文を提供時間短い順で並べる

select
    orderId,
    -- UTC の timestampz を日本時間で表示する
    createdAtD at time zone 'Asia/Tokyo' as '注文時刻',
    age(servedAtD, createdAtD) as duration,
    cupCount,
    -- 短い順に並べると cupCount は 1 になるのでアイテム名を表示
    struct_extract(cup, 'name') as itemName
from (
    select
    	orderId,
    	createdAtD,
    	servedAtD,
    	cups.unnest as cup,
    	count(cups) as cupCount
    from
    	cafeore2024,
    	unnest(items) as cups
    group by
    	orderId,
    	createdAtD,
    	servedAtD,
    	cup
)
where
    struct_extract(cup, 'type') != 'milk'
    and
    struct_extract(cup, 'type') != 'others'
group by
    orderId,
    createdAtD,
    duration,
    cupCount,
    cup
order by
    duration asc
limit
    5
;
とりとり

ドリップが1杯しか含まれていない注文で、提供時間が長い順5つ

select
    orderId,
    createdAtD at time zone 'Asia/Tokyo' as '注文時刻',
    age(servedAtD, createdAtD) as duration,
    cupCount,
    struct_extract(cup, 'name') as itemName
from (
    select
        orderId,
        createdAtD,
        servedAtD,
        any_value(cups.unnest) as cup,
        count(cups) as cupCount
    from
        cafeore2024,
        unnest(items) as cups
    group by
        orderId,
        createdAtD,
        servedAtD,
)
where
    struct_extract(cup, 'type') != 'milk'
    and
    struct_extract(cup, 'type') != 'others'
    and
    cupCount = 1
group by
    orderId,
    createdAtD,
    duration,
    cupCount,
    cup
order by
    duration desc
limit
    5
;