Open7
ネストされたJSONをduckdbで解析する
読み込むJSONファイルの形式
{
"orders": [
{
"orderId": 80,
"createdAt": "2024-11-03T03:07:32.271Z", // ISO 8601
"readyAt": "2024-11-03T03:16:38.556Z",
"servedAt": "2024-11-03T03:16:38.556Z",
"items": [
{
"id": "02_cafeore_brend",
"name": "珈琲・俺ブレンド",
"price": 300,
"type": "hot", // ENUM hot, ice, hotOre, iceOre, milk, others
"assignee": "5th" // nullable
},
{
"id": "02_cafeore_brend",
"name": "珈琲・俺ブレンド",
"price": 300,
"type": "hot",
"assignee": "5th"
}
],
...,
},
{
"id": "0HUULSYJdO8y3QpxqF7c",
"createdAt": "2024-11-03T03:07:32.271Z",
"readyAt": "2024-11-03T03:16:38.556Z",
...,
},
...,
]
}
ISO 8601 形式の文字列をタイムスタンプ型として読み込む
-- タイムスタンプを扱うextensionをロードする
load icu;
-- JSONがUTCなので、UTCにセットしておく
set TimeZone = 'UTC';
create table
cafeore2024
as select
-- JSONのプロパティを全て読み込む
j.*,
-- `D`付きは timestampz 型のデータを格納
-- UTCとして読込
strptime(j.createdAt, '%xT%X.%gZ')::TIMESTAMPTZ as createdAtD,
strptime(j.servedAt, '%xT%X.%gZ')::TIMESTAMPTZ as servedAtD,
strptime(j.readyAt, '%xT%X.%gZ')::TIMESTAMPTZ as readyAtD
from (
select
-- "orders"プロパティの中にリストが入っているのでこうなる
unnest(orders) as j
from
-- JSONファイル名
'orders_day12.json'
);
提供時間の短い順にソート
select
orderId,
-- timestampz 型を使う
age(servedAtD, createdAtD) as duration
from
cafeore2024
order by
duration
limit
5
;
アイテムを3つ以上含む注文のみ抽出
select
orderId,
age(servedAtD, createdAtD) as duration,
count(cups.unnest) as number_of_cups
from
cafeore2024,
unnest(items) as cups
group by
orderId,
servedAtD,
createdAtD
having
number_of_cups > 2
order by
duration asc
limit
3
;
特定の種類のアイテムが含まれる注文のみを抽出
select
orderId
from (
select
orderId,
cups.unnest as cup,
from
cafeore2024,
unnest(items) as cups
)
where
-- struct_extract で特定のプロパティにアクセス
-- hot, ice, hotOre, iceOre のみを絞り込み
struct_extract(cup, 'type') != 'milk'
and
struct_extract(cup, 'type') != 'others'
;
Asia/Tokyo
で表示する
UTC の timestampz を 特定の種類のアイテムが含まれる注文を提供時間短い順で並べる
select
orderId,
-- UTC の timestampz を日本時間で表示する
createdAtD at time zone 'Asia/Tokyo' as '注文時刻',
age(servedAtD, createdAtD) as duration,
cupCount,
-- 短い順に並べると cupCount は 1 になるのでアイテム名を表示
struct_extract(cup, 'name') as itemName
from (
select
orderId,
createdAtD,
servedAtD,
cups.unnest as cup,
count(cups) as cupCount
from
cafeore2024,
unnest(items) as cups
group by
orderId,
createdAtD,
servedAtD,
cup
)
where
struct_extract(cup, 'type') != 'milk'
and
struct_extract(cup, 'type') != 'others'
group by
orderId,
createdAtD,
duration,
cupCount,
cup
order by
duration asc
limit
5
;
ドリップが1杯しか含まれていない注文で、提供時間が長い順5つ
select
orderId,
createdAtD at time zone 'Asia/Tokyo' as '注文時刻',
age(servedAtD, createdAtD) as duration,
cupCount,
struct_extract(cup, 'name') as itemName
from (
select
orderId,
createdAtD,
servedAtD,
any_value(cups.unnest) as cup,
count(cups) as cupCount
from
cafeore2024,
unnest(items) as cups
group by
orderId,
createdAtD,
servedAtD,
)
where
struct_extract(cup, 'type') != 'milk'
and
struct_extract(cup, 'type') != 'others'
and
cupCount = 1
group by
orderId,
createdAtD,
duration,
cupCount,
cup
order by
duration desc
limit
5
;