🦑

(BigQuery)配列を使った適当な文字列データ生成

2023/09/30に公開

背景

  • テスト用に大量の偽の商品名データを用意することになった。
  • "T-Shirt1","T-Shirt2"などの単純な商品名ではなく、リアルな商品名にしてほしいと要望があった

やりたいこと

  • [SALE]NIKE:メンズ用パーカー 黒[50%OFF] PUMA:靴 白のように本物っぽい商品名データを作る

手段

  • 配列を用いて、ランダムに値を選択する

クエリ(1)

-- 配列の中からランダムに一つ選択する
WITH
  items AS (
  SELECT
    ARRAY<STRING>[
    "【SALE】NIKE:ジャージ 黒",
    "【BAZAAR】addidas:靴 ピンク",
    "【FAMILY SALE】new blance:995 グレイ",
    " "] AS items_array),
  random_items AS (
  SELECT
    items_array[ CAST( ROUND( RAND()*(ARRAY_LENGTH(items_array)-1),0) AS int64) ]
  FROM
    items )
SELECT
  (
  SELECT
    *
  FROM
    random_items) AS name
FROM
  UNNEST(GENERATE_ARRAY(1, 10)) AS id
ORDER BY
  name

クエリ(1)の結果

クエリ(2)

-- (応用)配列の中からランダムに一つ選択する+文字列結合する

WITH
  events AS (
  SELECT
    ARRAY<STRING>[
    "【SALE】",
    "【BAZAAR】",
    "【FAMILY SALE】",
    "【10%OFF】",
    "【50%OFF】",
    "【FINAL】", 
    "【MEGA】",
    ""] AS events_array),
  random_events AS (
  SELECT
    events_array[ CAST( ROUND( RAND()*(ARRAY_LENGTH(events_array)-1),0) AS int64) ]
  FROM
    events 
  ),
  brands AS (
  SELECT
    ARRAY<STRING>[
    "NIKE",
    "addidas",
    "puma",
    "reebok",
    "New Balance",
    "Hoka",
    "FILA",
    "converse",
    "VANS",
    "asics",
    "kappa"
  ] AS brands_array),
  random_brands AS (
  SELECT
    brands_array[ CAST( ROUND( RAND()*(ARRAY_LENGTH(brands_array)-1),0) AS int64) ]
  FROM
    brands
  ),
  items AS (
  SELECT
    ARRAY<STRING>[
    "belt",
    "shirts",
    "shoes",
    "pants",
    "glasses",
    "hat",
    "dress",
    "jacket",
    "scarf",
    "socks"
  ] AS items_array),
  random_items AS (
  SELECT
    items_array[ CAST( ROUND( RAND()*(ARRAY_LENGTH(items_array)-1),0) AS int64) ]
  FROM
    items
  ),
  whom AS (
  SELECT
    ARRAY<STRING>[
      "for men",
      "for woman",
      "for girl",
      "for boy",
      "for free",
      "for dog",
      "for male",
      "for femal",
      "for pet",
      "for cat"
  ] AS whom_array),
  random_whom AS (
  SELECT
    whom_array[ CAST( ROUND( RAND()*(ARRAY_LENGTH(whom_array)-1),0) AS int64) ]
  FROM
    whom
  ),
  color AS (
  SELECT
    ARRAY<STRING>[
      "yellow",
      "black",
      "white",
      "red",
      "blue",
      "green",
      "purple",
      "orange",
      "pink", 
      "brown"
  ] AS color_array),
  random_color AS (
  SELECT
    color_array[ CAST( ROUND( RAND()*(ARRAY_LENGTH(color_array)-1),0) AS int64) ]
  FROM
    color
  )

SELECT
  CONCAT(
  (SELECT * FROM random_events),"",
  (SELECT * FROM random_brands),":",
  (SELECT * FROM random_items)," ",
  (SELECT * FROM random_whom),"-",
  (SELECT * FROM random_color)
  ) as name
FROM
  UNNEST(GENERATE_ARRAY(1, 10)) AS id
ORDER BY
  name

クエリ(2)の結果

Discussion