🚿

BigQueryでGAのネストされたデータを整形

2024/07/17に公開

あくまで検証用なので参考までに。

-- ◯日前の正規化データ作成
CREATE TEMPORARY FUNCTION fromDate() AS (DATE_SUB(CURRENT_DATE('Asia/Tokyo'), INTERVALDAY));
CREATE TEMPORARY FUNCTION toDate()  AS (DATE_SUB(CURRENT_DATE('Asia/Tokyo'), INTERVALDAY));

with unnest_from_ga_raw as (
    SELECT
         event_date
         , user_id
         , user_pseudo_id
         , event_timestamp
         , event_name
         , user_first_touch_timestamp
         , device.category as device_category
         , device.operating_system as device_operating_system
         , device.operating_system_version as device_operating_system_version
         , device.language as device_language
         , device.web_info.browser as device_web_info_browser
         , device.web_info.browser_version as device_web_info_browser_version
         , device.web_info.hostname as device_web_info_hostname
         , geo.country as geo_country
         , geo.region as geo_region
         , geo.city as geo_city
         , traffic_source.name as traffic_source_name
         , traffic_source.medium as traffic_source_medium
         , traffic_source.source as traffic_source_source
         , (
        select
            params.value.string_value
        from
            unnest(event_params) as params
        where
                params.key = 'page_location'
    ) as page_location
         , (
        select
            params.value.string_value
        from
            unnest(event_params) as params
        where
                params.key = 'page_referrer'
    ) as page_referrer
         , (
        select
            params.value.string_value
        from
            unnest(event_params) as params
        where
                params.key = 'page_title'
    ) as page_title
         , (
        select
            params.value.int_value
        from
            unnest(event_params) as params
        where
                params.key = 'ga_session_id'
    ) as ga_session_id
         , (
        select
            params.value.int_value
        from
            unnest(event_params) as params
        where
                params.key = 'ga_session_number'
    ) as ga_session_number
         , (
        select
            params.value.string_value
        from
            unnest(event_params) as params
        where
                params.key = 'source'
    ) as source
         , (
        select
            params.value.string_value
        from
            unnest(event_params) as params
        where
                params.key = 'term'
    ) as term
         , (
        select
            params.value.string_value
        from
            unnest(event_params) as params
        where
                params.key = 'medium'
    ) as medium
         , (
        select
            params.value.string_value
        from
            unnest(event_params) as params
        where
                params.key = 'campaign'
    ) as campaign
         , (
        select
            params.value.string_value
        from
            unnest(event_params) as params
        where
                params.key = 'content'
    ) as content
         , (
        select
            params.value.int_value
        from
            unnest(event_params) as params
        where
                params.key = 'percent_scrolled'
    ) as percent_scrolled
         , (
        select
            params.value.string_value
        from
            unnest(event_params) as params
        where
                params.key = 'outbound'
    ) as outbound
         , (
        select
            params.value.string_value
        from
            unnest(event_params) as params
        where
                params.key = 'session_engaged'
    ) as session_engaged

         , (
        select
            params.value.int_value
        from
            unnest(event_params) as params
        where
                params.key = 'engagement_time_msec'
    ) as engagement_time_msec
         , (
        select
            params.value.string_value
        from
            unnest(event_params) as params
        where
                params.key = 'search_term'
    ) as search_term
         , (
        select
            params.value.string_value
        from
            unnest(event_params) as params
        where
                params.key = 'link_url'
    ) as link_url
         , (
        select
            params.value.string_value
        from
            unnest(event_params) as params
        where
                params.key = 'ignore_referrer'
    ) as ignore_referrer
        , (
        select
            params.value.int_value
        from
            unnest(event_params) as params
        where
                params.key = 'entrances'
    ) as entrances

    FROM
        `analytics_.events_*`
    WHERE 
      _TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d', fromDate()) AND FORMAT_DATE('%Y%m%d', toDate())
),
target_ga as (
  SELECT
         PARSE_DATE("%Y%m%d", event_date) AS event_date
         , user_id
         , user_pseudo_id
         , DATETIME(timestamp_micros(event_timestamp), 'Asia/Tokyo') as event_timestamp
         , event_name
         , DATETIME(timestamp_micros(user_first_touch_timestamp), 'Asia/Tokyo') as user_first_touch_timestamp
         , device_category
         , device_operating_system
         , device_operating_system_version
         , device_language
         , device_web_info_browser
         , device_web_info_browser_version
         , device_web_info_hostname
         , geo_country
         , geo_region
         , geo_city
         , REGEXP_REPLACE(traffic_source_name, '[()]', '') as traffic_source_name
         , REGEXP_REPLACE(traffic_source_medium, '[()]', '') as traffic_source_medium
         , REGEXP_REPLACE(traffic_source_source, '[()]', '') as traffic_source_source
         , page_location
         , page_referrer
         , page_title
         , ga_session_id
         , ga_session_number
         , source
         , REGEXP_REPLACE(term, '[()]', '') as term
         , REGEXP_REPLACE(medium, '[()]', '') as medium
         , REGEXP_REPLACE(campaign, '[()]', '') as campaign
         , REGEXP_REPLACE(content, '[()]', '') as content
         , percent_scrolled
         , outbound
         , session_engaged
         , engagement_time_msec
         , search_term
         , link_url
         , ignore_referrer
         , entrances
  FROM unnest_from_ga_raw
)

select *
from target_ga;



Discussion