🚿
BigQueryでGAのネストされたデータを整形
あくまで検証用なので参考までに。
-- ◯日前の正規化データ作成
CREATE TEMPORARY FUNCTION fromDate() AS (DATE_SUB(CURRENT_DATE('Asia/Tokyo'), INTERVAL ◯ DAY));
CREATE TEMPORARY FUNCTION toDate() AS (DATE_SUB(CURRENT_DATE('Asia/Tokyo'), INTERVAL ◯ DAY));
with unnest_from_ga_raw as (
SELECT
event_date
, user_id
, user_pseudo_id
, event_timestamp
, event_name
, user_first_touch_timestamp
, device.category as device_category
, device.operating_system as device_operating_system
, device.operating_system_version as device_operating_system_version
, device.language as device_language
, device.web_info.browser as device_web_info_browser
, device.web_info.browser_version as device_web_info_browser_version
, device.web_info.hostname as device_web_info_hostname
, geo.country as geo_country
, geo.region as geo_region
, geo.city as geo_city
, traffic_source.name as traffic_source_name
, traffic_source.medium as traffic_source_medium
, traffic_source.source as traffic_source_source
, (
select
params.value.string_value
from
unnest(event_params) as params
where
params.key = 'page_location'
) as page_location
, (
select
params.value.string_value
from
unnest(event_params) as params
where
params.key = 'page_referrer'
) as page_referrer
, (
select
params.value.string_value
from
unnest(event_params) as params
where
params.key = 'page_title'
) as page_title
, (
select
params.value.int_value
from
unnest(event_params) as params
where
params.key = 'ga_session_id'
) as ga_session_id
, (
select
params.value.int_value
from
unnest(event_params) as params
where
params.key = 'ga_session_number'
) as ga_session_number
, (
select
params.value.string_value
from
unnest(event_params) as params
where
params.key = 'source'
) as source
, (
select
params.value.string_value
from
unnest(event_params) as params
where
params.key = 'term'
) as term
, (
select
params.value.string_value
from
unnest(event_params) as params
where
params.key = 'medium'
) as medium
, (
select
params.value.string_value
from
unnest(event_params) as params
where
params.key = 'campaign'
) as campaign
, (
select
params.value.string_value
from
unnest(event_params) as params
where
params.key = 'content'
) as content
, (
select
params.value.int_value
from
unnest(event_params) as params
where
params.key = 'percent_scrolled'
) as percent_scrolled
, (
select
params.value.string_value
from
unnest(event_params) as params
where
params.key = 'outbound'
) as outbound
, (
select
params.value.string_value
from
unnest(event_params) as params
where
params.key = 'session_engaged'
) as session_engaged
, (
select
params.value.int_value
from
unnest(event_params) as params
where
params.key = 'engagement_time_msec'
) as engagement_time_msec
, (
select
params.value.string_value
from
unnest(event_params) as params
where
params.key = 'search_term'
) as search_term
, (
select
params.value.string_value
from
unnest(event_params) as params
where
params.key = 'link_url'
) as link_url
, (
select
params.value.string_value
from
unnest(event_params) as params
where
params.key = 'ignore_referrer'
) as ignore_referrer
, (
select
params.value.int_value
from
unnest(event_params) as params
where
params.key = 'entrances'
) as entrances
FROM
`analytics_.events_*`
WHERE
_TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d', fromDate()) AND FORMAT_DATE('%Y%m%d', toDate())
),
target_ga as (
SELECT
PARSE_DATE("%Y%m%d", event_date) AS event_date
, user_id
, user_pseudo_id
, DATETIME(timestamp_micros(event_timestamp), 'Asia/Tokyo') as event_timestamp
, event_name
, DATETIME(timestamp_micros(user_first_touch_timestamp), 'Asia/Tokyo') as user_first_touch_timestamp
, device_category
, device_operating_system
, device_operating_system_version
, device_language
, device_web_info_browser
, device_web_info_browser_version
, device_web_info_hostname
, geo_country
, geo_region
, geo_city
, REGEXP_REPLACE(traffic_source_name, '[()]', '') as traffic_source_name
, REGEXP_REPLACE(traffic_source_medium, '[()]', '') as traffic_source_medium
, REGEXP_REPLACE(traffic_source_source, '[()]', '') as traffic_source_source
, page_location
, page_referrer
, page_title
, ga_session_id
, ga_session_number
, source
, REGEXP_REPLACE(term, '[()]', '') as term
, REGEXP_REPLACE(medium, '[()]', '') as medium
, REGEXP_REPLACE(campaign, '[()]', '') as campaign
, REGEXP_REPLACE(content, '[()]', '') as content
, percent_scrolled
, outbound
, session_engaged
, engagement_time_msec
, search_term
, link_url
, ignore_referrer
, entrances
FROM unnest_from_ga_raw
)
select *
from target_ga;
Discussion