👋
データサイエンス100本ノック(構造化データ加工編)をRで解く 21 - 30
R-021
# Report the dimensions of the receipt data as c(rows, columns).
dim(df_receipt)
R-022
# Count the distinct customer IDs in the receipt data.
# pull() called without a column argument extracts the LAST column of the
# data frame, so the column is named explicitly here instead of relying on
# column order.
df_receipt %>%
  pull(customer_id) %>%
  unique() %>%
  length()
R-023
# Per store_cd, sum amount and quantity.
df_receipt %>%
  group_by(store_cd) %>%
  summarise(
    total_amount = sum(amount),
    total_quantity = sum(quantity)
  )
R-024
# Per customer_id, take the maximum sales_ymd; show the first 10 rows.
df_receipt %>%
  group_by(customer_id) %>%
  summarise(
    latest_buying = max(sales_ymd)
  ) %>%
  head(n = 10)
R-025
# Per customer_id, take the minimum sales_ymd; show the first 10 rows.
df_receipt %>%
  group_by(customer_id) %>%
  summarise(
    oldest_buying = min(sales_ymd)
  ) %>%
  head(n = 10)
R-026
# Per customer_id, compute the maximum (latest_buying) and minimum
# (oldest_buying) sales_ymd, keep only customers where the two differ,
# and show the first 10 rows.
df_receipt %>%
  group_by(customer_id) %>%
  summarise(
    latest_buying = max(sales_ymd),
    oldest_buying = min(sales_ymd)
  ) %>%
  ungroup() %>%
  dplyr::filter(latest_buying != oldest_buying) %>%
  head(n = 10)
R-027
# Per store_cd, compute the mean amount; show the 5 stores with the
# highest mean.
df_receipt %>%
  group_by(store_cd) %>%
  summarise(avg_amount = mean(amount)) %>%
  arrange(desc(avg_amount)) %>%
  head(n = 5)
R-028
# Per store_cd, compute the mean and median amount; show the 5 stores
# with the highest median.
df_receipt %>%
  group_by(store_cd) %>%
  summarise(avg_amount = mean(amount), med_amount = median(amount)) %>%
  arrange(desc(med_amount)) %>%
  head(n = 5)
R-029
# Return the most frequent value of `x`, as a character string.
#
# Arguments:
#   x  An atomic vector. NA values are not counted (table() drops them).
#
# Returns:
#   A length-one character vector holding the label of the most frequent
#   value. When several values tie, the first one in table()'s ordering
#   wins. Returns NA_character_ when `x` has no non-missing values, so the
#   result is always a length-one vector that summarise() can accept.
#
# NOTE(review): this name masks base::mode() in the global environment; it
# is kept unchanged because the summarise() call that follows uses it.
mode <- function(x) {
  counts <- table(x)
  if (length(counts) == 0L) {
    # Nothing to count: empty input or all values missing.
    return(NA_character_)
  }
  names(counts)[which.max(counts)]
}
# Per store_cd, compute the mean and median amount together with the most
# frequent product_cd; show the first 10 rows.
# The last column holds the mode of product_cd (not of amount), so it is
# named after the column it summarises.
df_receipt %>%
  group_by(store_cd) %>%
  summarise(
    avg_amount = mean(amount),
    med_amount = median(amount),
    mode_product_cd = mode(product_cd)
  ) %>%
  head(n = 10)
R-030
# Per store_cd, compute the variance of amount; show the 10 stores with
# the highest variance. var() uses the n - 1 denominator.
df_receipt %>%
  group_by(store_cd) %>%
  summarise(
    var_amount = var(amount)
  ) %>%
  arrange(desc(var_amount)) %>%
  head(n = 10)
Discussion