👋

データサイエンス100本ノック(構造化データ加工編)をRで解く 21 - 30

2021/11/21に公開

Top
前の問題:11-20

R-021

# Dimensions of the receipt data as c(number of rows, number of columns).
dim(df_receipt)

R-022

# Number of distinct customer IDs in the receipt data.
# The column is named explicitly: pull() without a column argument
# extracts the LAST column of the data frame, not customer_id.
df_receipt %>%
    pull(customer_id) %>%
    n_distinct()

R-023

# Per-store totals: sum of amount and sum of quantity for each store_cd.
df_receipt %>%
    group_by(store_cd) %>%
    summarise(
        total_amount = sum(amount),
        total_quantity = sum(quantity)
    )

R-024

# Largest sales_ymd per customer_id (reported as latest_buying);
# only the first 10 rows of the result are shown.
df_receipt %>%
    group_by(customer_id) %>%
    summarise(
        latest_buying = max(sales_ymd)
    ) %>%
    head(n = 10)

R-025

# Smallest sales_ymd per customer_id (reported as oldest_buying);
# only the first 10 rows of the result are shown.
df_receipt %>%
    group_by(customer_id) %>%
    summarise(
        oldest_buying = min(sales_ymd)
    ) %>%
    head(n = 10)

R-026

# For each customer_id compute both the largest and the smallest sales_ymd,
# then keep only the customers for which the two values differ.
# The first 10 matching rows are shown.
df_receipt %>%
    group_by(customer_id) %>%
    summarise(
        latest_buying = max(sales_ymd),
        oldest_buying = min(sales_ymd)
    ) %>%
    ungroup() %>%
    # dplyr:: prefix avoids picking up stats::filter by accident.
    dplyr::filter(latest_buying != oldest_buying) %>%
    head(n = 10)

R-027

# Mean amount per store_cd, sorted from highest to lowest; top 5 rows.
df_receipt %>%
    group_by(store_cd) %>%
    summarise(avg_amount = mean(amount)) %>%
    arrange(desc(avg_amount)) %>%
    head(n = 5)

R-028

# Mean and median amount per store_cd.
# Rows are ordered by the median (descending) and the top 5 are shown.
df_receipt %>%
    group_by(store_cd) %>%
    summarise(avg_amount = mean(amount), med_amount = median(amount)) %>%
    arrange(desc(med_amount)) %>%
    head(n = 5)

R-029

# Most frequent value of `x`, returned as a character string.
#
# The value is read from the names of the frequency table, so the result is
# always character even for numeric input. When several values share the
# highest count, the first one in the table's (sorted) order is returned.
# Note: defining this masks base::mode() for the rest of the session.
mode <- function(x) {
    top <- which.max(table(x))
    names(top)
}

# Per-store summary: mean and median of amount, plus the most frequent
# product_cd. mode() is the most-frequent-value helper defined earlier in
# this file (not base::mode()).
# The last column is named mod_product_cd because it is the mode of
# product_cd, not of amount. Only the first 10 rows are shown.
df_receipt %>%
    group_by(store_cd) %>%
    summarise(
        avg_amount = mean(amount),
        med_amount = median(amount),
        mod_product_cd = mode(product_cd)
    ) %>%
    head(10)

R-030

# Variance of amount per store_cd, sorted from largest to smallest;
# the first 10 rows are shown.
# Note: var() is the sample variance (n - 1 denominator).
df_receipt %>%
    group_by(store_cd) %>%
    summarise(
        var_amount = var(amount)
    ) %>%
    arrange(desc(var_amount)) %>%
    head(n = 10)

次の問題:31-40

Discussion