🐥

データサイエンス100本ノック(構造化データ加工編)をRで解く 61 - 70

2021/11/21に公開

Top
前の問題:51-60

R-061

# R-061: total sales amount per customer, limited to customers that appear in
# df_customer with an ID that does not start with "Z", plus scaled and
# base-10 log versions of the total.
# standardize() and regularize() are helpers defined outside this snippet.
res <- df_receipt %>%
  group_by(customer_id) %>%
  summarise(total_amount = sum(amount)) %>%
  inner_join(
    # The join doubles as the filter: only matching customer IDs survive.
    dplyr::filter(df_customer, !str_starts(customer_id, "Z")),
    by = "customer_id"
  ) %>%
  select(customer_id, total_amount) %>%
  mutate(
    std_amount = standardize(total_amount),
    rgl_amount = regularize(total_amount),
    log10_amount = log(total_amount, base = 10)
  )

# Show the first 10 rows.
head(res, 10)

R-062

# R-062: total sales amount per customer, limited to customers that appear in
# df_customer with an ID that does not start with "Z", plus scaled, base-10
# log and natural log versions of the total.
# standardize() and regularize() are helpers defined outside this snippet.
res <- df_receipt %>%
  group_by(customer_id) %>%
  summarise(total_amount = sum(amount)) %>%
  inner_join(
    # The join doubles as the filter: only matching customer IDs survive.
    dplyr::filter(df_customer, !str_starts(customer_id, "Z")),
    by = "customer_id"
  ) %>%
  select(customer_id, total_amount) %>%
  mutate(
    std_amount = standardize(total_amount),
    rgl_amount = regularize(total_amount),
    log10_amount = log(total_amount, base = 10),
    loge_amount = log(total_amount)
  )

# Show the first 10 rows.
head(res, 10)

R-063

# R-063: per-product profit (unit_price - unit_cost), first 10 rows.
df_product %>%
  select(product_cd, unit_price, unit_cost) %>%
  mutate(unit_profit = unit_price - unit_cost) %>%
  head(10)

R-064

# R-064: overall mean of the per-product profit (unit_price - unit_cost).
df_product %>%
  select(product_cd, unit_price, unit_cost) %>%
  mutate(unit_profit = unit_price - unit_cost) %>%
  # Rows with a missing value in any of the four columns are excluded.
  drop_na() %>%
  summarise(avg_profit = mean(unit_profit))

R-065

# R-065: new unit price that yields a 30% profit rate, rounded down to a whole
# number, shown next to the current and new profit rates for the first 10 rows.
# Rows containing any missing value are dropped first.
df_product %>%
  drop_na() %>%
  mutate(
    # price = cost / 0.7. Multiplying before dividing keeps whole-number costs
    # exact: 161 / 70 * 100 evaluates to 229.99999999999997 and would floor
    # to 229, whereas 161 * 100 / 70 is exactly 230.
    unit_price_new = floor(unit_cost * 100 / 70),
    unit_profit = unit_price - unit_cost,
    unit_profit_new = unit_price_new - unit_cost,
    unit_profit_rate = round(unit_profit / unit_price, 3),
    unit_profit_rate_new = round(unit_profit_new / unit_price_new, 3)
  ) %>%
  select(starts_with("unit")) %>%
  head(10)

R-066

# R-066: new unit price that yields a 30% profit rate, rounded with round(),
# shown next to the current and new profit rates for the first 10 rows.
# Rows containing any missing value are dropped first.
df_product %>%
  drop_na() %>%
  mutate(
    unit_price_new = round(unit_cost / 70 * 100),
    unit_profit = unit_price - unit_cost,
    unit_profit_new = unit_price_new - unit_cost
  ) %>%
  mutate(
    unit_profit_rate = round(unit_profit / unit_price, 3),
    unit_profit_rate_new = round(unit_profit_new / unit_price_new, 3)
  ) %>%
  select(starts_with("unit")) %>%
  head(10)

R-067

# R-067: new unit price that yields a 30% profit rate, rounded up to a whole
# number, shown next to the current and new profit rates for the first 10 rows.
# Rows containing any missing value are dropped first.
df_product %>%
  drop_na() %>%
  mutate(
    # price = cost / 0.7. Multiplying before dividing keeps whole-number costs
    # exact: 77 / 70 * 100 evaluates to 110.00000000000001 and would be
    # rounded up to 111, whereas 77 * 100 / 70 is exactly 110.
    unit_price_new = ceiling(unit_cost * 100 / 70),
    unit_profit = unit_price - unit_cost,
    unit_profit_new = unit_price_new - unit_cost,
    unit_profit_rate = round(unit_profit / unit_price, 3),
    unit_profit_rate_new = round(unit_profit_new / unit_price_new, 3)
  ) %>%
  select(starts_with("unit")) %>%
  head(10)

R-068

# R-068: tax-inclusive unit price (unit_price * 1.10, rounded down) for the
# first 10 products. Rows containing any missing value are dropped first.
df_product %>%
  drop_na() %>%
  mutate(unit_price_taxed = floor(unit_price * 1.10)) %>%
  select(starts_with("unit")) %>%
  head(10)

R-069

# R-069: per customer, the share of the total sales amount that comes from
# products whose category_major_cd is "07".

# Receipt lines with the major category code of each product attached.
joined <- left_join(
  select(df_receipt, customer_id, product_cd, quantity, amount),
  select(df_product, product_cd, category_major_cd),
  by = "product_cd"
)

joined %>%
  group_by(customer_id) %>%
  summarise(total_amount = sum(amount)) %>%
  left_join(
    joined %>%
      dplyr::filter(category_major_cd == "07") %>%
      group_by(customer_id) %>%
      summarise(total_amount_07 = sum(amount)),
    by = "customer_id"
  ) %>%
  mutate(rate = total_amount_07 / total_amount) %>%
  # Customers without a category "07" total have NA here and are removed.
  drop_na() %>%
  head(10)

R-070

# R-070: elapsed days from each customer's application date to each sale,
# first 10 rows. Receipts without a matching customer keep NA dates.
df_receipt %>%
  left_join(
    df_customer %>% select(customer_id, application_date),
    by = "customer_id"
  ) %>%
  mutate(
    sales_ymd = lubridate::parse_date_time(sales_ymd, "%Y%m%d"),
    application_date = lubridate::parse_date_time(application_date, "%Y%m%d"),
    # Subtracting POSIXct values lets difftime pick the unit from the smallest
    # absolute difference in the column, so one same-day purchase would turn
    # the whole column into seconds. Ask for days explicitly.
    days_since_application = difftime(
      sales_ymd,
      application_date,
      units = "days"
    )
  ) %>%
  select(
    customer_id,
    sales_ymd,
    application_date,
    days_since_application
  ) %>%
  head(10)

次の問題:71-80

Discussion