🐥
データサイエンス100本ノック(構造化データ加工編)をRで解く 61 - 70
R-061
# Total sales per customer, excluding customers whose ID starts with "Z",
# with standardized, regularized and base-10 log versions of the total.
# `standardize()` and `regularize()` are helpers defined outside this snippet.
res <- df_receipt %>%
  group_by(customer_id) %>%
  summarise(total_amount = sum(amount)) %>%
  inner_join(
    # Only the key is needed from the customer table, so the join result
    # already consists of customer_id and total_amount.
    df_customer %>%
      dplyr::filter(str_starts(customer_id, "Z", negate = TRUE)) %>%
      select(customer_id),
    by = "customer_id"
  ) %>%
  mutate(
    std_amount = standardize(total_amount),
    rgl_amount = regularize(total_amount),
    log10_amount = log10(total_amount)
  )

head(res, 10)
R-062
# Total sales per customer, excluding customers whose ID starts with "Z",
# with standardized, regularized, base-10 log and natural log versions.
# `standardize()` and `regularize()` are helpers defined outside this snippet.
res <- df_receipt %>%
  group_by(customer_id) %>%
  summarise(total_amount = sum(amount)) %>%
  inner_join(
    # Only the key is needed from the customer table, so the join result
    # already consists of customer_id and total_amount.
    df_customer %>%
      dplyr::filter(str_starts(customer_id, "Z", negate = TRUE)) %>%
      select(customer_id),
    by = "customer_id"
  ) %>%
  mutate(
    std_amount = standardize(total_amount),
    rgl_amount = regularize(total_amount),
    log10_amount = log10(total_amount),
    loge_amount = log(total_amount)
  )

head(res, 10)
R-063
# Per-product profit: unit price minus unit cost, first 10 rows.
df_product %>%
  select(product_cd, unit_price, unit_cost) %>%
  mutate(unit_profit = unit_price - unit_cost) %>%
  head(10)
R-064
# Overall averages of per-product profit and profit rate.
# NOTE(review): exercise 64 of the 100-knock set asks for the mean profit
# *rate*, not the mean profit amount, as far as I recall; `avg_profit_rate`
# is added next to the original `avg_profit` -- confirm against the problem
# statement and drop whichever column is not wanted.
df_product %>%
  select(product_cd, unit_price, unit_cost) %>%
  # Rows with a missing value are removed so the means below are not NA.
  drop_na() %>%
  mutate(
    unit_profit = unit_price - unit_cost,
    unit_profit_rate = unit_profit / unit_price
  ) %>%
  summarise(
    avg_profit = mean(unit_profit),
    avg_profit_rate = mean(unit_profit_rate)
  )
R-065
# New unit price giving a profit rate of about 30 %, rounded down to a whole
# number, shown next to the current profit figures (first 10 rows).
# NOTE(review): the previous version derived a new *cost* with round(); this
# version derives a new *price* with floor() so that it lines up with the
# R-066 (round) and R-067 (ceiling) variants -- confirm against the problem
# statement.
df_product %>%
  # Rows with a missing value in any column are removed first.
  drop_na() %>%
  mutate(
    # price = cost / 0.7. Multiplying before dividing keeps exact multiples
    # exact, so floor() is not thrown off by floating-point error.
    unit_price_new = floor(unit_cost * 100 / 70),
    unit_profit = unit_price - unit_cost,
    unit_profit_new = unit_price_new - unit_cost,
    unit_profit_rate = round(unit_profit / unit_price, 3),
    unit_profit_rate_new = round(unit_profit_new / unit_price_new, 3)
  ) %>%
  select(starts_with("unit")) %>%
  head(10)
R-066
# New unit price giving a profit rate of about 30 %, rounded to the nearest
# whole number, shown next to the current profit figures (first 10 rows).
df_product %>%
  # Rows with a missing value in any column are removed first.
  drop_na() %>%
  # Step 1: the new price and the profit amounts.
  mutate(
    unit_price_new = round(unit_cost / 70 * 100),
    unit_profit = unit_price - unit_cost,
    unit_profit_new = unit_price_new - unit_cost
  ) %>%
  # Step 2: profit rates before and after, to three decimals.
  mutate(
    unit_profit_rate = round(unit_profit / unit_price, 3),
    unit_profit_rate_new = round(unit_profit_new / unit_price_new, 3)
  ) %>%
  select(starts_with("unit")) %>%
  head(10)
R-067
# New unit price giving a profit rate of about 30 %, rounded up to a whole
# number, shown next to the current profit figures (first 10 rows).
df_product %>%
  # Rows with a missing value in any column are removed first.
  drop_na() %>%
  mutate(
    # Multiply before dividing: `unit_cost / 70 * 100` can land just above an
    # exact integer (77 / 70 * 100 is 110.00000000000001), which ceiling()
    # would push up to 111. `unit_cost * 100 / 70` is exact whenever the
    # true result is a whole number.
    unit_price_new = ceiling(unit_cost * 100 / 70),
    unit_profit = unit_price - unit_cost,
    unit_profit_new = unit_price_new - unit_cost,
    unit_profit_rate = round(unit_profit / unit_price, 3),
    unit_profit_rate_new = round(unit_profit_new / unit_price_new, 3)
  ) %>%
  select(starts_with("unit")) %>%
  head(10)
R-068
# Unit price with a 10 % uplift applied, rounded down (first 10 rows).
df_product %>%
  # Rows with a missing value in any column are removed first.
  drop_na() %>%
  # Narrow to the unit_* columns before deriving the new one; the new column
  # is appended last either way, so the column order is unchanged.
  select(starts_with("unit")) %>%
  mutate(unit_price_taxed = floor(unit_price * 1.10)) %>%
  head(10)
R-069
# Share of each customer's total sales that falls in major category "07".
# Receipt lines are first tagged with the product's major category.
joined <- left_join(
  df_receipt %>% select(customer_id, product_cd, quantity, amount),
  df_product %>% select(product_cd, category_major_cd),
  by = "product_cd"
)

joined %>%
  # Total over all categories, one row per customer.
  group_by(customer_id) %>%
  summarise(total_amount = sum(amount)) %>%
  # Attach the category "07" total; customers without such sales get NA.
  left_join(
    joined %>%
      dplyr::filter(category_major_cd == "07") %>%
      group_by(customer_id) %>%
      summarise(total_amount_07 = sum(amount)),
    by = "customer_id"
  ) %>%
  mutate(rate = total_amount_07 / total_amount) %>%
  # Dropping incomplete rows keeps only customers with category "07" sales.
  drop_na() %>%
  head(10)
R-070
# Elapsed days between each sale and the customer's application date
# (first 10 rows).
df_receipt %>%
  left_join(
    df_customer %>% select(customer_id, application_date),
    by = "customer_id"
  ) %>%
  mutate(
    sales_ymd = lubridate::parse_date_time(sales_ymd, "%Y%m%d"),
    application_date = lubridate::parse_date_time(application_date, "%Y%m%d"),
    # Subtracting date-times with `-` lets difftime() choose the unit from
    # the smallest gap in the column, so a single same-day sale would turn
    # the whole column into seconds. Pin the unit to days explicitly.
    days_since_application = difftime(
      sales_ymd,
      application_date,
      units = "days"
    )
  ) %>%
  select(
    customer_id,
    sales_ymd,
    application_date,
    days_since_application
  ) %>%
  head(10)
Discussion