🙆
データサイエンス100本ノック(構造化データ加工編)をRで解く 91 - 100
R-091
# Build a 1:1 sample: keep every row of customer_has_amount and draw the same
# number of rows at random from customer_has_no_amount. The seed is fixed so
# the random draw is reproducible.
set.seed(100)
under_sampled <- bind_rows(
  customer_has_amount,
  customer_has_no_amount %>%
    slice_sample(n = nrow(customer_has_amount))
)

# Count the rows on each side to confirm the balance.
# has_amount is TRUE when amount is non-zero (the flag was previously computed
# as amount == 0, which labelled the two groups the wrong way round).
under_sampled %>%
  mutate(has_amount = amount != 0) %>%
  count(has_amount)
R-092
# Lookup table holding each distinct (gender_cd, gender) pair, ordered by code.
df_gender <- df_customer %>%
  distinct(gender_cd, gender) %>%
  arrange(gender_cd)

# Customer table with the gender name column removed; show the first 10 rows.
df_customer %>%
  select(!gender) %>%
  head(n = 10)
R-093
# One lookup table per category level: the distinct rows of the columns whose
# names start with the level's prefix, ordered by that level's code.
cat_mjr_cd <- df_category %>%
  select(starts_with("category_major")) %>%
  distinct() %>%
  arrange(category_major_cd)

cat_mdm_cd <- df_category %>%
  select(starts_with("category_medium")) %>%
  distinct() %>%
  arrange(category_medium_cd)

cat_sml_cd <- df_category %>%
  select(starts_with("category_small")) %>%
  distinct() %>%
  arrange(category_small_cd)

# Attach the three lookup tables to the product table by their code columns
# and show the first 10 rows.
df_product %>%
  left_join(cat_mjr_cd, by = "category_major_cd") %>%
  left_join(cat_mdm_cd, by = "category_medium_cd") %>%
  left_join(cat_sml_cd, by = "category_small_cd") %>%
  head(n = 10)
R-094
# Create the output folder if it is missing. recursive = TRUE also creates the
# parent "data" folder, so this works even when neither folder exists yet.
if (!dir.exists("data/mywork")) {
  dir.create("data/mywork", recursive = TRUE)
}

# Join the category lookup tables onto the product table and save the result
# as a CSV file with a header row and no row-name column.
# fileEncoding is set explicitly: write.csv() otherwise writes in the
# platform's native encoding, which is not UTF-8 on every system, while the
# file is read back later with readr, which assumes UTF-8 by default.
df_product %>%
  left_join(cat_mjr_cd, by = "category_major_cd") %>%
  left_join(cat_mdm_cd, by = "category_medium_cd") %>%
  left_join(cat_sml_cd, by = "category_small_cd") %>%
  write.csv(
    "data/mywork/df_product_with_category_names.csv",
    row.names = FALSE,
    fileEncoding = "UTF-8"
  )
R-095
# Join the category lookup tables onto the product table and save the result
# as a CP932-encoded CSV file with a header row and no row-name column.
df_product %>%
  left_join(cat_mjr_cd, by = "category_major_cd") %>%
  left_join(cat_mdm_cd, by = "category_medium_cd") %>%
  left_join(cat_sml_cd, by = "category_small_cd") %>%
  write.csv(
    file = "data/mywork/df_product_with_category_names_cp932.csv",
    row.names = FALSE,
    fileEncoding = "CP932"
  )
R-096
# Join the category lookup tables onto the product table and save the result
# as a comma-separated file without a header row.
# row.names = FALSE is required: write.table() writes row names by default,
# which would add an unintended row-number column at the start of every line.
# qmethod = "double" escapes embedded quotes by doubling them (the CSV
# convention, and what write.csv() does); fileEncoding pins the output to
# UTF-8 instead of the platform's native encoding.
df_product %>%
  left_join(cat_mjr_cd, by = "category_major_cd") %>%
  left_join(cat_mdm_cd, by = "category_medium_cd") %>%
  left_join(cat_sml_cd, by = "category_small_cd") %>%
  write.table(
    "data/mywork/df_product_with_category_names_no_header.csv",
    sep = ",",
    row.names = FALSE,
    col.names = FALSE,
    qmethod = "double",
    fileEncoding = "UTF-8"
  )
R-097
# Read the CSV file (first row used as column names) and show the first
# 10 rows.
"data/mywork/df_product_with_category_names.csv" %>%
  readr::read_csv() %>%
  head(n = 10)
R-098
# Read the header-less CSV file; col_names = FALSE makes readr treat the first
# line as data instead of column names. Show the first 10 rows.
"data/mywork/df_product_with_category_names_no_header.csv" %>%
  readr::read_csv(col_names = FALSE) %>%
  head(n = 10)
R-099
# Join the category lookup tables onto the product table and save the result
# as a tab-separated file.
df_product %>%
  left_join(cat_mjr_cd, by = "category_major_cd") %>%
  left_join(cat_mdm_cd, by = "category_medium_cd") %>%
  left_join(cat_sml_cd, by = "category_small_cd") %>%
  readr::write_tsv(
    "data/mywork/df_product_with_category_names.tsv"
  )
R-100
# Read the tab-separated file and show the first 10 rows.
"data/mywork/df_product_with_category_names.tsv" %>%
  readr::read_tsv() %>%
  head(n = 10)
Discussion