🙆

データサイエンス100本ノック(構造化データ加工編)をRで解く 91 - 100

2021/11/21に公開

Top
前の問題:81 - 90

R-091

# Fix the RNG seed so the random draw below is reproducible.
set.seed(100)

# Stack every row of customer_has_amount on top of an equally sized random
# sample of rows drawn from customer_has_no_amount.
under_sampled <- bind_rows(
  customer_has_amount,
  slice_sample(customer_has_no_amount, n = nrow(customer_has_amount))
)

# Count the rows in each class of the under-sampled data.
# has_amount is TRUE for rows whose amount is non-zero; comparing with
# `== 0` would label the zero-amount rows as TRUE and invert the flag.
# NOTE(review): assumes amount is 0 (not NA) for customers without sales;
# NA amounts would end up in a separate NA group -- confirm upstream.
under_sampled %>%
  mutate(has_amount = amount != 0) %>%
  group_by(has_amount) %>%
  summarise(n = n())

R-092

# Gender lookup table: one row per distinct (gender_cd, gender) pair,
# ordered by gender_cd.
df_gender <- df_customer %>%
  select(gender_cd, gender) %>%
  unique() %>%
  arrange(gender_cd)

# Preview the first 10 customer rows with the gender column dropped
# (the result is only printed, not stored).
head(select(df_customer, -gender), n = 10)

R-093

# One lookup table per category level. Each keeps only the columns whose
# names start with the level's prefix, de-duplicates the rows, and sorts
# by that level's code column.

# Columns starting with "category_major".
cat_mjr_cd <- df_category %>%
  select(starts_with("category_major")) %>%
  unique() %>%
  arrange(category_major_cd)

# Columns starting with "category_medium".
cat_mdm_cd <- df_category %>%
  select(starts_with("category_medium")) %>%
  unique() %>%
  arrange(category_medium_cd)

# Columns starting with "category_small".
cat_sml_cd <- df_category %>%
  select(starts_with("category_small")) %>%
  unique() %>%
  arrange(category_small_cd)

# Attach the columns of each category lookup table to df_product, matching
# on the corresponding code column, and preview the first 10 rows.
df_product %>%
  left_join(cat_mjr_cd, by = "category_major_cd") %>%
  left_join(cat_mdm_cd, by = "category_medium_cd") %>%
  left_join(cat_sml_cd, by = "category_small_cd") %>%
  head(n = 10)

R-094

# Create the output folder if it does not exist yet. recursive = TRUE also
# creates the parent "data" directory when it is missing; without it
# dir.create() fails and the write below errors out.
if (!dir.exists("data/mywork")) {
  dir.create("data/mywork", recursive = TRUE)
}

# Join the category lookup tables onto df_product and write the result as a
# CSV with a header row and no row-name column.
# fileEncoding is pinned to UTF-8 so the file does not depend on the
# platform's native encoding (write.csv otherwise uses the native one) and
# can be read back by readr, which assumes UTF-8 by default.
df_product %>%
  left_join(cat_mjr_cd, by = "category_major_cd") %>%
  left_join(cat_mdm_cd, by = "category_medium_cd") %>%
  left_join(cat_sml_cd, by = "category_small_cd") %>%
  write.csv(
    file = "data/mywork/df_product_with_category_names.csv",
    row.names = FALSE,
    fileEncoding = "UTF-8"
  )

R-095

# Join the category lookup tables onto df_product and write the result as a
# CP932-encoded CSV with a header row and no row-name column.
df_product %>%
  left_join(cat_mjr_cd, by = "category_major_cd") %>%
  left_join(cat_mdm_cd, by = "category_medium_cd") %>%
  left_join(cat_sml_cd, by = "category_small_cd") %>%
  write.csv(
    file = "data/mywork/df_product_with_category_names_cp932.csv",
    row.names = FALSE,
    fileEncoding = "CP932"
  )

R-096

# Join the category lookup tables onto df_product and write the result as a
# comma-separated file without a header row.
# row.names = FALSE is required: write.table() writes row names by default,
# which would add an extra leading column of row numbers to the file.
# fileEncoding is pinned to UTF-8 so the output does not depend on the
# platform's native encoding.
df_product %>%
  left_join(cat_mjr_cd, by = "category_major_cd") %>%
  left_join(cat_mdm_cd, by = "category_medium_cd") %>%
  left_join(cat_sml_cd, by = "category_small_cd") %>%
  write.table(
    file = "data/mywork/df_product_with_category_names_no_header.csv",
    sep = ",",
    row.names = FALSE,
    col.names = FALSE,
    fileEncoding = "UTF-8"
  )

R-097

# Read back the CSV that has a header row and preview the first 10 rows.
head(
  readr::read_csv("data/mywork/df_product_with_category_names.csv"),
  n = 10
)

R-098

# Read back the header-less CSV and preview the first 10 rows.
# col_names = FALSE makes readr treat the first line as data rather than
# as column names.
head(
  readr::read_csv(
    "data/mywork/df_product_with_category_names_no_header.csv",
    col_names = FALSE
  ),
  n = 10
)

R-099

# Join the category lookup tables onto df_product and write the result as a
# tab-separated file via readr.
df_product %>%
  left_join(cat_mjr_cd, by = "category_major_cd") %>%
  left_join(cat_mdm_cd, by = "category_medium_cd") %>%
  left_join(cat_sml_cd, by = "category_small_cd") %>%
  readr::write_tsv(file = "data/mywork/df_product_with_category_names.tsv")

R-100

# Read back the tab-separated file and preview the first 10 rows.
head(
  readr::read_tsv("data/mywork/df_product_with_category_names.tsv"),
  n = 10
)

Discussion