📚

PowerQuery で 主要な統計情報を取得

2022/10/15に公開

背景

PowerQueryで主要な統計情報を取得する。

GUI で取得する

https://support.microsoft.com/ja-jp/office/統計情報を表示するプロファイル-データ-power-query-79616636-43aa-428f-b14b-f9c5c060f6b2

Table.Profile を使用する

https://learn.microsoft.com/ja-jp/powerquery-m/table-profile

列を元のデータ順に並び変える

関数名:TableProfile

(TempTable) =>
// TableのProfileを取得し元の列順に並べ替える
    let 
        //TempTable = Source,
        Profile = Table.Profile(TempTable),
        Schema = Table.Schema(TempTable),
        NestedJoin = Table.NestedJoin(Profile, "Column", Schema, "Name", "JoinTable"),
        ExpandTableColumn = Table.ExpandTableColumn(NestedJoin, "JoinTable", {"Position", "Kind"}, {"Position", "Kind"}),
        Sort = Table.Sort(ExpandTableColumn,{{"Position", Order.Ascending}}),
        ReorderColumns = Table.ReorderColumns(Sort, {"Position", "Column", "Kind", "Count", "NullCount", "DistinctCount", "Max", "Min", "Average", "StandardDeviation"}),
        result = ReorderColumns
    in
        result

自前で作成する

let
    Source = Tableデータ,
    Base = (name) =>
        let
            row = Table.Column(Source, name),
            result = [
                Name = name, 
                Count = List.Count(row), 
                NullCount = (List.Count(row) - List.NonNullCount(row)), 
                Empty = List.Count(List.Select(row, each _ = "")),
                Mean = List.Average(row), 
                Std = List.StandardDeviation(row),
                Min = List.Min(row), 
                Max = List.Max(row), 
                Median = List.Median(row), 
                Distinct = List.Count(List.Distinct(row)), 
                Mode = List.Mode(row), 
                Freq = List.Count(List.Select(row, each _ = List.Mode(row)))
            ]
        in
            result,
    result =  Table.FromRecords(List.Transform(Table.ColumnNames(Source), each Base(_)))
in
    result

Discussion