💡
EloquentのorderBy()とCollectionのsortBy()の挙動が異なったので調べてみた
きっかけ
業務で記号を含む文字のソートをしていた時に、MySQLからorderBy()したコレクションと、適当にselectしたものを後でsortした場合で並び順が異なっていたため、細かく調べてみた。
対象テーブル
id | character | bin_code | created_at | updated_at |
---|---|---|---|---|
1 | ! | 21 | 2023-10-05 15:21:39 | 2023-10-05 15:21:39 |
2 | * | 2a | 2023-10-05 15:21:39 | 2023-10-05 15:21:39 |
3 | # | 23 | 2023-10-05 15:21:39 | 2023-10-05 15:21:39 |
4 | × | c397 | 2023-10-05 15:21:39 | 2023-10-05 15:21:39 |
5 | ✕ | e29c95 | 2023-10-05 15:21:39 | 2023-10-05 15:21:39 |
6 | ♯ | e299af | 2023-10-05 15:21:39 | 2023-10-05 15:21:39 |
7 | A | 41 | 2023-10-05 15:21:39 | 2023-10-05 15:21:39 |
8 | 0 | 30 | 2023-10-05 15:21:39 | 2023-10-05 15:21:39 |
sortByでソート
$model = new Character();
$collection = $model->get();
$collection->sortBy('character');
Illuminate\Database\Eloquent\Collection {#7003
all: [
0 => App\Models\Character {#7230
id: 1,
character: "!",
bin_code: "21",
created_at: "2023-10-05 15:21:39",
updated_at: "2023-10-05 15:21:39",
},
2 => App\Models\Character {#7232
id: 3,
character: "#",
bin_code: "23",
created_at: "2023-10-05 15:21:39",
updated_at: "2023-10-05 15:21:39",
},
1 => App\Models\Character {#7231
id: 2,
character: "*",
bin_code: "2a",
created_at: "2023-10-05 15:21:39",
updated_at: "2023-10-05 15:21:39",
},
7 => App\Models\Character {#7237
id: 8,
character: "0",
bin_code: "30",
created_at: "2023-10-05 15:21:39",
updated_at: "2023-10-05 15:21:39",
},
6 => App\Models\Character {#7236
id: 7,
character: "A",
bin_code: "41",
created_at: "2023-10-05 15:21:39",
updated_at: "2023-10-05 15:21:39",
},
3 => App\Models\Character {#7233
id: 4,
character: "<C3><97>", // ×
bin_code: "c397",
created_at: "2023-10-05 15:21:39",
updated_at: "2023-10-05 15:21:39",
},
5 => App\Models\Character {#7235
id: 6,
character: "<E2><99><AF>", // ♯
bin_code: "e299af",
created_at: "2023-10-05 15:21:39",
updated_at: "2023-10-05 15:21:39",
},
4 => App\Models\Character {#7234
id: 5,
character: "<E2><9C><95>", // ✕
bin_code: "e29c95",
created_at: "2023-10-05 15:21:39",
updated_at: "2023-10-05 15:21:39",
},
],
}
Eloquentで取得
$model = new Character();
$model->orderBy('character')->get();
Illuminate\Database\Eloquent\Collection {#7218
all: [
App\Models\Character {#7220
id: 1,
character: "!",
bin_code: "21",
created_at: "2023-10-05 15:21:39",
updated_at: "2023-10-05 15:21:39",
},
App\Models\Character {#7221
id: 2,
character: "*",
bin_code: "2a",
created_at: "2023-10-05 15:21:39",
updated_at: "2023-10-05 15:21:39",
},
App\Models\Character {#7222
id: 3,
character: "#",
bin_code: "23",
created_at: "2023-10-05 15:21:39",
updated_at: "2023-10-05 15:21:39",
},
App\Models\Character {#7223
id: 4,
character: "<C3><97>", // ×
bin_code: "c397",
created_at: "2023-10-05 15:21:39",
updated_at: "2023-10-05 15:21:39",
},
App\Models\Character {#7224
id: 5,
character: "<E2><9C><95>", // ✕
bin_code: "e29c95",
created_at: "2023-10-05 15:21:39",
updated_at: "2023-10-05 15:21:39",
},
App\Models\Character {#7225
id: 6,
character: "<E2><99><AF>", // ♯
bin_code: "e299af",
created_at: "2023-10-05 15:21:39",
updated_at: "2023-10-05 15:21:39",
},
App\Models\Character {#7226
id: 8,
character: "0",
bin_code: "30",
created_at: "2023-10-05 15:21:39",
updated_at: "2023-10-05 15:21:39",
},
App\Models\Character {#7227
id: 7,
character: "A",
bin_code: "41",
created_at: "2023-10-05 15:21:39",
updated_at: "2023-10-05 15:21:39",
},
],
}
sortByは16進数バイナリコード順になっているっぽい?
PHPのsort()
は文字コード順で並び替える実装になっているはず。
文字列は数値に変換して比較をしているので、ここでは文字コードに変換して比較し、(おそらく)ソートしている。
orderBy()はこれを見ただけでは何順かわからない。。。
MySQLのORDER BY
はCollationというのが関係しているらしい。
調べてみる
select * from INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME = 'order_by_test';
CATALOG_NAME | SCHEMA_NAME | DEFAULT_CHARACTER_SET_NAME | DEFAULT_COLLATION_NAME | SQL_PATH | DEFAULT_ENCRYPTION |
---|---|---|---|---|---|
def | order_by_test | utf8mb4 | utf8mb4_0900_ai_ci | NO |
utf8mb4_0900_ai_ci
とは。。。
どうやら、Unicode Collation Algorithm 9.0.0というアルゴリズムでソートされているらしい。(理解力が足りていない)
utf8mb4_0900_ai_ciは
- Unicode Collation Algorithm 9.0.0のアルゴリズムで
- アクセントを区別せず
- 大文字小文字を区別しない
その結果、PHPのソートアルゴリズムとは異なる結果が得られるらしい。
どうすれば同じ結果が得られるのか
バイナリ順でソートをすれば問題なさそう。
$model->orderByRaw("characters.character COLLATE utf8mb4_bin")->get();
Illuminate\Database\Eloquent\Collection {#7218
all: [
App\Models\Character {#7231
id: 1,
character: "!",
bin_code: "21",
created_at: "2023-10-05 15:21:39",
updated_at: "2023-10-05 15:21:39",
},
App\Models\Character {#7232
id: 3,
character: "#",
bin_code: "23",
created_at: "2023-10-05 15:21:39",
updated_at: "2023-10-05 15:21:39",
},
App\Models\Character {#7233
id: 2,
character: "*",
bin_code: "2a",
created_at: "2023-10-05 15:21:39",
updated_at: "2023-10-05 15:21:39",
},
App\Models\Character {#7234
id: 8,
character: "0",
bin_code: "30",
created_at: "2023-10-05 15:21:39",
updated_at: "2023-10-05 15:21:39",
},
App\Models\Character {#7235
id: 7,
character: "A",
bin_code: "41",
created_at: "2023-10-05 15:21:39",
updated_at: "2023-10-05 15:21:39",
},
App\Models\Character {#7236
id: 4,
character: "<C3><97>",
bin_code: "c397",
created_at: "2023-10-05 15:21:39",
updated_at: "2023-10-05 15:21:39",
},
App\Models\Character {#7237
id: 6,
character: "<E2><99><AF>",
bin_code: "e299af",
created_at: "2023-10-05 15:21:39",
updated_at: "2023-10-05 15:21:39",
},
App\Models\Character {#7238
id: 5,
character: "<E2><9C><95>",
bin_code: "e29c95",
created_at: "2023-10-05 15:21:39",
updated_at: "2023-10-05 15:21:39",
},
],
}
Discussion