💭
【PHP】書記素クラスター単位でコールバックを適用する関数を定義する
まずは preg_replace_callback を使って定義する
<?php
// Sample input: the katakana "ハ" followed by U+309A, repeated twice.
$str = "ハ\u{309A}ハ\u{309A}";

// Run every grapheme cluster through normalizer_normalize() individually.
$normalized = str_each_grapheme(
    $str,
    function ($grapheme) {
        return normalizer_normalize($grapheme);
    }
);

// Dump whether the rebuilt string is identical to the literal "パパ".
var_dump("パパ" === $normalized);
<?php
/**
 * Applies a callback to every grapheme cluster of a string and returns the
 * string rebuilt from the callback results.
 *
 * Clusters are matched with the PCRE \X escape in UTF-8 (u) mode.
 *
 * NOTE(review): preg_replace_callback() is documented to return null on a
 * PCRE error (e.g. malformed UTF-8 under the u flag); that would conflict with
 * the declared string return type — confirm callers only pass valid UTF-8.
 *
 * @param string   $str      Text to process.
 * @param callable $callback Invoked as $callback($grapheme, $position), where
 *                           $position is the zero-based number of clusters
 *                           matched before this one. Its return value replaces
 *                           the matched cluster.
 *
 * @return string The text with each cluster replaced by the callback result.
 */
function str_each_grapheme(string $str, callable $callback): string {
    $position = 0;

    $replaceCluster = function ($match) use ($callback, &$position) {
        $cluster = $match[0];

        // Pass the current position, then advance it for the next match.
        return $callback($cluster, $position++);
    };

    return preg_replace_callback('/\X/su', $replaceCluster, $str);
}
IntlBreakIterator を使った定義は次のとおり
/**
 * Applies a callback to every character-level segment reported by
 * IntlBreakIterator and returns the concatenated callback results.
 *
 * Each segment is cut out of the input with substr(), i.e. the boundaries
 * produced by the iterator are used as byte offsets into $str.
 *
 * @param string   $str      Text to process.
 * @param callable $callback Invoked as $callback($segment); its return value
 *                           is appended to the result.
 *
 * @return string Concatenation of the callback results, in text order.
 */
function str_each_grapheme3(string $str, callable $callback): string {
    $boundaries = IntlBreakIterator::createCharacterInstance();
    $boundaries->setText($str);

    $result = '';
    $start = 0;

    foreach ($boundaries as $end) {
        // A boundary of 0 is the start of the text and closes no segment.
        if ($end !== 0) {
            $segment = substr($str, $start, $end - $start);
            $result .= $callback($segment);

            // The end of this segment is the start of the next one.
            $start = $end;
        }
    }

    return $result;
}
grapheme_strlen、grapheme_substr を使った定義は次のとおり
/**
 * Applies a callback to every grapheme cluster of a string, locating the
 * clusters with grapheme_strlen() and grapheme_substr(), and returns the
 * concatenated callback results.
 *
 * NOTE(review): grapheme_substr() is called once per cluster with an
 * increasing offset; this presumably rescans the string each time, so long
 * inputs may be slow — confirm if that matters for callers.
 *
 * @param string   $str      Text to process.
 * @param callable $callback Invoked as $callback($grapheme); its return value
 *                           is appended to the result.
 *
 * @return string Concatenation of the callback results, in text order.
 */
function str_each_grapheme4(string $str, callable $callback): string {
    $result = '';
    $count = grapheme_strlen($str);

    $position = 0;
    while ($position < $count) {
        // Take exactly one cluster at the current cluster offset.
        $result .= $callback(grapheme_substr($str, $position, 1));
        ++$position;
    }

    return $result;
}
Discussion