🚀

【PHP】書記素クラスターのジェネレーターをつくる

2024/07/14に公開

以前書いた記事を読み直してキーも返すように修正

<?php

// Sample text: five kana, each followed by the combining voiced sound mark U+3099,
// so every visible character spans two code points.
$str = "か\u{3099}き\u{3099}く\u{3099}け\u{3099}こ\u{3099}";

// Create one generator per implementation; nothing runs until iteration starts.
$gen = grapheme_gen($str);
$gen2 = grapheme_gen2($str);
$gen3 = grapheme_gen3($str);

// Drain the generators one after another, printing "<key> <cluster>" per line.
foreach ([$gen, $gen2, $gen3] as $generator) {
    foreach ($generator as $key => $value) {
        echo "{$key} {$value}", PHP_EOL;
    }
}

/**
 * Lazily yields the grapheme clusters of a string using PCRE's `\X`.
 *
 * Each match is searched starting at the byte offset where the previous
 * match ended. Iteration stops as soon as preg_match() reports no match
 * (or a failure).
 *
 * @param string $str Text to split.
 *
 * @return Generator<int, string> Zero-based sequential index => grapheme cluster.
 */
function grapheme_gen(string $str): Generator {
    $index = 0;

    // The byte position advances by the byte length of the cluster just yielded.
    for ($pos = 0; preg_match('/\X/us', $str, $found, 0, $pos); $pos += strlen($cluster)) {
        $cluster = $found[0];
        yield $index++ => $cluster;
    }
}

/**
 * Lazily yields the grapheme clusters of a string using IntlBreakIterator.
 *
 * The character break iterator reports boundary offsets; every pair of
 * consecutive boundaries delimits one cluster, which is cut out of the
 * original string with substr().
 *
 * @param string $str Text to split.
 *
 * @return Generator<int, string> Zero-based sequential index => grapheme cluster.
 */
function grapheme_gen2(string $str): Generator {
    $iterator = IntlBreakIterator::createCharacterInstance();
    $iterator->setText($str);

    $start = 0;
    $index = 0;

    foreach ($iterator as $boundary) {
        // A boundary of 0 is the start of the text, so there is no cluster before it.
        if ($boundary !== 0) {
            yield $index++ => substr($str, $start, $boundary - $start);
            $start = $boundary;
        }
    }
}

/**
 * Lazily yields the grapheme clusters of a string using mbstring's regex
 * search API with the `\X` pattern.
 *
 * The mb_ereg_search_* functions keep their subject, pattern and position
 * in state shared by the whole process. A generator is suspended at every
 * `yield`, so that state can be replaced before the generator resumes
 * (another grapheme_gen3() generator being iterated, or the consumer using
 * mb_ereg_search_* itself). To stay correct in that situation the byte
 * position is tracked locally and the search state is re-established
 * before each match.
 *
 * @param string $str Text to split.
 *
 * @return Generator<int, string> Zero-based sequential index => grapheme cluster.
 */
function grapheme_gen3(string $str): Generator {
    $length = strlen($str);
    $pos = 0;
    $key = 0;

    while ($pos < $length) {
        // Re-bind the shared search state to this generator's subject and
        // position; stop if the search cannot be initialised for it.
        if (!mb_ereg_search_init($str, '\X')) {
            return;
        }
        mb_ereg_search_setpos($pos);

        if (!mb_ereg_search()) {
            return;
        }

        // Read everything from the shared state before suspending.
        $matches = mb_ereg_search_getregs();
        $next = mb_ereg_search_getpos();

        // Defensive: never loop without making progress.
        if ($next <= $pos) {
            return;
        }
        $pos = $next;

        yield $key => $matches[0];
        ++$key;
    }
}

Discussion