😽
いろいろなプログラミング言語で NFD の濁音・半濁音を NFC に変換する
いろいろな言語で書いたコードを1つの記事にまとめることにした
JavaScript
test.js
// Demo input: kana written as base character + combining (han)dakuten (NFD style).
// Declared with `const` so the script does not create an implicit global
// (an undeclared assignment throws a ReferenceError in strict mode / ES modules).
const str = "は\u{3099}は\u{3099}とハ\u{309A}ハ\u{309A}と神";

// `length` counts UTF-16 code units: 11 before composition, 7 after.
console.log(
  11 === str.length,
  7 === dakuon_normalize(str).length
);
/**
 * Compose every Hiragana/Katakana character that is directly followed by a
 * nonspacing mark (general category Mn, e.g. U+3099 / U+309A) into its NFC form.
 * Text outside those two-character matches is returned untouched.
 *
 * @param {string} str - Input text, possibly containing decomposed kana.
 * @returns {string} Text with each matched kana + mark pair NFC-normalized.
 */
function dakuon_normalize(str) {
  const pattern = /[\p{sc=Hiragana}\p{sc=Katakana}]\p{gc=Mn}/gu;

  // The replacer's first argument is the matched substring itself, not a
  // match array. Indexing it with [0] would keep only the base kana and
  // silently drop the combining mark, so normalize the whole match.
  return str.replace(pattern, (match) => match.normalize('NFC'));
}
Ruby
test.rb
# Composes each Hiragana/Katakana character that is immediately followed by a
# nonspacing mark (\p{Mn}) into its NFC form; everything else is left as is.
#
# str - the String to process.
#
# Returns a new String with every matched kana + mark pair NFC-normalized.
def dakuon_normalize(str)
  kana_with_mark = /[\p{Hiragana}\p{Katakana}]\p{Mn}/
  str.gsub(kana_with_mark) do |pair|
    pair.unicode_normalize(:nfc)
  end
end
# Demo: kana written as base character + combining (han)dakuten.
str = "は\u{3099}は\u{3099}とハ\u{309A}ハ\u{309A}と神"
# Print the inspected form of the normalized string.
normalized = dakuon_normalize(str)
p(normalized)
Python
test.py
import unicodedata
import regex
def dakuon_normalize(str):
    """Compose kana + nonspacing-mark pairs into their NFC form.

    Every Hiragana or Katakana character that is immediately followed by a
    nonspacing mark (``\\p{Mn}``) is passed through
    ``unicodedata.normalize('NFC', ...)``; the rest of the text is unchanged.

    Args:
        str: The text to process.

    Returns:
        The text with each matched pair NFC-normalized.
    """
    def _compose(match):
        # Normalize only the matched two-character span.
        return unicodedata.normalize('NFC', match.group(0))

    # Unicode property classes need the `regex` module; stdlib `re` lacks \p{...}.
    kana_with_mark = r'[\p{Hiragana}\p{Katakana}]\p{Mn}'
    return regex.sub(kana_with_mark, _compose, str)
# Demo: kana written as base character + combining (han)dakuten.
# Named `text` rather than `str` so the builtin `str` type is not shadowed
# at module level for the rest of the script.
text = "は\u3099は\u3099とハ\u309Aハ\u309Aと神"
# Code point counts before and after composition (11, then 7).
print(len(text))
print(len(dakuon_normalize(text)))
PHP
test.php
<?php
// Demo: decomposed input and the composed string it should normalize to.
$str = "は\u{3099}は\u{3099}とハ\u{309A}ハ\u{309A}と神";
$ret = 'ばばとパパと神';
// Dumps bool(true) when the normalized input is identical to the expected text.
var_dump(dakuon_normalize($str) === $ret);
/**
 * Compose every Hiragana/Katakana character that is directly followed by a
 * nonspacing mark (\p{Mn}) into its NFC form. Other text is left unchanged.
 *
 * Requires the intl extension (normalizer_normalize).
 *
 * @param string $str Input text, possibly containing decomposed kana.
 * @return string Text with each matched kana + mark pair NFC-normalized.
 */
function dakuon_normalize(string $str): string {
    // The variable needs its `$` sigil; a bare `pattern = ...` is a parse error.
    $pattern = '/[\p{Hiragana}\p{Katakana}]\p{Mn}/su';

    return preg_replace_callback(
        $pattern,
        // $matches[0] is the full matched pair; FORM_C (NFC) is also the default form.
        fn($matches) => normalizer_normalize($matches[0], Normalizer::FORM_C),
        $str
    );
}
Discussion