Converting NFD dakuon/handakuon (voiced/semi-voiced kana) to NFC in various programming languages, Part 2
Go
package main

import (
    "regexp"
    "unicode/utf8"

    "golang.org/x/text/unicode/norm"
)

// dakuon_normalize applies NFC only to a kana followed by a combining
// (han)dakuten, leaving the rest of the string untouched.
func dakuon_normalize(str string) string {
    re := regexp.MustCompile(`[\p{Hiragana}\p{Katakana}]\p{Mn}`)
    return re.ReplaceAllStringFunc(str, func(m string) string {
        return norm.NFC.String(m)
    })
}

func main() {
    str := "は\u3099は\u3099とハ\u309Aハ\u309Aと神"
    ret := dakuon_normalize(str)
    println(utf8.RuneCountInString(str))
    println(utf8.RuneCountInString(ret))
    println(ret)
}
Rust
use std::borrow::Cow;

use regex::{Captures, Regex};
use unicode_normalization::UnicodeNormalization;

// Applies NFC only to a kana followed by a combining (han)dakuten,
// leaving the rest of the string untouched.
fn dakuon_normalize<'h>(str: &'h str) -> Cow<'h, str> {
    let regex = Regex::new(r"[\p{Hiragana}\p{Katakana}]\p{Mn}").unwrap();
    regex.replace_all(str, |caps: &Captures| caps[0].nfc().collect::<String>())
}

fn main() {
    let str = "は\u{3099}は\u{3099}とハ\u{309A}ハ\u{309A}と神";
    let ret = dakuon_normalize(str);
    println!("{}", str.chars().count());
    println!("{}", ret.chars().count());
    println!("{}", ret);
}
Swift
import Foundation

extension String {
    // Applies NFC only to a kana followed by a combining (han)dakuten,
    // leaving the rest of the string untouched.
    var dakuonNormalize: String {
        let pattern = #"[\p{Hiragana}\p{Katakana}]\p{Nonspacing Mark}"#
        let regex = try! Regex(pattern).matchingSemantics(.unicodeScalar)
        return self.replacing(regex) { match in
            match.0.precomposedStringWithCanonicalMapping
        }
    }
}

let str = "は\u{3099}は\u{3099}とハ\u{309A}ハ\u{309A}と神"
let ret = str.dakuonNormalize
print(str.unicodeScalars.count)
print(ret.unicodeScalars.count)
print(ret)
Kotlin
Verified by running it as a script:
kotlinc -script test.kts
test.kts
import java.text.Normalizer

// Applies NFC only to a kana followed by a combining (han)dakuten,
// leaving the rest of the string untouched.
fun dakuonNormalize(str: String): String {
    return str.replace("""[\p{IsHiragana}\p{IsKatakana}]\p{Mn}""".toRegex()) {
        Normalizer.normalize(it.value, Normalizer.Form.NFC)
    }
}

val str = "は\u3099は\u3099とハ\u309Aハ\u309Aと神"
val ret = dakuonNormalize(str)
println(str.length)  // String.length counts UTF-16 code units (equal to code points here, all BMP)
println(ret.length)
println(ret)
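
Why restrict the replacement to kana + combining mark instead of running NFC over the whole string? NFC also rewrites characters with singleton canonical decompositions, such as the CJK compatibility ideographs. If the 神 in the test string is the compatibility ideograph U+FA19 (a plausible reason for including it, though the code point is an assumption on my part), whole-string NFC would turn it into U+795E, while the targeted replacement leaves it alone. A minimal Kotlin sketch of that difference, under that assumption:

import java.text.Normalizer

// Assumption: the kanji is the CJK compatibility ideograph 神 (U+FA19),
// which NFC canonically maps to the unified ideograph 神 (U+795E).
val sample = "は\u3099\uFA19"

// Whole-string NFC: composes は + U+3099 into ば, but also rewrites U+FA19 to U+795E.
val whole = Normalizer.normalize(sample, Normalizer.Form.NFC)
println(whole.map { "U+%04X".format(it.code) })  // [U+3070, U+795E]

// Targeted replacement (same regex as dakuonNormalize above): only the kana sequence is composed.
val targeted = sample.replace("""[\p{IsHiragana}\p{IsKatakana}]\p{Mn}""".toRegex()) {
    Normalizer.normalize(it.value, Normalizer.Form.NFC)
}
println(targeted.map { "U+%04X".format(it.code) })  // [U+3070, U+FA19]

The same contrast carries over to the Go, Rust, and Swift versions above, since each of them also feeds only the matched kana sequence to the normalizer.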