📑

いろいろなプログラミング言語で NFD の濁音・半濁音を NFC に変換する・その2

2024/07/18に公開

Go

package main

import (
    "regexp"
    "golang.org/x/text/unicode/norm"
    "unicode/utf8"
)

// dakuonPattern matches a single Hiragana- or Katakana-script rune that is
// immediately followed by one nonspacing mark (general category Mn), such as
// the combining voiced/semi-voiced sound marks U+3099 and U+309A.
// It is compiled once at package initialization instead of on every call.
var dakuonPattern = regexp.MustCompile(`[\p{Hiragana}\p{Katakana}]\p{Mn}`)

// dakuon_normalize returns str with every "kana + nonspacing mark" pair
// passed through NFC normalization. Text outside those matches is copied
// through untouched, so the rest of the string is not normalized.
func dakuon_normalize(str string) string {
    // norm.NFC.String already has the func(string) string shape that
    // ReplaceAllStringFunc expects, so no wrapper closure is needed.
    return dakuonPattern.ReplaceAllStringFunc(str, norm.NFC.String)
}

// main runs dakuon_normalize on a sample string containing decomposed
// dakuten/handakuten kana and prints the rune count before and after,
// followed by the normalized string itself.
func main() {
    input := "は\u3099は\u3099とハ\u309Aハ\u309Aと神"
    output := dakuon_normalize(input)

    // Rune counts: first the original, then the normalized text.
    for _, s := range []string{input, output} {
        println(utf8.RuneCountInString(s))
    }
    println(output)
}

Rust

use std::borrow::Cow;
use regex::{Regex, Captures};
use unicode_normalization::UnicodeNormalization;

/// Applies NFC normalization to every "kana + nonspacing mark" pair in `str`.
///
/// The pattern matches one Hiragana- or Katakana-script character followed by
/// one character of general category Mn. Only the matched pairs are
/// normalized; everything else in the input is left as it is. The result is
/// whatever `Regex::replace_all` returns for the haystack.
///
/// # Panics
///
/// Panics if the regular expression fails to compile (`unwrap` on `Regex::new`).
fn dakuon_normalize<'h>(str: &'h str)-> Cow<'h, str> {
    let regex = Regex::new(r"[\p{Hiragana}\p{Katakana}]\p{Mn}").unwrap();
    // `caps[0]` is already a string slice, so it can be normalized directly;
    // no intermediate `String` copy is required before calling `nfc()`.
    regex.replace_all(str, |caps: &Captures| caps[0].nfc().collect::<String>())
}

/// Runs `dakuon_normalize` on a sample string with decomposed kana and prints
/// the character count before and after, then the normalized string.
fn main() {
    let input = "は\u{003099}は\u{003099}とハ\u{00309A}ハ\u{00309A}と神";
    let normalized = dakuon_normalize(input);

    // Character (Unicode scalar) counts: original first, normalized second.
    let before = input.chars().count();
    let after = normalized.chars().count();
    println!("{}", before);
    println!("{}", after);
    println!("{}", normalized);
}

Swift

import Foundation

extension String {
    /// A copy of the string in which every "kana + nonspacing mark" pair has
    /// been replaced by its precomposed (canonical mapping) form.
    ///
    /// Matching is done with Unicode-scalar semantics so that the base kana
    /// and the following mark are seen as two separate elements.
    /// Text outside the matches is left unchanged.
    var dakuonNormalize: String {
        let kanaWithMarkPattern = #"[\p{Hiragana}\p{Katakana}]\p{Nonspacing Mark}"#
        let kanaWithMark = try! Regex(kanaWithMarkPattern).matchingSemantics(.unicodeScalar)
        return replacing(kanaWithMark) { hit in
            hit.0.precomposedStringWithCanonicalMapping
        }
    }
}

// Demo: normalize a sample string with decomposed kana, then print the
// Unicode scalar count before and after, followed by the result.
let str = "は\u{3099}は\u{3099}とハ\u{309A}ハ\u{309A}と神"
let ret = str.dakuonNormalize

for text in [str, ret] {
    print(text.unicodeScalars.count)
}
print(ret)

Kotlin

スクリプトとして実行して確認した

kotlinc -script test.kts
test.kts
import java.text.Normalizer

/**
 * Matches one Hiragana- or Katakana-script character immediately followed by
 * one nonspacing mark (general category Mn), such as U+3099 or U+309A.
 * Built once here rather than on every call to [dakuonNormalize].
 */
private val KANA_WITH_MARK = Regex("""[\p{IsHiragana}\p{IsKatakana}]\p{Mn}""")

/**
 * Returns [str] with every "kana + nonspacing mark" pair converted to NFC.
 *
 * Only the matched pairs are normalized; all other text is returned unchanged,
 * so characters elsewhere in the string that NFC would alter are preserved.
 *
 * @param str the text to process
 * @return the text with matched pairs replaced by their NFC form
 */
fun dakuonNormalize(str: String): String =
    str.replace(KANA_WITH_MARK) { match ->
        Normalizer.normalize(match.value, Normalizer.Form.NFC)
    }

// Demo: normalize a sample string with decomposed kana, then print the
// UTF-16 length before and after, followed by the result.
val str = "は\u3099は\u3099とハ\u309Aハ\u309Aと神"
val ret = dakuonNormalize(str)

listOf(str, ret).forEach { text -> println(text.length) }
println(ret)

Discussion