【Python】Unicodeで表示できる文字を出力する

ひらがな

サンプルコード

import unicodedata

# Build the list of hiragana characters.
#
# The range is the Unicode "Hiragana" block, U+3040-U+309F. chr() accepts
# every integer in 0..0x10FFFF and unicodedata.category() accepts any single
# character, so nothing here can raise and no exception handling is needed
# (a blanket `except Exception` would only hide real bugs).
#
# Only code points whose general category is "Cn" (unassigned) or "Cs"
# (surrogate) are dropped. This removes the reserved slots of the block such
# as U+3040, but it is not a general "printable" test: the combining marks
# U+3099 and U+309A are kept.
list_hiragana = [
    char
    for char in map(chr, range(0x3040, 0x309F + 1))
    if unicodedata.category(char) not in ("Cn", "Cs")
]

# Helper that writes the collected characters to a file
def write_to_file(filepath, data):
    """Write the strings in *data* to *filepath* as UTF-8, one per line.

    The items are joined with a newline character, so no newline follows
    the last item. An existing file at *filepath* is overwritten.
    """
    with open(filepath, mode='w', encoding='utf-8') as out:
        joined = '\n'.join(data)
        out.write(joined)

# Save the hiragana list to a file, one character per line.
write_to_file(filepath='./output_hiragana.txt', data=list_hiragana)

# Show the hiragana list on standard output.
print("ひらがな:", list_hiragana)

実行例

(.venv) $ python surrogate_pair_hiragana.py 
ひらがな: ['ぁ', 'あ', 'ぃ', 'い', 'ぅ', 'う', 'ぇ', 'え', 'ぉ', 'お', 'か', 'が', 'き', 'ぎ', 'く', 'ぐ', 'け', 'げ', 'こ', 'ご', 'さ', 'ざ', 'し', 'じ', 'す', 'ず', 'せ', 'ぜ', 'そ', 'ぞ', 'た', 'だ', 'ち', 'ぢ', 'っ', 'つ', 'づ', 'て', 'で', 'と', 'ど', 'な', 'に', 'ぬ', 'ね', 'の', 'は', 'ば', 'ぱ', 'ひ', 'び', 'ぴ', 'ふ', 'ぶ', 'ぷ', 'へ', 'べ', 'ぺ', 'ほ', 'ぼ', 'ぽ', 'ま', 'み', 'む', 'め', 'も', 'ゃ', 'や', 'ゅ', 'ゆ', 'ょ', 'よ', 'ら', 'り', 'る', 'れ', 'ろ', 'ゎ', 'わ', 'ゐ', 'ゑ', 'を', 'ん', 'ゔ', 'ゕ', 'ゖ', '゙', '゚', '゛', '゜', 'ゝ', 'ゞ', 'ゟ']
(.venv) $

10文字ごとに改行して出力する

import unicodedata

# Build the list of hiragana characters from the Unicode "Hiragana" block
# (U+3040-U+309F).
#
# chr() is valid for every integer in 0..0x10FFFF and unicodedata.category()
# works on any single character, so no try/except is required; swallowing
# every Exception here could only mask a genuine programming error.
#
# Code points with general category "Cn" (unassigned) or "Cs" (surrogate) are
# skipped. Combining marks such as U+3099 are assigned and therefore kept.
list_hiragana = [
    char
    for char in map(chr, range(0x3040, 0x309F + 1))
    if unicodedata.category(char) not in ("Cn", "Cs")
]

# Helper that writes the characters to a file, several per line
def write_to_file(filepath, data, *, chunk_size=10):
    """Write *data* to *filepath* as UTF-8 text, *chunk_size* items per line.

    Args:
        filepath: Path of the file to create or overwrite.
        data: Sequence of strings; it must support ``len()`` and slicing.
        chunk_size: Number of items concatenated on each line. Defaults to
            10, which reproduces the original fixed-width behaviour.

    Raises:
        ValueError: If *chunk_size* is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")

    # Build every line before opening the file so that a bad item in *data*
    # cannot leave a truncated file behind.
    lines = [
        ''.join(data[start:start + chunk_size]) + '\n'
        for start in range(0, len(data), chunk_size)
    ]
    with open(filepath, 'w', encoding='utf-8') as f:
        # Each line, including the last one, ends with a newline.
        f.writelines(lines)

# Save the hiragana list to a file, ten characters per line.
write_to_file('./output_hiragana.txt', list_hiragana)

# Show the hiragana list on standard output.
print("ひらがな:", list_hiragana)

実行例

(.venv) $ cat output_hiragana.txt 
ぁあぃいぅうぇえぉお
かがきぎくぐけげこご
さざしじすずせぜそぞ
ただちぢっつづてでと
どなにぬねのはばぱひ
びぴふぶぷへべぺほぼ
ぽまみむめもゃやゅゆ
ょよらりるれろゎわゐ
ゑをんゔゕゖ゙゚゛゜
ゝゞゟ
(.venv) $

qa_dev

カタカナ

サンプルコード

import unicodedata

# Build the list of katakana characters from the Unicode "Katakana" block
# (U+30A0-U+30FF).
#
# chr() is valid for every integer in 0..0x10FFFF and unicodedata.category()
# works on any single character, so the loop needs no exception handling;
# a blanket `except Exception` would only hide real bugs.
#
# Code points with general category "Cn" (unassigned) or "Cs" (surrogate)
# are skipped. This is not a general "printable" test.
list_katakana = [
    char
    for char in map(chr, range(0x30A0, 0x30FF + 1))
    if unicodedata.category(char) not in ("Cn", "Cs")
]

# Helper that saves a list of characters to a text file
def write_to_file(filepath, data):
    """Save the strings in *data* to *filepath*, separated by newlines.

    The file is written as UTF-8 and replaces any existing file. Because the
    items are joined rather than terminated, the output has no trailing
    newline.
    """
    with open(filepath, 'w', encoding='utf-8') as handle:
        contents = '\n'.join(data)
        handle.write(contents)

# Save the katakana list to a file, one character per line.
write_to_file(filepath='./output_katakana.txt', data=list_katakana)

# Show the katakana list on standard output.
print("カタカナ:", list_katakana)

実行例

(.venv) $ python surrogate_pair_katakana.py
カタカナ: ['゠', 'ァ', 'ア', 'ィ', 'イ', 'ゥ', 'ウ', 'ェ', 'エ', 'ォ', 'オ', 'カ', 'ガ', 'キ', 'ギ', 'ク', 'グ', 'ケ', 'ゲ', 'コ', 'ゴ', 'サ', 'ザ', 'シ', 'ジ', 'ス', 'ズ', 'セ', 'ゼ', 'ソ', 'ゾ', 'タ', 'ダ', 'チ', 'ヂ', 'ッ', 'ツ', 'ヅ', 'テ', 'デ', 'ト', 'ド', 'ナ', 'ニ', 'ヌ', 'ネ', 'ノ', 'ハ', 'バ', 'パ', 'ヒ', 'ビ', 'ピ', 'フ', 'ブ', 'プ', 'ヘ', 'ベ', 'ペ', 'ホ', 'ボ', 'ポ', 'マ', 'ミ', 'ム', 'メ', 'モ', 'ャ', 'ヤ', 'ュ', 'ユ', 'ョ', 'ヨ', 'ラ', 'リ', 'ル', 'レ', 'ロ', 'ヮ', 'ワ', 'ヰ', 'ヱ', 'ヲ', 'ン', 'ヴ', 'ヵ', 'ヶ', 'ヷ', 'ヸ', 'ヹ', 'ヺ', '・', 'ー', 'ヽ', 'ヾ', 'ヿ']
(.venv) $

qa_dev

漢字

サンプルコード

import unicodedata

# Build the list of kanji from the Unicode "CJK Unified Ideographs" block
# (U+4E00-U+9FFF).
#
# chr() is valid for every integer in 0..0x10FFFF and unicodedata.category()
# works on any single character, so no exception handling is needed; a
# blanket `except Exception` would only hide real bugs.
#
# Code points with general category "Cn" (unassigned) or "Cs" (surrogate)
# are skipped. How many code points at the end of the block count as
# assigned depends on the Unicode database version bundled with the running
# Python, so the length of the list can differ between Python versions.
list_kanji = [
    char
    for char in map(chr, range(0x4E00, 0x9FFF + 1))
    if unicodedata.category(char) not in ("Cn", "Cs")
]

# Helper that dumps a list of characters to a text file
def write_to_file(filepath, data):
    """Dump the strings in *data* into *filepath*, one item per line.

    Output is UTF-8 encoded and overwrites an existing file. Items are
    joined with a newline character, so the last line is not terminated.
    """
    with open(filepath, mode='w', encoding='utf-8') as stream:
        body = '\n'.join(data)
        stream.write(body)

# Save the kanji list to a file, one character per line.
write_to_file(filepath='./output_kanji.txt', data=list_kanji)

# Show the kanji list on standard output.
print("漢字:", list_kanji)

実行例 (かなり長いので一部のみ紹介)

鼿', '齀', '齁', '齂', '齃', '齄', '齅', '齆', '齇', '齈', '齉', '齊', '齋', '齌', '齍', '齎', '齏', '齐', '齑', '齒', '齓', '齔', '齕', '齖', '齗', '齘', '齙', '齚', '齛', '齜', '齝', '齞', '齟', '齠', '齡', '齢', '齣', '齤', '齥', '齦', '齧', '齨', '齩', '齪', '齫', '齬', '齭', '齮', '齯', '齰', '齱', '齲', '齳', '齴', '齵', '齶', '齷', '齸', '齹', '齺', '齻', '齼', '齽', '齾', '齿', '龀', '龁', '龂', '龃', '龄', '龅', '龆', '龇', '龈', '龉', '龊', '龋', '龌', '龍', '龎', '龏', '龐', '龑', '龒', '龓', '龔', '龕', '龖', '龗', '龘', '龙', '龚', '龛', '龜', '龝', '龞', '龟', '龠', '龡', '龢', '龣', '龤', '龥', '龦', '龧', '龨', '龩', '龪', '龫', '龬', '龭', '龮', '龯', '龰', '龱', '龲', '龳', '龴', '龵', '龶', '龷', '龸', '龹', '龺', '龻', '龼', '龽', '龾', '龿', '鿀', '鿁', '鿂', '鿃', '鿄', '鿅', '鿆', '鿇', '鿈', '鿉', '鿊', '鿋', '鿌', '鿍', '鿎', '鿏', '鿐', '鿑', '鿒', '鿓', '鿔', '鿕', '鿖', '鿗', '鿘', '鿙', '鿚', '鿛', '鿜', '鿝', '鿞', '鿟', '鿠', '鿡', '鿢', '鿣', '鿤', '鿥', '鿦', '鿧', '鿨', '鿩', '鿪', '鿫', '鿬', '鿭', '鿮', '鿯', '鿰', '鿱', '鿲', '鿳', '鿴', '鿵', '鿶', '鿷', '鿸', '鿹', '鿺', '鿻', '鿼']
(.venv) $

qa_dev

ASCII 印字可能文字（英数字・記号・スペース）

サンプルコード

import unicodedata

# Build the list of printable ASCII characters, U+0020 (space) through
# U+007E (tilde). Besides letters and digits this range also contains the
# space and punctuation/symbol characters.
#
# chr() is valid for every integer in 0..0x10FFFF and unicodedata.category()
# works on any single character, so no exception handling is needed; a
# blanket `except Exception` would only hide real bugs.
#
# The "Cn"/"Cs" (unassigned/surrogate) filter mirrors the other scripts in
# this article; no code point in this range falls in either category.
list_ascii = [
    char
    for char in map(chr, range(0x0020, 0x007E + 1))
    if unicodedata.category(char) not in ("Cn", "Cs")
]

# Helper that stores a list of characters in a text file
def write_to_file(filepath, data):
    """Store the strings in *data* in *filepath*, newline-separated.

    The file is opened for writing as UTF-8, replacing any previous
    contents. No newline is appended after the final item.
    """
    with open(filepath, 'w', encoding='utf-8') as target:
        payload = '\n'.join(data)
        target.write(payload)

# Save the ASCII character list to a file, one character per line.
write_to_file(filepath='./output_ascii.txt', data=list_ascii)

# Show the ASCII character list on standard output.
print("ASCII英数字:", list_ascii)

実行例

(.venv) $ python surrogate_pair_ascii.py 
ASCII英数字: [' ', '!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']', '^', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~']
(.venv) $

qa_dev

記号ver

サンプルコード

import unicodedata

# Build the list of symbol characters by scanning the whole Unicode code
# space, U+0000-U+10FFFF.
#
# chr() is valid for every integer in that range and unicodedata.category()
# works on any single character (surrogates included), so no exception
# handling is needed; a blanket `except Exception` would only hide real bugs.
#
# A character is kept when its general category starts with "S", i.e. the
# symbol categories Sm (math), Sc (currency), Sk (modifier) and So (other).
list_symbols = [
    char
    for char in map(chr, range(0x0000, 0x10FFFF + 1))
    if unicodedata.category(char).startswith("S")
]

# Helper that writes a list of characters out to a text file
def write_to_file(filepath, data):
    """Write each string in *data* to *filepath* on its own line.

    The text is encoded as UTF-8 and any existing file is overwritten. The
    items are joined with a newline character, so the file does not end
    with a newline.
    """
    with open(filepath, mode='w', encoding='utf-8') as sink:
        rendered = '\n'.join(data)
        sink.write(rendered)

# Save the symbol list to a file, one character per line.
write_to_file(filepath='./output_symbols.txt', data=list_symbols)

# Show the symbol list on standard output.
print("記号:", list_symbols)

### 補足
全Unicode範囲を走査するため、実行すると出力される記号の数はかなり多くなります。