Open6ヶ月前にコメント追加4

【x86/x64 only】Python と OpenCV と Pillow-SIMD でできる限り高速に外部フォントを使用した日本語文字列を指定文字幅で改行しながらレンダリングする

Pillow がすでにインストール済みの場合は、先にアンインストールする

sudo apt-get remove -y --purge python3-pil
または
pip uninstall Pillow

x86/x64 前提のワークアラウンド

pip install \
opencv-contrib-python==4.10.0.84 \
pillow-simd==9.0.0.post1 \
numpy==1.24.3

確認

python -c "import PIL;print(PIL.__version__)"

9.0.0.post1

外部フォントでの描画と指定文字幅に到達したときに自動的に改行を挿入して動画に改行コード付き日本語文字列をレンダリングする無茶なコードスニペット

test_freetypefont_newline.py

import cv2
import time
import numpy as np
np.random.seed(39)
from PIL import Image, ImageDraw, ImageFont
from collections import defaultdict
from typing import List, Dict, Tuple

def _wrap_lines(
    text: List[str],
    char_width: int,
    wrap_width: int,
) -> List[str]:
    """Split strings into display rows that fit within ``wrap_width``.

    Parameters
    ----------
    text: List[str]
        Source strings. Each string starts on a new row.

    char_width: int
        Estimated width in pixels of one character. The same value is used
        for every character, so no glyph is measured here.

    wrap_width: int
        Maximum estimated row width in pixels.

    Returns
    -------
    rows: List[str]
        Rows to draw, without any newline characters.
    """
    rows: List[str] = []
    for line in text:
        current_line = ''
        current_width = 0
        for char in line:
            if char == '\n':
                # An explicit line break ends the row. The newline itself is
                # dropped so that each row stays a single-line string.
                rows.append(current_line)
                current_line = ''
                current_width = 0
            elif current_line and current_width + char_width > wrap_width:
                # The row is full: start the next row with this character.
                # An empty row always accepts one character, so a wrap_width
                # smaller than char_width does not produce blank rows.
                rows.append(current_line)
                current_line = char
                current_width = char_width
            else:
                current_line += char
                current_width += char_width
        rows.append(current_line)
    return rows


def draw_text(
    images: List[np.ndarray],
    texts: List[List[str]],
    x: int,
    y: int,
    font_size: int,
    wrap_width: int,
    font: ImageFont.FreeTypeFont="./NotoSansCJK-Bold.ttc",
) -> List[np.ndarray]:
    """Function to draw text using Pillow, wrapping it to fit the specified width

    Wrapping uses an estimated width of ``font_size`` pixels per character
    instead of measuring each glyph, so narrow characters wrap earlier than
    strictly necessary. Timing information for each stage is printed.

    Parameters
    ----------
    images: List[np.ndarray]
        Images to draw a string. They are copied into Pillow images and are
        not modified. Paired with ``texts`` by position; surplus entries on
        either side are ignored.

    texts: List[List[str]]
        Strings to be drawn. One list of strings per image.

    x: int
        Drawing start coordinate X.

    y: int
        Drawing start coordinate Y.

    font_size: int
        Font size. Also used as the estimated width of one character.

    wrap_width: int
        Width of the base on which to wrap the string.

    font: ImageFont.FreeTypeFont
        Font. Default: "./NotoSansCJK-Bold.ttc"
        A font file path (str) is also accepted and is loaded at
        ``font_size`` on every call, so passing a preloaded font is faster.

    Returns
    -------
    images_after_rendering: List[np.ndarray]
    """
    # The default value is a file path, but the drawing calls below need a
    # font object, so load it here.
    if isinstance(font, str):
        font = ImageFont.truetype(font=font, size=font_size)

    pil_images: List[Image.Image] = []
    draws: List[ImageDraw.ImageDraw] = []
    line_height = 0

    # Stage 1: convert every image to a Pillow image with a drawing context.
    start_time = time.perf_counter()
    for image, _ in zip(images, texts):
        start_time1 = time.perf_counter()
        pil_image = Image.fromarray(image)
        draw = ImageDraw.Draw(pil_image)
        print(f'  1-1. {(time.perf_counter()-start_time1)*1000} ms')

        start_time2 = time.perf_counter()
        if line_height == 0:
            # Bottom edge of the bounding box of 'A', measured only once.
            line_height = draw.textbbox((0, 0), 'A', font=font)[3]

        pil_images.append(pil_image)
        draws.append(draw)
        print(f'  1-2. {(time.perf_counter()-start_time2)*1000} ms')
    print(f'1. {(time.perf_counter()-start_time)*1000} ms')

    # Stage 2: wrap the strings of each pane and draw them row by row.
    start_time = time.perf_counter()
    images_after_rendering: List[np.ndarray] = []

    for pil_image, draw, text in zip(pil_images, draws, texts):
        # Time the wrapping of the whole pane, not only its last string.
        start_time1 = time.perf_counter()
        rows = _wrap_lines(text, font_size, wrap_width)
        print(f'  2-1. {(time.perf_counter()-start_time1)*1000} ms')

        start_time2 = time.perf_counter()
        y0 = y
        for row in rows:
            draw.text((x, y0), row, font=font, fill=(255, 255, 255))
            y0 += line_height + 5  # Leave 5 pixels between lines
        print(f'  2-2. {(time.perf_counter()-start_time2)*1000} ms')

        images_after_rendering.append(np.asarray(pil_image))
    print(f'2. {(time.perf_counter()-start_time)*1000} ms')

    return images_after_rendering

def main():
    """Show camera frames next to text panes rendered by ``draw_text``.

    Frames are read from camera 0. For every frame the text panes are
    rebuilt, filled with text, stacked vertically, placed to the right of
    the frame, and shown together with the measured processing time.
    The loop ends when a frame cannot be read or when 'q' is pressed.
    """
    # Number of text display area panes
    text_pane_num = 2
    # fixed color scheme
    color_palette: List[Tuple] = [
        (40, 40, 40), # almost black
        (255, 51, 51), # blue
    ]
    # random auto color scheme
    if text_pane_num > len(color_palette):
        random_bgr_colors = \
            np.random.randint(
                0,
                256,
                size=(text_pane_num - len(color_palette), 3)
            )
        color_palette = \
            color_palette + \
                [tuple(random_bgr_color) for random_bgr_color in random_bgr_colors.tolist()]
    # Use exactly one color per pane even when the palette is longer.
    pane_colors = color_palette[:text_pane_num]

    texts: List[List[str]] = \
        [
            [
                "１行目：これはテストです。abcde文字列が長い場合、ここで折り返しが行われます。",
                "２行目：２行目がただしく改行されて表示されることを確認します。",
            ],
            [
                "１行目：２つ目のペイン用",
                "２行目：テストテストテストテストテストテストテストテストテストテストテスト",
            ],
        ]
    # draw_text pairs panes with texts by position, so give every pane an
    # entry (empty when there is no text for it) to keep the pane count stable.
    pane_texts: List[List[str]] = \
        (texts + [[] for _ in range(text_pane_num - len(texts))])[:text_pane_num]

    font_path: str = "./NotoSansCJK-Bold.ttc"
    font_size: int = 20
    font: ImageFont.FreeTypeFont = \
        ImageFont.truetype(font=font_path, size=font_size)

    # Text display pane width
    text_pane_width = 320

    # Init Camera
    cap = cv2.VideoCapture(0)

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            start_time = time.perf_counter()

            height = frame.shape[0]

            # Text display pane heights. The last pane takes the remainder so
            # that the stacked panes are exactly as tall as the frame.
            base_height, remainder = divmod(height, text_pane_num)
            pane_heights = [base_height] * text_pane_num
            pane_heights[-1] += remainder

            # Create base image for text display pane
            text_panes: List[np.ndarray] = \
                [
                    np.full(
                        shape=(pane_height, text_pane_width, 3),
                        fill_value=color,
                        dtype=np.uint8,
                    ) for pane_height, color in zip(pane_heights, pane_colors)
                ]

            # Draw strings in the display pane
            text_panes = \
                draw_text(
                    images=text_panes,
                    texts=pane_texts,
                    x=10,
                    y=30,
                    font_size=font_size,
                    wrap_width=text_pane_width - font_size,
                    font=font,
                )

            # Combine video pane and text display pane
            text_area = np.vstack(tuple(text_panes))
            combined = np.hstack((frame, text_area))

            elapsed_ms = (time.perf_counter() - start_time) * 1000
            # Draw the elapsed time twice: a thick white pass first, then a
            # thin red pass on top of it.
            cv2.putText(
                combined,
                f'{elapsed_ms:.2f} ms',
                (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.7,
                (255, 255, 255),
                2,
                cv2.LINE_AA,
            )
            cv2.putText(
                combined,
                f'{elapsed_ms:.2f} ms',
                (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.7,
                (0,0,255),
                1,
                cv2.LINE_AA,
            )

            cv2.imshow(winname='Video with Text', mat=combined)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the camera and close the window even if the loop raised.
        cap.release()
        cv2.destroyAllWindows()

# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()

pillow を使用して、文字列が指定した幅を超えた時点で改行コードを自動的に挿入してレンダリングした結果（43 ms）
pillow-simd を使用して、文字列が指定した幅を超えた時点で改行コードを自動的に挿入してレンダリングした結果（5.6 ms）まだ遅い。キレそう。

pillow-simd の処理ログ

  1-1. 0.1256969990208745 ms
  1-2. 0.047323061153292656 ms
  1-1. 0.09070802479982376 ms
  1-2. 0.00878400169312954 ms
1. 0.30529394280165434 ms
  2-1. 0.012679025530815125 ms
  2-2. 3.2826379174366593 ms
  2-1. 0.02065300941467285 ms
  2-2. 1.8087009666487575 ms
2. 5.431697005406022 ms
  1-1. 0.12287998106330633 ms
  1-2. 0.047530047595500946 ms
  1-1. 0.08998007979243994 ms
  1-2. 0.00819796696305275 ms
1. 0.300794024951756 ms
  2-1. 0.012642936781048775 ms
  2-2. 2.6476739440113306 ms
  2-1. 0.014274963177740574 ms
  2-2. 2.1945780608803034 ms
2. 5.162067012861371 ms