Open4
【x86/x64 only】Python と OpenCV と Pillow-SIMD でできる限り高速に外部フォントを使用した日本語文字列を指定文字幅で改行しながらレンダリングする
-
Pillow がすでにインストール済みの場合は、先に次のいずれかでアンインストールする: `sudo apt-get remove -y --purge python3-pil` または `pip uninstall Pillow`
-
x86/x64 前提のワークアラウンド
pip install opencv-contrib-python==4.10.0.84 pillow-simd==9.0.0.post1 numpy==1.24.3
-
確認
python -c "import PIL;print(PIL.__version__)"
出力: 9.0.0.post1
外部フォントでの描画と指定文字幅に到達したときに自動的に改行を挿入して動画に改行コード付き日本語文字列をレンダリングする無茶なコードスニペット
test_freetypefont_newline.py
import cv2
import time
import numpy as np
np.random.seed(39)
from PIL import Image, ImageDraw, ImageFont
from collections import defaultdict
from typing import List, Dict, Tuple
def draw_text(
    images: List[np.ndarray],
    texts: List[List[str]],
    x: int,
    y: int,
    font_size: int,
    wrap_width: int,
    font: ImageFont.FreeTypeFont="./NotoSansCJK-Bold.ttc",
) -> List[np.ndarray]:
    """Draw wrapped text onto each image with Pillow.

    Every character is assumed to be ``font_size`` pixels wide (no per-glyph
    measurement is done), so wrapping is exact for full-width glyphs and
    conservative for narrower ones.

    Timing information for each phase is printed to stdout.

    Parameters
    ----------
    images: List[np.ndarray]
        Images to draw a string. One image per pane.

    texts: List[List[str]]
        Strings to be drawn. ``texts[i]`` holds the logical lines for
        ``images[i]``; panes without a matching entry are dropped (``zip``).

    x: int
        Drawing start coordinate X.

    y: int
        Drawing start coordinate Y.

    font_size: int
        Font size. Also used as the estimated width of one character.

    wrap_width: int
        Width of the base on which to wrap the string.

    font: ImageFont.FreeTypeFont
        Font. Default: "./NotoSansCJK-Bold.ttc"
        A path string is also accepted and is loaded at ``font_size``;
        pass a preloaded font object to avoid reloading it on every call.

    Returns
    -------
    images_after_rendering: List[np.ndarray]
        The rendered panes, in the same order as ``images``.
    """
    # The default value is a path, but Pillow needs a font object.
    if isinstance(font, str):
        font = ImageFont.truetype(font=font, size=font_size)

    pil_images: List[Image.Image] = []
    draws: List[ImageDraw.ImageDraw] = []
    line_height = 0

    # Phase 1: convert every pane to a Pillow image and measure line height.
    start_time = time.perf_counter()
    for image, _ in zip(images, texts):
        start_time1 = time.perf_counter()
        pil_image = Image.fromarray(image)
        draw = ImageDraw.Draw(pil_image)
        print(f' 1-1. {(time.perf_counter()-start_time1)*1000} ms')

        start_time2 = time.perf_counter()
        if line_height == 0:
            # Measured once; every row uses the same height.
            line_height = draw.textbbox((0, 0), 'A', font=font)[3]
        pil_images.append(pil_image)
        draws.append(draw)
        print(f' 1-2. {(time.perf_counter()-start_time2)*1000} ms')
    print(f'1. {(time.perf_counter()-start_time)*1000} ms')

    # Phase 2: wrap the text of each pane and render it row by row.
    start_time = time.perf_counter()
    images_after_rendering: List[np.ndarray] = []
    for pil_image, draw, text in zip(pil_images, draws, texts):
        y0 = y

        start_time1 = time.perf_counter()
        rows: List[str] = []
        for line in text:
            rows.extend(_wrap_line(line, font_size, wrap_width))
        print(f' 2-1. {(time.perf_counter()-start_time1)*1000} ms')

        start_time2 = time.perf_counter()
        for row in rows:
            # Rows never contain '\n', so Pillow's multi-line path is not
            # taken; empty rows only advance the cursor.
            if row:
                draw.text((x, y0), row, font=font, fill=(255, 255, 255))
            y0 += line_height + 5 # Leave 5 pixels between lines
        print(f' 2-2. {(time.perf_counter()-start_time2)*1000} ms')

        images_after_rendering.append(np.asarray(pil_image))
    print(f'2. {(time.perf_counter()-start_time)*1000} ms')

    return images_after_rendering


def _wrap_line(line: str, char_width: int, wrap_width: int) -> List[str]:
    """Split one logical line into display rows at most wrap_width wide."""
    rows: List[str] = []
    current: List[str] = []
    current_width = 0
    for char in line:
        if char == '\n':
            # Explicit break: close the row; the newline itself is not drawn
            # and takes no width.
            rows.append(''.join(current))
            current = []
            current_width = 0
        elif current and current_width + char_width > wrap_width:
            # Row is full: start a new one with this character.
            rows.append(''.join(current))
            current = [char]
            current_width = char_width
        else:
            current.append(char)
            current_width += char_width
    rows.append(''.join(current))
    return rows
def main():
    """Show the camera feed next to text panes rendered with draw_text.

    Frames are read from camera 0. For each frame the text panes are
    rebuilt, rendered, stacked to the right of the frame, and displayed
    with the per-frame processing time overlaid. Press 'q' to quit.
    """
    # Number of text display area panes
    text_pane_num = 2

    # fixed color scheme
    color_palette: List[Tuple] = [
        (40, 40, 40), # almost black
        (255, 51, 51), # blue
    ]
    # random auto color scheme
    if text_pane_num > len(color_palette):
        random_bgr_colors = \
            np.random.randint(
                0,
                256,
                size=(text_pane_num - len(color_palette), 3)
            )
        color_palette = \
            color_palette + \
            [tuple(random_bgr_color) for random_bgr_color in random_bgr_colors]

    texts: List[List[str]] = \
        [
            [
                "1行目:これはテストです。abcde文字列が長い場合、ここで折り返しが行われます。",
                "2行目:2行目がただしく改行されて表示されることを確認します。",
            ],
            [
                "1行目:2つ目のペイン用",
                "2行目:テストテストテストテストテストテストテストテストテストテストテスト",
            ],
        ]

    # Exactly one color and one (possibly empty) text list per pane, so the
    # number of rendered panes always equals text_pane_num.
    pane_colors = color_palette[:text_pane_num]
    pane_texts: List[List[str]] = \
        (texts + [[] for _ in range(text_pane_num)])[:text_pane_num]

    font_path: str = "./NotoSansCJK-Bold.ttc"
    font_size: int = 20
    font: ImageFont.FreeTypeFont = \
        ImageFont.truetype(font=font_path, size=font_size)

    # Init Camera
    cap = cv2.VideoCapture(0)

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            start_time = time.perf_counter()

            height = frame.shape[0]

            # Text display pane size
            text_pane_width = 320
            # Spread leftover rows over the first panes so the stacked panes
            # are exactly as tall as the frame (required by np.hstack).
            base_height, extra_rows = divmod(height, text_pane_num)
            pane_heights = [
                base_height + (1 if index < extra_rows else 0)
                for index in range(text_pane_num)
            ]

            # Create base image for text display pane
            text_panes: List[np.ndarray] = \
                [
                    np.full(
                        shape=(pane_height, text_pane_width, 3),
                        fill_value=color,
                        dtype=np.uint8,
                    ) for pane_height, color in zip(pane_heights, pane_colors)
                ]

            # Draw strings in the display pane
            text_panes = \
                draw_text(
                    images=text_panes,
                    texts=pane_texts,
                    x=10,
                    y=30,
                    font_size=font_size,
                    wrap_width=text_pane_width - font_size,
                    font=font,
                )

            # Combine video pane and text display pane
            text_area = np.vstack(tuple(text_panes))
            combined = np.hstack((frame, text_area))

            end_time = (time.perf_counter() - start_time) * 1000

            # Thick white pass first, thin red pass on top: outlined text.
            cv2.putText(
                combined,
                f'{end_time:.2f} ms',
                (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.7,
                (255, 255, 255),
                2,
                cv2.LINE_AA,
            )
            cv2.putText(
                combined,
                f'{end_time:.2f} ms',
                (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.7,
                (0,0,255),
                1,
                cv2.LINE_AA,
            )

            cv2.imshow(winname='Video with Text', mat=combined)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the camera and close windows even if a frame raised.
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
- Pillow を使用して文字列が指定した幅を超えた時点で改行コードを自動的に挿入してレンダリングした結果(43 ms)
- pillow-simd を使用して文字列が指定した幅を超えた時点で改行コードを自動的に挿入してレンダリングした結果(5.6 ms)まだ遅い。キレそう。
- pillow-simd の処理ログ
 1-1. 0.1256969990208745 ms
 1-2. 0.047323061153292656 ms
 1-1. 0.09070802479982376 ms
 1-2. 0.00878400169312954 ms
1. 0.30529394280165434 ms
 2-1. 0.012679025530815125 ms
 2-2. 3.2826379174366593 ms
 2-1. 0.02065300941467285 ms
 2-2. 1.8087009666487575 ms
2. 5.431697005406022 ms
 1-1. 0.12287998106330633 ms
 1-2. 0.047530047595500946 ms
 1-1. 0.08998007979243994 ms
 1-2. 0.00819796696305275 ms
1. 0.300794024951756 ms
 2-1. 0.012642936781048775 ms
 2-2. 2.6476739440113306 ms
 2-1. 0.014274963177740574 ms
 2-2. 2.1945780608803034 ms
2. 5.162067012861371 ms