💭
複数の切り取り画像から元の画像を復元する

2024/10/27に公開
 はじめに
複数の切り取り画像から元画像を復元したかったため、Pythonにやってもらいました。

 目的
複数の切り取り画像を入力として、それらを結合し大きい元画像を出力します。

 インプット
 複数の切り取り画像
前提として、各画像間で共通の箇所が写っていることとします。











 アウトプット
 元画像
以下のようになってほしい。

 コード
ディレクトリ内の画像を読み込んだ後、

1つ目の画像と2つ目の画像に共通する特徴点を算出し、それらが重なるように合成します。

その後、合成画像と3つ目の画像、4つ目の画像、…、と特徴点の算出と合成を繰り返し、

ディレクトリの画像がすべてなくなるまで行います。
import cv2
import numpy as np
import glob
import os

# Directory that holds the cropped input images
image_dir = 'img/'

# Collect the image paths for every supported extension.
# glob gives no ordering guarantee, so the list is sorted: the stitching below
# is sequential and each image is matched against everything stitched so far.
image_files = []
for ext in ('*.png', '*.jpg', '*.jpeg', '*.bmp'):
    image_files.extend(glob.glob(os.path.join(image_dir, ext)))
image_files.sort()

if not image_files:
    raise SystemExit(f"{image_dir} に画像が見つかりません")

# Feature detector (SIFT) and brute-force matcher, created once and reused
sift = cv2.SIFT_create()
bf = cv2.BFMatcher(cv2.NORM_L2)

# The first image is the initial stitched result
base_img = cv2.imread(image_files[0])
if base_img is None:
    # cv2.imread reports an unreadable file by returning None
    raise SystemExit(f"画像 {image_files[0]} を読み込めませんでした")

for image_path in image_files[1:]:
    next_img = cv2.imread(image_path)
    if next_img is None:
        print(f"画像 {image_path} を読み込めませんでした")
        continue

    # Keypoints and descriptors of the current result (base_img) and the next image
    kp1, des1 = sift.detectAndCompute(base_img, None)
    kp2, des2 = sift.detectAndCompute(next_img, None)
    if des1 is None or des2 is None:
        # No descriptors at all (e.g. a featureless image): nothing to match
        print(f"画像 {image_path} で十分なマッチングポイントがありません")
        continue

    # Ratio test on the two nearest neighbours; rows with fewer than two
    # neighbours cannot be tested and are dropped.
    matches = bf.knnMatch(des1, des2, k=2)
    good_matches = [
        pair[0]
        for pair in matches
        if len(pair) == 2 and pair[0].distance < 0.75 * pair[1].distance
    ]

    # Require more than 10 good matches before estimating a homography
    if len(good_matches) <= 10:
        print(f"画像 {image_path} で十分なマッチングポイントがありません")
        continue

    # Matched keypoint coordinates: base_img -> next_img
    src_pts = np.float32([kp1[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2)
    dst_pts = np.float32([kp2[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2)

    # Homography that maps base_img coordinates into next_img coordinates
    H, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
    if H is None:
        print(f"画像 {image_path} で十分なマッチングポイントがありません")
        continue

    # Project the corners of base_img to find how large the canvas must be
    h1, w1 = base_img.shape[:2]
    h2, w2 = next_img.shape[:2]
    base_img_corners = np.float32([[0, 0], [0, h1], [w1, h1], [w1, 0]]).reshape(-1, 1, 2)
    next_img_corners = np.float32([[0, 0], [0, h2], [w2, h2], [w2, 0]]).reshape(-1, 1, 2)
    base_img_transformed_corners = cv2.perspectiveTransform(base_img_corners, H)

    # Bounding box of both images; floor/ceil so fractional corners are not cut off
    combined_corners = np.concatenate((base_img_transformed_corners, next_img_corners), axis=0)
    x_min, y_min = (int(v) for v in np.floor(combined_corners.min(axis=0).ravel()))
    x_max, y_max = (int(v) for v in np.ceil(combined_corners.max(axis=0).ravel()))

    # Shift everything so that the bounding box starts at (0, 0)
    translation_dist = [-x_min, -y_min]
    H_translation = np.array(
        [[1, 0, translation_dist[0]], [0, 1, translation_dist[1]], [0, 0, 1]],
        dtype=np.float64,
    )

    # Warp base_img onto the new canvas
    result_img = cv2.warpPerspective(base_img, H_translation.dot(H), (x_max - x_min, y_max - y_min))

    # Paste the next image unwarped; it overwrites the overlapping area
    result_img[translation_dist[1]:h2 + translation_dist[1], translation_dist[0]:w2 + translation_dist[0]] = next_img

    # The stitched canvas becomes the base for the next iteration
    base_img = result_img

# Show the final result
cv2.imshow('Result', base_img)
cv2.waitKey(0)
cv2.destroyAllWindows()

 結果
ところどころ歪んでる…。

射影変換（ホモグラフィ）で変に変形させる必要はなかった。

 おわりに
複数の切り取り画像を入力として、それらを結合し大きい元画像を出力しました。

大きい画像をいきなり1枚の画像に収めることが難しい場合に使えると思います。

 追記
回転、せん断、拡大縮小をせず、平行移動だけで1枚の画像にしていきます。
import cv2
import numpy as np
import glob
import os

# Directory that holds the cropped input images
image_dir = 'img/'

# Collect the image paths for every supported extension.
# glob gives no ordering guarantee, so the list is sorted: the stitching below
# is sequential and each image is matched against everything stitched so far.
image_files = []
for ext in ('*.png', '*.jpg', '*.jpeg', '*.bmp'):
    image_files.extend(glob.glob(os.path.join(image_dir, ext)))
image_files.sort()

if not image_files:
    raise SystemExit(f"{image_dir} に画像が見つかりません")

# Feature detector (SIFT) and brute-force matcher, created once and reused
sift = cv2.SIFT_create()
bf = cv2.BFMatcher(cv2.NORM_L2)

# The first image is the initial stitched result
base_img = cv2.imread(image_files[0])
if base_img is None:
    # cv2.imread reports an unreadable file by returning None
    raise SystemExit(f"画像 {image_files[0]} を読み込めませんでした")

# Merge the remaining images into the result one at a time
for image_path in image_files[1:]:
    next_img = cv2.imread(image_path)
    if next_img is None:
        print(f"画像 {image_path} を読み込めませんでした")
        continue

    # Keypoints and descriptors of the current result and the next image
    kp1, des1 = sift.detectAndCompute(base_img, None)
    kp2, des2 = sift.detectAndCompute(next_img, None)
    if des1 is None or des2 is None:
        # No descriptors at all (e.g. a featureless image): nothing to match
        print(f"画像 {image_path} のマッチングが不十分です。次の画像に進みます。")
        continue

    # Ratio test; lowering the ratio keeps only the more reliable matches.
    # Rows with fewer than two neighbours cannot be tested and are dropped.
    matches = bf.knnMatch(des1, des2, k=2)
    good_matches = [
        pair[0]
        for pair in matches
        if len(pair) == 2 and pair[0].distance < 0.7 * pair[1].distance
    ]

    # Draw the matches for visual inspection
    match_img = cv2.drawMatches(base_img, kp1, next_img, kp2, good_matches, None, flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
    cv2.imshow('Feature Matching', match_img)
    cv2.waitKey(500)  # 0 pauses until a key is pressed; 500 shows the window for 0.5 s

    # Skip this image when there are too few matches
    if len(good_matches) < 10:
        print(f"画像 {image_path} のマッチングが不十分です。次の画像に進みます。")
        continue

    # Estimate next_img -> base_img with RANSAC. estimateAffinePartial2D fits
    # rotation, uniform scale and translation; only the translation column is
    # used below, so any estimated rotation/scale is deliberately ignored.
    src_pts = np.float32([kp1[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2)
    dst_pts = np.float32([kp2[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2)
    M, inliers = cv2.estimateAffinePartial2D(dst_pts, src_pts, method=cv2.RANSAC, ransacReprojThreshold=5.0)

    if M is None:
        print("変換行列が見つかりませんでした。次の画像に進みます。")
        continue

    # Translation in whole pixels; round to nearest instead of truncating
    # toward zero, which would bias negative offsets by up to one pixel.
    translation = [int(round(float(v))) for v in M[:, 2]]

    # Canvas large enough for both the current result and the shifted image
    h1, w1 = base_img.shape[:2]
    h2, w2 = next_img.shape[:2]

    x_min = min(0, translation[0])
    y_min = min(0, translation[1])
    x_max = max(w1, w2 + translation[0])
    y_max = max(h1, h2 + translation[1])

    # Offset that moves the top-left corner of the bounding box to (0, 0)
    translation_dist = [-x_min, -y_min]
    result_img = np.zeros((y_max - y_min, x_max - x_min, 3), dtype=np.uint8)

    # Place the current result on the canvas
    result_img[translation_dist[1]:h1 + translation_dist[1], translation_dist[0]:w1 + translation_dist[0]] = base_img

    # Place the next image at its translated position (overwrites the overlap)
    x_offset = translation_dist[0] + translation[0]
    y_offset = translation_dist[1] + translation[1]
    result_img[y_offset:y_offset + h2, x_offset:x_offset + w2] = next_img

    # The stitched canvas becomes the base for the next iteration
    base_img = result_img

cv2.imshow("Result", base_img)
cv2.imwrite("Result.png", base_img)
cv2.waitKey(0)
cv2.destroyAllWindows()



こうなります。

 さらに追記
cv2.phaseCorrelate()という関数がありました。

平行移動のみの位置合わせの場合、これで対応できるようです。
ディレクトリ内の画像を順番に読み込み、隣り合う画像同士をcv2.phaseCorrelateで自動的に位置合わせして合成。

その合成処理を繰り返し、最終的に画像が1枚になるか、合成できなくなるまで続けます。

最後に合成後の画像からは黒い余白部分をカットして完成です。
import cv2
import numpy as np
import os

def compute_shift(img1_gray, img2_gray):
    """Estimate the shift between two grayscale images via phase correlation.

    A Hanning window sized from ``img1_gray`` is multiplied into both images
    before they are handed to ``cv2.phaseCorrelate``, so ``img2_gray`` has to
    be compatible with that window size.

    Returns:
        The ``(shift, response)`` pair produced by ``cv2.phaseCorrelate``.
    """
    height, width = img1_gray.shape[0], img1_gray.shape[1]
    # createHanningWindow expects the size as (width, height)
    hanning = cv2.createHanningWindow((width, height), cv2.CV_32F)
    windowed_first = img1_gray * hanning
    windowed_second = img2_gray * hanning
    return cv2.phaseCorrelate(windowed_first, windowed_second)

def pad_images_to_same_size(images):
    """Pad every image with black borders up to a common size.

    The target size is the largest height and the largest width found in
    ``images``. Each image is centred; when the missing amount is odd, the
    extra pixel goes to the bottom / right side.

    Args:
        images: Sequence of image arrays (height and width are read from
            ``shape[:2]``).

    Returns:
        A new list with one padded image per input image, in the same order.
        An empty input yields an empty list.
    """
    # Without this guard max() below fails with an opaque ValueError
    if len(images) == 0:
        return []

    max_height = max(img.shape[0] for img in images)
    max_width = max(img.shape[1] for img in images)

    padded_images = []
    for img in images:
        h, w = img.shape[:2]
        # Split the missing rows/columns as evenly as possible around the image
        top = (max_height - h) // 2
        bottom = max_height - h - top
        left = (max_width - w) // 2
        right = max_width - w - left
        padded_images.append(
            cv2.copyMakeBorder(
                img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0]
            )
        )
    return padded_images

def crop_black_borders(img):
    """Trim the all-black margin around the image content.

    The crop is the bounding box of every pixel that has at least one
    non-zero channel. Using all non-black pixels (rather than only the
    largest connected region) keeps content that is separated from the main
    area by a black gap.

    Args:
        img: Image array of shape (H, W) or (H, W, C).

    Returns:
        A view of ``img`` restricted to the bounding box, or ``img`` itself
        when the image is entirely black.
    """
    # A pixel counts as content as soon as any of its channels is non-zero
    non_black = img > 0 if img.ndim == 2 else img.any(axis=2)

    rows = np.flatnonzero(non_black.any(axis=1))
    if rows.size == 0:
        return img
    cols = np.flatnonzero(non_black.any(axis=0))

    return img[rows[0] : rows[-1] + 1, cols[0] : cols[-1] + 1]

def assemble_images(images, positions):
    """Paste images onto one black canvas at the given positions.

    Pure-black pixels (all channels zero) of an image are treated as
    transparent and never overwrite what is already on the canvas. Later
    images overwrite earlier ones wherever they are not black.

    Args:
        images: Sequence of 3-channel uint8 image arrays.
        positions: Sequence of ``(x, y)`` top-left coordinates, one per
            image; values may be fractional or negative.

    Returns:
        The composed canvas, a ``uint8`` array of shape
        ``(canvas_height, canvas_width, 3)``.

    Raises:
        ValueError: If ``images`` or ``positions`` is empty.
    """
    if len(images) == 0 or len(positions) == 0:
        raise ValueError("assemble_images needs at least one image and one position")

    xs = [pos[0] for pos in positions]
    ys = [pos[1] for pos in positions]

    # Bounding box of all placed images, widened to whole pixels
    x_min = int(np.floor(min(xs)))
    y_min = int(np.floor(min(ys)))
    x_max = int(np.ceil(max(x + img.shape[1] for x, img in zip(xs, images))))
    y_max = int(np.ceil(max(y + img.shape[0] for y, img in zip(ys, images))))

    canvas = np.zeros((y_max - y_min, x_max - x_min, 3), dtype=np.uint8)

    for img, (x, y) in zip(images, positions):
        x_start = int(round(x - x_min))
        y_start = int(round(y - y_min))
        h, w = img.shape[:2]
        # Slicing yields a view, so writing to it updates the canvas in place
        canvas_region = canvas[y_start:y_start + h, x_start:x_start + w]
        # Test the raw channels: a rounded grayscale conversion would map very
        # dark but non-black pixels to 0 and wrongly treat them as transparent.
        visible = img.any(axis=2)
        canvas_region[visible] = img[visible]
    return canvas

def merge_images_group(images, threshold):
    """Chain consecutive images into groups and assemble each group.

    Every neighbouring pair is compared with ``compute_shift``. While the
    response is at least ``threshold`` the next image joins the current
    group at the accumulated (negated) shift; otherwise the current group is
    assembled and a new group starts with that image at the origin.

    Returns:
        A list with one assembled image per group, in input order.
    """
    groups = []
    members = [images[0]]
    offsets = [(0.0, 0.0)]
    acc_x = 0.0
    acc_y = 0.0

    # Phase correlation works on float32 grayscale versions of the inputs
    grays = [
        cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY).astype(np.float32)
        for frame in images
    ]

    for prev_gray, next_gray, next_img in zip(grays, grays[1:], images[1:]):
        shift, response = compute_shift(prev_gray, next_gray)

        if not (response >= threshold):
            # Correlation too weak: close the current group and start over
            groups.append(assemble_images(members, offsets))
            members = [next_img]
            offsets = [(0.0, 0.0)]
            acc_x = 0.0
            acc_y = 0.0
            continue

        # Positions accumulate the negated pairwise shift
        dx, dy = shift
        acc_x -= dx
        acc_y -= dy
        offsets.append((acc_x, acc_y))
        members.append(next_img)

    if members:
        groups.append(assemble_images(members, offsets))
    return groups

def recursive_merge(images, threshold):
    """Merge images pass after pass until no further progress is made.

    Each pass pads the images to a common size and groups/merges them with
    ``merge_images_group``. The loop ends when a pass does not reduce the
    number of images, or when a single image remains.

    Returns:
        The list of images left after the last pass.
    """
    pass_no = 1
    remaining = len(images)
    while True:
        print(f"Iteration {pass_no}: {len(images)} images")
        images = pad_images_to_same_size(images)
        merged = merge_images_group(images, threshold)

        # No reduction in this pass: keep the (padded) inputs and stop
        if len(merged) == remaining:
            return images

        images = merged
        remaining = len(merged)
        if remaining == 1:
            return images
        pass_no += 1

def merge_images_in_directory(directory_path, threshold=0.2, output_dir="output"):
    """Merge all images of a directory and save the cropped results.

    Files ending in .png/.jpg/.jpeg (case-insensitive) are loaded in sorted
    filename order, merged with ``recursive_merge`` and written as
    ``merged_result_00001.png``, ``merged_result_00002.png``, ... into
    ``output_dir`` after their black borders have been cropped.

    Args:
        directory_path: Directory containing the input images.
        threshold: Minimum phase-correlation response passed on to
            ``recursive_merge``.
        output_dir: Directory for the result files; created when missing.

    Returns:
        The list of merged (uncropped) images, or ``None`` when the directory
        contains no readable image.
    """
    image_files = sorted(
        f
        for f in os.listdir(directory_path)
        if f.lower().endswith((".png", ".jpg", ".jpeg"))
    )
    if not image_files:
        print("画像がディレクトリに見つかりませんでした。")
        return None

    images = []
    for name in image_files:
        img = cv2.imread(os.path.join(directory_path, name), cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread returns None for unreadable files; passing that on
            # would crash later with an unrelated AttributeError.
            print(f"画像を読み込めませんでした（スキップします）: {name}")
            continue
        images.append(img)
    if not images:
        print("画像がディレクトリに見つかりませんでした。")
        return None

    merged_images = recursive_merge(images, threshold)

    # cv2.imwrite does not create missing directories; it just returns False
    os.makedirs(output_dir, exist_ok=True)
    for idx, merged_img in enumerate(merged_images, start=1):
        final_image = crop_black_borders(merged_img)
        out_path = os.path.join(output_dir, f"merged_result_{idx:05d}.png")
        # Report the real path, and only claim success when the write worked
        if cv2.imwrite(out_path, final_image):
            print(f"結合画像を保存しました: {out_path}")
        else:
            print(f"結合画像の保存に失敗しました: {out_path}")
    print("全ての画像の結合が完了しました。")
    return merged_images

# Example usage
if __name__ == "__main__":
    # Replace "img" with the path of the directory that holds the input images
    merge_images_in_directory("img", threshold=0.25)



 ベンチマーク作成用コード
import cv2
import os

def split_image_with_overlap_snake(input_image_path, output_dir, tile_width, tile_height, overlap_ratio=0.1):
    """Split an image into overlapping tiles saved in snake (boustrophedon) order.

    Rows are visited top to bottom. Even rows run left to right starting at
    x = 0; odd rows run right to left starting at the right image edge.
    Tiles are written as ``tile_0000.png``, ``tile_0001.png``, ... in visiting
    order. Only full-size tiles are produced.

    Args:
        input_image_path: Path of the image to split.
        output_dir: Directory for the tiles; created when missing.
        tile_width: Tile width in pixels.
        tile_height: Tile height in pixels.
        overlap_ratio: Fraction of a tile shared with its neighbour.

    Raises:
        ValueError: If the tile size is not positive or the overlap leaves no
            positive step between tiles.
        OSError: If the input image cannot be read.
    """
    if tile_width <= 0 or tile_height <= 0:
        raise ValueError("tile_width and tile_height must be positive")

    # Distance between neighbouring tiles = tile size minus overlapping pixels
    step_x = tile_width - int(tile_width * overlap_ratio)
    step_y = tile_height - int(tile_height * overlap_ratio)
    if step_x <= 0 or step_y <= 0:
        # A zero step makes range() raise; a negative one yields no tiles
        raise ValueError(
            f"overlap_ratio={overlap_ratio} leaves no positive step between tiles"
        )

    image = cv2.imread(input_image_path)
    if image is None:
        # cv2.imread returns None instead of raising on failure
        raise OSError(f"could not read image: {input_image_path}")
    img_height, img_width = image.shape[:2]

    os.makedirs(output_dir, exist_ok=True)

    tile_id = 0  # running index used in the file names
    for row, y in enumerate(range(0, img_height - tile_height + 1, step_y)):
        # Alternate the horizontal direction on every row (snake order)
        if row % 2 == 0:
            x_range = range(0, img_width - tile_width + 1, step_x)
        else:
            x_range = range(img_width - tile_width, -1, -step_x)

        for x in x_range:
            tile = image[y:y + tile_height, x:x + tile_width]

            # Zero-padded to 4 digits so the files sort in visiting order
            tile_path = os.path.join(output_dir, f"tile_{tile_id:04d}.png")
            cv2.imwrite(tile_path, tile)

            print(f"Saved {tile_path}")
            tile_id += 1

# Example usage
input_image_path = "image.png"  # path of the large source image
output_dir = "path/to/output_tiles"  # directory that receives the tiles
tile_width = 256  # tile width in pixels
tile_height = 256  # tile height in pixels
overlap_ratio = 0.7  # 70% overlap between neighbouring tiles

# Split only when run as a script, so that importing this module to reuse
# split_image_with_overlap_snake does not read or write any files.
if __name__ == "__main__":
    split_image_with_overlap_snake(input_image_path, output_dir, tile_width, tile_height, overlap_ratio)