🐙

動画生成AIモデル

2024/05/19に公開

chat

tech

トレーニング開始時にモデルの構造（概要）が表示され、エポック0およびその後の各200エポックごとにオリジナル画像と再構成画像がプロットされるようになります。

Model: "autoencoder"

Layer (type) Output Shape Param #

input_19 (InputLayer) [(None, 32, 32, 3)] 0

encoder (Functional) (None, 16000) 49168000

decoder (Functional) (None, 32, 32, 3) 49155072

=================================================================
Total params: 98323072 (375.07 MB)
Trainable params: 98323072 (375.07 MB)
Non-trainable params: 0 (0.00 Byte)

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks
import matplotlib.pyplot as plt

# CIFAR-10データセットの読み込み
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

# データの前処理（正規化）
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

weight_size = 32000
num_sumples = 200

# 同じラベルの画像ペアを作成する関数
def create_pairs(x, y, num_pairs=num_sumples):
    pairs = []
    labels = []
    num_classes = len(np.unique(y))
    digit_indices = [np.where(y == i)[0] for i in range(num_classes)]
    for d in range(num_classes):
        np.random.shuffle(digit_indices[d])  # Shuffle indices to get random pairs
        for i in range(min(len(digit_indices[d]) - 1, num_pairs // num_classes)):
            z1, z2 = digit_indices[d][i], digit_indices[d][i + 1]
            pairs += [[x[z1], x[z2]]]
            labels += [1]
        if len(pairs) >= num_pairs:
            break
    return np.array(pairs), np.array(labels)

# 訓練用ペアを作成（100ペア）
train_pairs, train_labels = create_pairs(x_train, y_train, num_pairs=num_sumples)

# モデルの定義
def create_model(input_shape):
    input = layers.Input(shape=input_shape)
    x = layers.Flatten()(input)
    encoded = layers.Dense(weight_size, activation='relu')(x)
    
    encoder = models.Model(input, encoded, name='encoder')

    encoded_input = layers.Input(shape=(weight_size,))
    x = layers.Dense(np.prod(input_shape), activation='sigmoid')(encoded_input)
    decoded = layers.Reshape(input_shape)(x)

    decoder = models.Model(encoded_input, decoded, name='decoder')
    
    autoencoder = models.Model(input, decoder(encoder(input)), name='autoencoder')
    return autoencoder

# モデルのコンパイル
autoencoder = create_model((32, 32, 3))
autoencoder.compile(optimizer='adam', loss='mse')

# モデルの概要を表示
autoencoder.summary()

# カスタムコールバックの定義
class PlotReconstructedImages(callbacks.Callback):
    def __init__(self):
        super().__init__()
        self.epoch = 0

    def on_epoch_end(self, epoch, logs=None):
        if (epoch + 1) % 200 == 0 or epoch == 0:
            self.plot_images(epoch)

    def plot_images(self, epoch):
        decoded_imgs = self.model.predict(train_pairs[:, 0])
        n = 10  # プロットする画像の数
        plt.figure(figsize=(20, 4))
        for i in range(n):
            # オリジナル画像
            ax = plt.subplot(2, n, i + 1)
            plt.imshow(train_pairs[i, 0])
            plt.title("Original")
            plt.axis("off")

            # 再構成画像
            ax = plt.subplot(2, n, i + 1 + n)
            plt.imshow(decoded_imgs[i])
            plt.title("Reconstructed")
            plt.axis("off")
        plt.suptitle(f'Epoch {epoch+1}')
        plt.show()

# モデルの訓練
plot_callback = PlotReconstructedImages()
autoencoder.fit(train_pairs[:, 0], train_pairs[:, 1],
                epochs=1000,
                batch_size=10,  # バッチサイズを小さくしてオーバーフィッティングを防ぐ
                shuffle=True,
                verbose=0,  # 損失をプリントしない
                callbacks=[plot_callback])

Layer (type) Output Shape Param #

Discussion