🐬
【TF Tutorial】Chapter 5: Advanced Model Architectures
5. Advanced Model Architectures
5.1 Convolutional Neural Networks (CNNs)
CNNs are among the most widely used models in machine learning. They are applied to image classification, NLP, signal analysis, and more, using 1D, 2D, or 3D convolutions combined with pooling.
Conceptually, the convolutional layers act as feature extractors over the input.
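The only thing that changes across 1D, 2D, and 3D convolutions is which axes the kernel slides over. A minimal shape-check sketch (the channel and filter sizes here are arbitrary, chosen just for illustration):

import tensorflow as tf
from tensorflow.keras import layers

# 1D: e.g. audio or text, input is (batch, timesteps, channels)
x1 = tf.random.normal((1, 100, 8))
print(layers.Conv1D(16, 3)(x1).shape)   # (1, 98, 16)

# 2D: e.g. images, input is (batch, height, width, channels)
x2 = tf.random.normal((1, 28, 28, 1))
print(layers.Conv2D(16, 3)(x2).shape)   # (1, 26, 26, 16)

# 3D: e.g. video or volumetric scans, input is (batch, depth, height, width, channels)
x3 = tf.random.normal((1, 16, 28, 28, 1))
print(layers.Conv3D(16, 3)(x3).shape)   # (1, 14, 26, 26, 16)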
・CNN Example
import tensorflow as tf
from tensorflow.keras import layers, models
# Define a simple CNN using the Functional API
inputs = tf.keras.Input(shape=(28, 28, 1))
x = layers.Conv2D(32, (3, 3), activation='relu')(inputs)
x = layers.Conv2D(64, (3, 3), activation='relu')(x)
x = layers.MaxPooling2D((2, 2))(x)
x = layers.Flatten()(x)
x = layers.Dense(128, activation='relu')(x)
outputs = layers.Dense(10, activation='softmax')(x)
model = models.Model(inputs=inputs, outputs=outputs)
# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# Load and preprocess data
mnist = tf.keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images = train_images.reshape((-1, 28, 28, 1)).astype('float32') / 255
test_images = test_images.reshape((-1, 28, 28, 1)).astype('float32') / 255
# Train the model
model.fit(train_images, train_labels, epochs=5, batch_size=64, validation_data=(test_images, test_labels))
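After training, it is worth checking how the model generalizes. A short sketch using the test arrays defined above:

# Evaluate on the held-out test set (loss and accuracy, as compiled above)
test_loss, test_acc = model.evaluate(test_images, test_labels)
print(f'Test accuracy: {test_acc:.4f}')

# Predict class probabilities for the first 5 test images
probs = model.predict(test_images[:5])
print(probs.argmax(axis=1), test_labels[:5])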
PyTorch Ver.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
# Define a simple CNN
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        # 28x28 -> 26x26 (conv1) -> 24x24 (conv2) -> 12x12 (pool)
        self.fc1 = nn.Linear(12*12*64, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        # Return raw logits: CrossEntropyLoss applies log-softmax internally
        return self.fc2(x)

# Load data
transform = transforms.Compose([transforms.ToTensor()])
train_dataset = datasets.MNIST('.', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
# Initialize model, loss, and optimizer
model = SimpleCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Training loop
for epoch in range(1, 3):
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print(f'Epoch {epoch}, Batch {batch_idx}, Loss: {loss.item()}')
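For completeness, here is a matching evaluation sketch; it builds a test loader the same way as the training loader, but with train=False:

# Evaluate on the test set
test_dataset = datasets.MNIST('.', train=False, download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64)

model.eval()  # switch to evaluation mode
correct = 0
with torch.no_grad():  # no gradients needed for inference
    for data, target in test_loader:
        pred = model(data).argmax(dim=1)
        correct += (pred == target).sum().item()
print(f'Test accuracy: {correct / len(test_dataset):.4f}')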
5.2 Recurrent Neural Networks (RNNs)
RNNs are designed for sequential data: they carry information across timesteps through a hidden state.
They perform especially well on tasks like time-series prediction and forecasting.
Both LSTM and GRU are strong choices; I recommend trying LSTM first if you have enough computational resources, since GRU is the lighter of the two.
・LSTM Example
import tensorflow as tf
from tensorflow.keras import layers, models
# Define a simple LSTM using the Functional API
inputs = tf.keras.Input(shape=(None, 10))
x = layers.LSTM(20)(inputs)
outputs = layers.Dense(1)(x)
model = models.Model(inputs=inputs, outputs=outputs)
# Compile the model
model.compile(optimizer='adam', loss='mse')
# Generate some dummy data
import numpy as np
data = np.random.random((1000, 10, 10))
labels = np.random.random((1000, 1))
# Train the model
model.fit(data, labels, epochs=5, batch_size=32)
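Note that the input shape (None, 10) leaves the timestep axis unconstrained, so the same trained model accepts sequences of any length at inference time. A quick sketch:

# Sequence length can vary because the model was built with shape=(None, 10)
short_seq = np.random.random((1, 5, 10))   # 5 timesteps
long_seq = np.random.random((1, 50, 10))   # 50 timesteps
print(model.predict(short_seq).shape)  # (1, 1)
print(model.predict(long_seq).shape)   # (1, 1)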
・GRU Example
import tensorflow as tf
from tensorflow.keras import layers, models
# Define a simple GRU using the Functional API
inputs = tf.keras.Input(shape=(None, 10))
x = layers.GRU(20)(inputs)
outputs = layers.Dense(1)(x)
model = models.Model(inputs=inputs, outputs=outputs)
# Compile the model
model.compile(optimizer='adam', loss='mse')
# Generate some dummy data
import numpy as np
data = np.random.random((1000, 10, 10))
labels = np.random.random((1000, 1))
# Train the model
model.fit(data, labels, epochs=5, batch_size=32)
PyTorch Ver.
import torch
import torch.nn as nn

# Define a simple RNN
class SimpleRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleRNN, self).__init__()
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # Initial hidden state: (num_layers, batch, hidden_size)
        h0 = torch.zeros(1, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.rnn(x, h0)
        out = self.fc(out[:, -1, :])  # take the last timestep's output
        return out

# Initialize and use the RNN model
model = SimpleRNN(input_size=10, hidden_size=20, output_size=1)
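If you want the LSTM recommended above instead, the change in PyTorch is small: nn.LSTM carries a cell state alongside the hidden state. A minimal sketch along the same lines as SimpleRNN:

class SimpleLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # LSTM needs both an initial hidden state and an initial cell state
        h0 = torch.zeros(1, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(1, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        return self.fc(out[:, -1, :])  # use the last timestep's output

model = SimpleLSTM(input_size=10, hidden_size=20, output_size=1)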
5.3 Transfer Learning and Pre-trained Models
Transfer learning with pre-trained models is very useful: by using pre-trained weights as a model's starting point, you can get strong results on tasks that are similar to, but not the same as, the original one.
NLP models such as BERT are used the same way: pre-trained on large corpora and then fine-tuned for downstream tasks.
・Transfer Learning
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras import layers, models
# Load a pre-trained ResNet model
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
# Freeze all layers
base_model.trainable = False
# Add custom classification layers
inputs = tf.keras.Input(shape=(224, 224, 3))
x = base_model(inputs, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(128, activation='relu')(x)
outputs = layers.Dense(10, activation='softmax')(x)
model = models.Model(inputs=inputs, outputs=outputs)
# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# Generate some dummy data
import numpy as np
data = np.random.random((100, 224, 224, 3))
labels = np.random.randint(10, size=(100,))
# Train the model
model.fit(data, labels, epochs=5, batch_size=32)
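A common second stage is fine-tuning: once the new head has converged, unfreeze the backbone and keep training with a much lower learning rate so the pre-trained weights are only nudged. A sketch (the learning rate is an illustrative choice, not a tuned value):

# Unfreeze the backbone for fine-tuning
base_model.trainable = True

# Recompile with a much lower learning rate so the pre-trained weights change slowly
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(data, labels, epochs=2, batch_size=32)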