🪴

【ML】Stacking Example Introduction

2024/06/09に公開

Stacking

Stacking simply means training a new model (usually a linear one) that takes the outputs of multiple base models as its inputs.

Thus, stacking needs two things: the outputs of several base models and the labels that correspond to them.
An example implementation is shown below.

・Stacking

# !pip install torchview

from __future__ import annotations

import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset
from torchview import draw_graph

# Generate synthetic data.
# The generator is seeded with the same value as the split below: with only
# the split seeded, every run would draw a different dataset and the printed
# test loss could not be reproduced or compared between runs.
X, y = make_regression(n_samples=1000, n_features=20, noise=0.1, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize data: the scaler is fitted on the training split only and its
# statistics are reused for the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert to float32 torch tensors; targets are reshaped into a single column
# with view(-1, 1) so they line up with the one-unit output layer of the models.
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

# Define base model 1
class BaseModel1(nn.Module):
    """First base learner: 20 inputs -> 50 ReLU hidden units -> 1 output."""

    def __init__(self):
        super().__init__()
        hidden = nn.Linear(20, 50)
        activation = nn.ReLU()
        head = nn.Linear(50, 1)
        # Kept under the attribute name ``linear`` so the state_dict keys
        # remain ``linear.0.*`` and ``linear.2.*``.
        self.linear = nn.Sequential(hidden, activation, head)

    def forward(self, x):
        """Run ``x`` through the stacked layers and return the result."""
        prediction = self.linear(x)
        return prediction

# Define base model 2
class BaseModel2(nn.Module):
    """Second base learner: 20 inputs -> 100 ReLU hidden units -> 1 output."""

    def __init__(self):
        super().__init__()
        hidden = nn.Linear(20, 100)
        activation = nn.ReLU()
        head = nn.Linear(100, 1)
        # Kept under the attribute name ``linear`` so the state_dict keys
        # remain ``linear.0.*`` and ``linear.2.*``.
        self.linear = nn.Sequential(hidden, activation, head)

    def forward(self, x):
        """Run ``x`` through the stacked layers and return the result."""
        prediction = self.linear(x)
        return prediction

# Train function for models
def train_model(
    model: nn.Module,
    X_train: torch.Tensor,
    y_train: torch.Tensor,
    epochs: int = 100,
    lr: float = 0.01,
) -> None:
    """Fit ``model`` in place with full-batch Adam on a mean-squared-error loss.

    Args:
        model: Module to optimize. It is switched to training mode and is
            still in training mode when this function returns.
        X_train: Inputs passed to ``model`` as one batch on every epoch.
        y_train: Targets compared against ``model(X_train)``.
        epochs: Number of full-batch optimization steps. Defaults to 100,
            the value that used to be hard-coded.
        lr: Adam learning rate. Defaults to 0.01, the value that used to be
            hard-coded.
    """
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.train()
    # The epoch index itself is never needed, only the repetition count.
    for _ in range(epochs):
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()

# Instantiate models
model1 = BaseModel1()
model2 = BaseModel2()

# Build the final model's training features from out-of-fold predictions.
# If the base models predicted the very rows they were fitted on, the final
# model would learn from over-optimistic inputs (target leakage). Instead, each
# row is predicted by throwaway base models that never saw it during training.
n_folds = 5
n_train = X_train.shape[0]
fold_indices = torch.arange(n_train).chunk(n_folds)
preds1 = torch.empty_like(y_train)
preds2 = torch.empty_like(y_train)
for held_out in fold_indices:
    # Boolean mask selecting every training row outside the held-out fold.
    keep = torch.ones(n_train, dtype=torch.bool)
    keep[held_out] = False

    fold_model1 = BaseModel1()
    fold_model2 = BaseModel2()
    train_model(fold_model1, X_train[keep], y_train[keep])
    train_model(fold_model2, X_train[keep], y_train[keep])

    with torch.no_grad():
        fold_model1.eval()
        fold_model2.eval()
        preds1[held_out] = fold_model1(X_train[held_out])
        preds2[held_out] = fold_model2(X_train[held_out])

# One column per base model, one row per training sample (aligned with y_train).
stacked_features = torch.cat((preds1, preds2), dim=1)

# Train the base models used at inference time on the full training set,
# then switch them to evaluation mode for the prediction steps that follow.
train_model(model1, X_train, y_train)
train_model(model2, X_train, y_train)
model1.eval()
model2.eval()

# Define final model
class FinalModel(nn.Module):
    """Meta-learner: a single linear layer mapping 2 inputs to 1 output."""

    def __init__(self):
        super().__init__()
        # Two input features, one output feature.
        self.linear = nn.Linear(in_features=2, out_features=1)

    def forward(self, x):
        """Apply the linear layer to ``x``."""
        blended = self.linear(x)
        return blended

# Fit the final model on the stacked base-model predictions.
final_model = FinalModel()
train_model(final_model, stacked_features, y_train)

# Evaluate the final model on the test split: the base models' test
# predictions are concatenated column-wise and fed to the final model.
final_model.eval()
with torch.no_grad():
    base_outputs = (model1(X_test), model2(X_test))
    test_features = torch.cat(base_outputs, dim=1)
    final_pred = final_model(test_features)
    criterion = nn.MSELoss()
    test_loss = criterion(final_pred, y_test)
print(f'Test Loss: {test_loss.item()}')

# Render the final model's computation graph with torchview, using an input
# shaped like the stacked test features.
graph_input_shape = test_features.shape
model_graph = draw_graph(
    final_model,
    input_size=graph_input_shape,
    expand_nested=True,
)

# Display the graph
# NOTE(review): ``display`` is not imported in this listing; presumably the
# notebook built-in is expected — confirm.
display(model_graph.visual_graph)

・Structure

We can also train the base models and the final model together.
・Stacking(using dataloader, train together)

code
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset

# Generate synthetic data.
# The generator is seeded (same value as the split below) so that every run
# works on the same dataset; without it the printed test loss changes from run
# to run even though the split itself is seeded.
X, y = make_regression(n_samples=1000, n_features=20, noise=0.1, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize data: fit on the training split, then apply the same
# transformation to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert to float32 torch tensors; view(-1, 1) turns the targets into a
# single column.
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

# Define base model 1
class BaseModel1(nn.Module):
    """Small regression MLP with one hidden layer of 50 ReLU units."""

    def __init__(self):
        super().__init__()
        # Layer order fixes the state_dict indices (0 = hidden, 2 = output).
        layers = [
            nn.Linear(20, 50),
            nn.ReLU(),
            nn.Linear(50, 1),
        ]
        self.linear = nn.Sequential(*layers)

    def forward(self, x):
        """Return the model output for the input batch ``x``."""
        return self.linear(x)

# Define base model 2
class BaseModel2(nn.Module):
    """Small regression MLP with one hidden layer of 100 ReLU units."""

    def __init__(self):
        super().__init__()
        # Layer order fixes the state_dict indices (0 = hidden, 2 = output).
        layers = [
            nn.Linear(20, 100),
            nn.ReLU(),
            nn.Linear(100, 1),
        ]
        self.linear = nn.Sequential(*layers)

    def forward(self, x):
        """Return the model output for the input batch ``x``."""
        return self.linear(x)

# Train function for models
def train_model(
    model: nn.Module,
    X_train: torch.Tensor,
    y_train: torch.Tensor,
    epochs: int = 100,
    lr: float = 0.01,
) -> None:
    """Optimize ``model`` in place using Adam and an MSE loss on the full batch.

    Args:
        model: Module whose parameters are updated. It is put into training
            mode and stays in that mode after the call.
        X_train: Input tensor; the whole tensor is used as one batch per epoch.
        y_train: Target tensor compared against ``model(X_train)``.
        epochs: How many optimization steps to take. The default of 100
            matches the previously hard-coded value.
        lr: Learning rate handed to Adam. The default of 0.01 matches the
            previously hard-coded value.
    """
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.train()
    # Only the number of repetitions matters, so the loop index is discarded.
    for _ in range(epochs):
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()

# NOTE(review): the section heading announces DataLoader-based joint training,
# but this listing fits the base models first and the final model afterwards,
# and does not use DataLoader/TensorDataset — confirm the intended code.

# Instantiate the base models, then train each one on the training split.
model1, model2 = BaseModel1(), BaseModel2()
for base_model in (model1, model2):
    train_model(base_model, X_train, y_train)

# Make predictions and use as features for the final model.
# NOTE(review): these are predictions on the same rows the base models were
# trained on; stacking usually uses held-out predictions — confirm whether this
# simplification is intended.
model1.eval()
model2.eval()
with torch.no_grad():
    preds1 = model1(X_train)
    preds2 = model2(X_train)
# Concatenate column-wise: one column per base model.
stacked_features = torch.cat((preds1, preds2), dim=1)

# Define final model
class FinalModel(nn.Module):
    """Linear combiner that turns a 2-feature input into a single output."""

    def __init__(self):
        super().__init__()
        n_inputs, n_outputs = 2, 1
        self.linear = nn.Linear(n_inputs, n_outputs)

    def forward(self, x):
        """Return the linear layer's output for ``x``."""
        return self.linear(x)

# Train the final model on the stacked predictions of the base models.
final_model = FinalModel()
train_model(final_model, stacked_features, y_train)

# Evaluate the final model: build test features from the base models'
# predictions on the test split, then report the mean squared error.
final_model.eval()
with torch.no_grad():
    test_preds1 = model1(X_test)
    test_preds2 = model2(X_test)
    test_features = torch.cat((test_preds1, test_preds2), dim=1)
    final_pred = final_model(test_features)
    mse = nn.MSELoss()
    test_loss = mse(final_pred, y_test)
print(f'Test Loss: {test_loss.item()}')

Stacking is a useful approach for improving model performance. Please try it.

Discussion