🪴
【ML Method】Stacking Example Introduction
Stacking
Stacking means training a new model, usually a simple linear one, that takes the outputs of multiple base models as its inputs.
Thus, stacking requires the predictions of the base models together with the corresponding labels.
An example implementation is below.
・Stacking
# !pip install torchview
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset
from torchview import draw_graph
# Generate synthetic data
X, y = make_regression(n_samples=1000, n_features=20, noise=0.1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Standardize data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Convert to torch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)
# Define base model 1
class BaseModel1(nn.Module):
def __init__(self):
super(BaseModel1, self).__init__()
self.linear = nn.Sequential(
nn.Linear(20, 50),
nn.ReLU(),
nn.Linear(50, 1)
)
def forward(self, x):
return self.linear(x)
# Define base model 2
class BaseModel2(nn.Module):
def __init__(self):
super(BaseModel2, self).__init__()
self.linear = nn.Sequential(
nn.Linear(20, 100),
nn.ReLU(),
nn.Linear(100, 1)
)
def forward(self, x):
return self.linear(x)
# Train function for models
def train_model(model, X_train, y_train):
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
model.train()
for epoch in range(100):
optimizer.zero_grad()
outputs = model(X_train)
loss = criterion(outputs, y_train)
loss.backward()
optimizer.step()
# Instantiate models
model1 = BaseModel1()
model2 = BaseModel2()
# Train base models
train_model(model1, X_train, y_train)
train_model(model2, X_train, y_train)
# Make predictions and use as features for the final model
with torch.no_grad():
model1.eval()
model2.eval()
preds1 = model1(X_train)
preds2 = model2(X_train)
stacked_features = torch.cat((preds1, preds2), dim=1)
# Define final model
class FinalModel(nn.Module):
def __init__(self):
super(FinalModel, self).__init__()
        self.linear = nn.Linear(2, 1)  # in_features, out_features: one prediction from each base model
def forward(self, x):
return self.linear(x)
final_model = FinalModel()
train_model(final_model, stacked_features, y_train)
# Evaluate the final model
with torch.no_grad():
final_model.eval()
test_features = torch.cat((model1(X_test), model2(X_test)), dim=1)
final_pred = final_model(test_features)
test_loss = nn.MSELoss()(final_pred, y_test)
print(f'Test Loss: {test_loss.item()}')
model_graph = draw_graph(final_model, input_size=test_features.shape, expand_nested=True)
# Display the graph
display(model_graph.visual_graph)
・Structure
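One caveat about this example: the final model is fit on predictions that the base models made on their own training data, so an overfitted base model can leak optimistic features into the meta-model. The standard remedy is to build the stacking features from out-of-fold predictions. Below is a minimal sketch that reuses train_model, BaseModel1, and BaseModel2 from the code above; the oof_predictions helper is illustrative, not part of the original example.
import numpy as np
from sklearn.model_selection import KFold
# Illustrative helper (not in the original example): out-of-fold (OOF)
# predictions, so each row of the stacking features comes from a model
# that never saw that row during training
def oof_predictions(model_cls, X, y, n_splits=5):
    oof = np.zeros((len(X), 1), dtype=np.float32)
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    for train_idx, val_idx in kf.split(X):
        fold_model = model_cls()  # a fresh model for every fold
        train_model(fold_model, X[train_idx], y[train_idx])
        fold_model.eval()
        with torch.no_grad():
            oof[val_idx] = fold_model(X[val_idx]).numpy()
    return torch.tensor(oof)
oof1 = oof_predictions(BaseModel1, X_train, y_train)
oof2 = oof_predictions(BaseModel2, X_train, y_train)
stacked_features = torch.cat((oof1, oof2), dim=1)  # leakage-free meta-features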
Alternatively, the base models and the final model can be trained together end-to-end, as in the following version.
・Stacking (using DataLoader, trained together)
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset
# Generate synthetic data
X, y = make_regression(n_samples=1000, n_features=20, noise=0.1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Standardize data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Convert to torch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)
# Wrap the training data in a DataLoader for mini-batch training
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=64, shuffle=True)
# Define a stacked model that holds both base models and the final linear
# layer, so that every parameter is updated in the same backward pass
class StackedModel(nn.Module):
    def __init__(self):
        super(StackedModel, self).__init__()
        self.base1 = nn.Sequential(
            nn.Linear(20, 50),
            nn.ReLU(),
            nn.Linear(50, 1)
        )
        self.base2 = nn.Sequential(
            nn.Linear(20, 100),
            nn.ReLU(),
            nn.Linear(100, 1)
        )
        self.final = nn.Linear(2, 1)  # one prediction from each base model
    def forward(self, x):
        preds1 = self.base1(x)
        preds2 = self.base2(x)
        stacked_features = torch.cat((preds1, preds2), dim=1)
        return self.final(stacked_features)
# Instantiate and train the whole stack together
model = StackedModel()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
model.train()
for epoch in range(100):
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
# Evaluate the stacked model
model.eval()
with torch.no_grad():
    final_pred = model(X_test)
    test_loss = nn.MSELoss()(final_pred, y_test)
    print(f'Test Loss: {test_loss.item()}')
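Because the final layer is just a linear combination, its learned weights show how strongly the stack relies on each base model. A quick inspection of the StackedModel trained above:
# Coefficients of the final linear layer: one per base model
print(model.final.weight)  # shape (1, 2)
print(model.final.bias)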
Stacking is a useful approach for improving model performance. Please try it.