🐥
オレオレKerasテンプレート
テンプレ
最近、テーブルデータはkerasで回すことが増えてきたのでオレオレテンプレを作っておく。Titanicデータで学習を回しています。
ソースコードは以下にもあります。
import numpy as np
import pandas as pd
import scipy.stats as st
from sklearn.model_selection import StratifiedKFold
# keras関係
import tensorflow as tf
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping, CSVLogger
# get model
def get_model(num_features):
    """Build and compile a small MLP binary classifier for tabular data.

    The network stacks three ReLU ``Dense`` layers (128, 64, 32 units, He
    normal initialisation), each followed by ``Dropout(0.1)``, and ends in a
    single sigmoid unit.

    Args:
        num_features: Number of input columns per sample.

    Returns:
        A compiled ``tf.keras.Model`` (Adam, lr=0.001, binary cross-entropy
        loss) that reports AUC under the fixed metric name ``auc``.
    """
    inputs = tf.keras.layers.Input(shape=(num_features, ), dtype='float64')
    x = inputs
    for units in (128, 64, 32):
        x = tf.keras.layers.Dense(
            units, activation='relu', kernel_initializer='he_normal'
        )(x)
        x = tf.keras.layers.Dropout(0.1)(x)
    # The AUC metric buckets predictions against thresholds in [0, 1], so the
    # output has to be a probability rather than an unbounded linear value.
    outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)
    model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        # An explicit name keeps the history keys stable ("auc"/"val_auc")
        # even when several models are built in the same process.
        metrics=[tf.keras.metrics.AUC(name='auc')],
    )
    return model
# titanic data
df = pd.read_csv('/kaggle/input/titanic/train.csv', index_col=0)
# Preprocessing: encode the categorical columns as integers and fill gaps.
# Results are assigned back to the column instead of calling
# ``fillna(..., inplace=True)`` on ``df[col]``: that chained in-place form is
# deprecated and does not update ``df`` under pandas copy-on-write.
# ``map`` yields integer columns directly; any value missing from the mapping
# would become NaN.
df["Sex"] = df["Sex"].map({"male": 1, "female": 2})
df["Embarked"] = df["Embarked"].fillna("S").map({"S": 0, "C": 1, "Q": 2})
df["Age"] = df["Age"].fillna(df["Age"].mean())
cols = ["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked", "Survived"]
# Keep only the modelling columns and renumber the rows from 0.
df = df[cols].reset_index(drop=True)
df
# Standardise the feature columns (z-score per column).
# Skipping normalisation makes the scores terrible, so this is a good place
# to experience why some form of normalisation is needed.
# Note: the statistics are computed over every row of ``df``, i.e. before the
# CV split further down.
features = [
    "Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked",
]
df[features] = st.zscore(df[features], axis=0, ddof=0)
df
# Not used by the actual training code; shown here only as an example.
# The input width is derived from the feature list instead of a hard-coded 7
# so it stays correct if the feature list changes.
model = get_model(len(features))
model.summary()
# CV: 3-fold split stratified on the Survived label, one fresh model per fold.
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=0)
for fold, (trn_idx, val_idx) in enumerate(cv.split(df, df["Survived"]), start=1):
    trn_x = df[features].iloc[trn_idx, :]
    trn_y = df["Survived"].iloc[trn_idx]
    val_x = df[features].iloc[val_idx, :]
    val_y = df["Survived"].iloc[val_idx]
    print(f">==== fold: {fold} =====<")
    model = get_model(trn_x.shape[1])
    # define callbacks
    # verbose: 0 means silent
    # NOTE(review): newer Keras releases may require specific file suffixes
    # for checkpoint/save paths - confirm against the installed version.
    cb_rlr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=0, min_delta=1e-4, mode='min')
    cb_ckp = ModelCheckpoint(f'./model.keras.fold{fold}.h5', monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=True, mode='min')
    cb_es = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=5, mode='min', restore_best_weights=True, verbose=0)
    cb_csv = CSVLogger(f"./log.fold{fold}.csv", append=True)
    cb_list = [cb_rlr, cb_ckp, cb_es, cb_csv]
    history = model.fit(
        trn_x, trn_y,
        validation_data=(val_x, val_y),
        batch_size=128,
        epochs=100,
        callbacks=cb_list
    )
    model.save(f"model.keras.fold{fold}")
    # Quick look at the stats of the best epoch (lowest validation loss).
    val_losses = history.history["val_loss"]
    best_idx = int(np.argmin(val_losses))
    best_epoch = best_idx + 1  # epochs are reported 1-based
    best_loss = val_losses[best_idx]
    # Keras can suffix metric names ("auc_1", "auc_2", ...) when several
    # models are built in one process, so look the key up by prefix and fall
    # back to the plain "val_auc" key.
    auc_key = next(
        (key for key in history.history if key.startswith("val_auc")),
        "val_auc",
    )
    best_auc = history.history[auc_key][best_idx]
    print("---")
    print(f"Best epoch: {best_epoch}, val_loss: {best_loss}, val_auc: {best_auc}")
    print("---")
modelメモ
- activationはswishでもいいかも
- `kernel_regularizer=l2(0.01)` は入れてもいいかも(Denseのオプション)
- batchnormalizationも入れてもいいかも
- カテゴリカルはembeddingする
Discussion