[Day 10] Trying Out Bayesian Optimization [2021 Advent Calendar]
This is the Day 10 article of my 2021 solo Advent Calendar on machine learning.
The theme is Bayesian optimization.
Bayesian optimization is a method that automatically repeats the following loop: based on the information gathered so far, decide which point is most worth evaluating next, actually evaluate it, and feed the result back into the next decision.
This makes it possible to reach the optimum efficiently without having to evaluate every point.
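As a minimal illustration of this loop (a toy example added here, not part of the original Colab notebook; the function `black_box` and its bounds are made up), the bayes_opt package can maximize a simple one-dimensional function whose optimum we pretend not to know:

```python
from bayes_opt import BayesianOptimization

def black_box(x):
    # Toy objective whose maximum (x = 2, value 1) we pretend not to know
    return -(x - 2) ** 2 + 1

optimizer = BayesianOptimization(
    f=black_box,
    pbounds={'x': (-5, 5)},  # search range for x
    random_state=1,
)
# 3 random initial evaluations, then 10 points chosen via the surrogate model
optimizer.maximize(init_points=3, n_iter=10)
print(optimizer.max)  # best x found and its objective value
```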
This article skips the theoretical background and instead compares Bayesian optimization against Optuna in terms of accuracy and run time.
The Colab notebook is available here.
Bayesian Optimization
First, define the objective function that the optimizer will maximize (LightGBM cross-validation under a given set of hyperparameters) together with the search range for each hyperparameter:

```python
from bayes_opt import BayesianOptimization
import lightgbm as lgb

# train_data, n_folds, random_seed and SEED are assumed to be defined
# earlier in the notebook.
def lgb_eval(num_leaves, feature_fraction, bagging_fraction, max_depth,
             lambda_l1, lambda_l2, min_split_gain, min_child_weight):
    params = {
        'application': 'regression',
        'num_iterations': 4000,
        'learning_rate': 0.05,
        'early_stopping_round': 100,
        'metric': 'rmse'
    }
    # Clip / round the suggested values into valid LightGBM parameters
    params['num_leaves'] = int(round(num_leaves))
    params['feature_fraction'] = max(min(feature_fraction, 1), 0)
    params['bagging_fraction'] = max(min(bagging_fraction, 1), 0)
    params['max_depth'] = int(round(max_depth))
    params['lambda_l1'] = max(lambda_l1, 0)
    params['lambda_l2'] = max(lambda_l2, 0)
    params['min_split_gain'] = min_split_gain
    params['min_child_weight'] = min_child_weight
    cv_result = lgb.cv(
        params,
        train_data,
        nfold=n_folds,
        seed=random_seed,
        stratified=False,
        verbose_eval=200,
        metrics=['rmse']
    )
    # BayesianOptimization maximizes the objective, so return the
    # negative of the best (smallest) mean RMSE across the CV rounds.
    return -min(cv_result['rmse-mean'])

lgbBO = BayesianOptimization(
    lgb_eval,
    {
        'num_leaves': (24, 45),
        'feature_fraction': (0.1, 0.9),
        'bagging_fraction': (0.8, 1),
        'max_depth': (5, 8.99),
        'lambda_l1': (0, 5),
        'lambda_l2': (0, 3),
        'min_split_gain': (0.001, 0.1),
        'min_child_weight': (5, 50)
    },
    random_state=SEED
)
```
Wrapping the objective and the optimizer into a single helper function:

```python
def bayes_parameter_opt_lgb(X, y, init_round=15, opt_round=25, n_folds=5,
                            random_seed=6, n_estimators=10000,
                            learning_rate=0.05, output_process=False):
    # prepare data
    train_data = lgb.Dataset(
        data=X,   # use the arguments passed to the function
        label=y,
        free_raw_data=False
    )

    # objective function for the optimizer
    def lgb_eval(num_leaves, feature_fraction, bagging_fraction, max_depth,
                 lambda_l1, lambda_l2, min_split_gain, min_child_weight):
        params = {
            'application': 'regression',
            'num_iterations': n_estimators,
            'learning_rate': learning_rate,
            'early_stopping_round': 100,
            'metric': 'rmse'
        }
        params['num_leaves'] = int(round(num_leaves))
        params['feature_fraction'] = max(min(feature_fraction, 1), 0)
        params['bagging_fraction'] = max(min(bagging_fraction, 1), 0)
        params['max_depth'] = int(round(max_depth))
        params['lambda_l1'] = max(lambda_l1, 0)
        params['lambda_l2'] = max(lambda_l2, 0)
        params['min_split_gain'] = min_split_gain
        params['min_child_weight'] = min_child_weight
        cv_result = lgb.cv(
            params, train_data,
            nfold=n_folds,
            seed=random_seed,
            stratified=False,
            verbose_eval=200,
            metrics=['rmse']
        )
        # maximize the negative of the best mean RMSE
        return -min(cv_result['rmse-mean'])

    lgbBO = BayesianOptimization(
        lgb_eval,
        {
            'num_leaves': (24, 45),
            'feature_fraction': (0.1, 0.9),
            'bagging_fraction': (0.8, 1),
            'max_depth': (5, 8.99),
            'lambda_l1': (0, 5),
            'lambda_l2': (0, 3),
            'min_split_gain': (0.001, 0.1),
            'min_child_weight': (5, 50)
        },
        random_state=0
    )

    # optimize
    lgbBO.maximize(init_points=init_round, n_iter=opt_round)

    # optionally dump the optimization history
    if output_process:
        lgbBO.points_to_csv("bayes_opt_result.csv")

    # return the best parameters found
    return lgbBO.max["params"]
```
```python
opt_params = bayes_parameter_opt_lgb(
    X_train_ce,
    y_train,
    init_round=5,
    opt_round=10,
    n_folds=3,
    random_seed=SEED,
    n_estimators=100,
    learning_rate=0.05
)
```
With the best parameters, run GroupKFold cross-validation (grouped by Genre) and check the RMSE on each fold:

```python
from sklearn.model_selection import GroupKFold
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import numpy as np

y_train = y_train.reset_index(drop=True)

gkf = GroupKFold(n_splits=5)
groups = X_train_ce["Genre"]

# finish preparing the tuned parameters
opt_params["application"] = "regression"                        # regression task
opt_params["metrics"] = "rmse"                                   # evaluation metric
opt_params["num_leaves"] = int(round(opt_params["num_leaves"]))  # cast to int
opt_params["max_depth"] = int(round(opt_params["max_depth"]))    # cast to int

cv_result_bayes = []

for fold, (train_index, test_index) in enumerate(gkf.split(X_train_ce, y_train, groups)):
    X_train_gkf, X_test_gkf = X_train_ce.iloc[train_index], X_train_ce.iloc[test_index]
    y_train_gkf, y_test_gkf = y_train.iloc[train_index], y_train.iloc[test_index]

    # build the datasets
    lgb_train = lgb.Dataset(X_train_gkf, y_train_gkf)
    lgb_test = lgb.Dataset(X_test_gkf, y_test_gkf, reference=lgb_train)

    lgb_results = {}  # container for the training history

    model = lgb.train(
        params=opt_params,                 # best parameters from the optimization
        train_set=lgb_train,               # training data
        valid_sets=[lgb_train, lgb_test],  # evaluate on both train and test folds
        valid_names=['Train', 'Test'],     # names for the two datasets
        num_boost_round=100,               # number of boosting rounds
        early_stopping_rounds=50,          # early stopping
        evals_result=lgb_results,
        verbose_eval=-1,                   # suppress per-iteration logs
    )

    # plot the loss curves
    loss_train = lgb_results['Train']['rmse']
    loss_test = lgb_results['Test']['rmse']
    fig = plt.figure()
    plt.xlabel('Iteration')
    plt.ylabel('rmse')
    plt.title(f"fold:{fold}")
    plt.plot(loss_train, label='train loss')
    plt.plot(loss_test, label='test loss')
    plt.legend()
    plt.show()

    # predict and evaluate
    y_pred = model.predict(X_test_gkf)
    rmse = mean_squared_error(y_test_gkf, y_pred, squared=False)
    cv_result_bayes.append(rmse)

print("RMSE per fold:", cv_result_bayes)
print("Mean RMSE:", np.mean(cv_result_bayes))
```
Comparison with Optuna
The Optuna-based optimization itself is not covered here.
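For context only, a minimal Optuna objective for the same kind of LightGBM CV tuning might look roughly like the sketch below. This is a hypothetical example added here (the function name `objective`, the study settings, and the trial count are assumptions), not the code that produced the timings that follow:

```python
import optuna
import lightgbm as lgb

def objective(trial):
    # Search ranges roughly mirroring the ones used above;
    # train_data is the lgb.Dataset built earlier.
    params = {
        'application': 'regression',
        'metric': 'rmse',
        'learning_rate': 0.05,
        'num_leaves': trial.suggest_int('num_leaves', 24, 45),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.1, 0.9),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.8, 1.0),
        'max_depth': trial.suggest_int('max_depth', 5, 8),
        'lambda_l1': trial.suggest_float('lambda_l1', 0, 5),
        'lambda_l2': trial.suggest_float('lambda_l2', 0, 3),
        'min_split_gain': trial.suggest_float('min_split_gain', 0.001, 0.1),
        'min_child_weight': trial.suggest_float('min_child_weight', 5, 50),
    }
    cv_result = lgb.cv(params, train_data, nfold=3, seed=0,
                       stratified=False, metrics=['rmse'])
    return min(cv_result['rmse-mean'])  # Optuna minimizes this value

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=15)
print(study.best_params)
```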
print("Optuna RMSE:", round(np.mean(cv_result_opt), 3), "経過時間:", elasp_time_opt, "秒")
print("ベイズ最適化 RMSE:", round(np.mean(cv_result_bayes), 3), "経過時間:", elasp_time_bayes, "秒")
Bayesian optimization loses to Optuna on accuracy, but its run time is far shorter.
| Method | RMSE | Elapsed time |
| --- | --- | --- |
| Optuna | 0.186 | 187.2 s |
| Bayesian optimization | 0.209 | 13.9 s |
That's it for Day 10. Thank you for reading to the end.