📌

LightGBMでcustom objective (自作の目的関数) を使う

2022/09/15に公開3件

Python

LightGBM

tech

2022/9/15

目的：論文で提案されている様々なlossの改善の恩恵を、LightGBMでも受けられるようにする

手順

objective function (loss)を自作する
trainのfobjに自作objective functionを渡す

objective functionを作る

predsとlabels (lgb.Datasetの中に情報がある)を入力にして、gradとhessianを返せばいい。

これはLightGBMの最適化がニュートン法 (2次の微分まで使う) に基づいているため。

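ロジスティック回帰 (2値分類の log loss) の場合、予測値 (margin) を $\hat{y}$、確率を $p = \sigma(\hat{y}) = 1 / (1 + e^{-\hat{y}})$、損失を $L = -\{y \log p + (1 - y) \log (1 - p)\}$ とすると、grad (1次微分) と hessian (2次微分) は

$$
\mathrm{grad} = \frac{\partial L}{\partial \hat{y}} = p - y, \qquad \mathrm{hess} = \frac{\partial^2 L}{\partial \hat{y}^2} = p (1 - p)
$$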

よって自作関数は以下のように作れる

def loglikelihood(preds: np.ndarray, train_data: "lgb.Dataset") -> Tuple[np.ndarray, np.ndarray]:
    """Return the gradient and hessian of the binary log loss for LightGBM.

    Args:
        preds: Raw scores (margins) of the current model, one per example.
        train_data: Object providing ``get_label()`` with the 0/1 targets
            (a ``lgb.Dataset`` when called by LightGBM).

    Returns:
        ``(grad, hess)``: first and second derivative of the log loss with
        respect to the raw score, each with the same shape as ``preds``.
    """
    labels = train_data.get_label()
    # Sigmoid written as exp(-log(1 + exp(-x))): mathematically equal to
    # 1 / (1 + exp(-x)) but does not overflow for very negative scores.
    preds = np.exp(-np.logaddexp(0., -preds))
    grad = preds - labels
    hess = preds * (1. - preds)
    return grad, hess

fobjに自作objective functionを渡す

# lgb.train() takes the parameter dict first and the Dataset as ``train_set``;
# the custom objective is handed over through ``fobj``.
# NOTE(review): LightGBM >= 4.0 dropped ``fobj``; there the callable is passed
# as params["objective"] instead -- confirm against the installed version.
model = lgb.train(params={},
                  train_set=lgb_train,
                  fobj=loglikelihood)

fobjに渡す。

公式に完全なexampleがのっている https://github.com/microsoft/LightGBM/blob/master/examples/python-guide/advanced_example.py#L167

おまけ: multi classでregressionをした場合

import lightgbm as lgb
from sklearn.datasets import load_iris
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from pandas import DataFrame
from numpy import argmax
from typing import Optional, Tuple
import numpy as np


# Load the iris data set and keep features / targets under short names.
iris = load_iris()
x = iris.data
y = iris.target

# Wrap the features in a DataFrame so the columns carry the feature names,
# then hold out 15% of the rows for testing.
x_df = DataFrame(data=x, columns=iris.feature_names)
x_train, x_test, y_train, y_test = train_test_split(x_df, y, test_size=0.15)
num_train = x_train.shape[0]

# Build the LightGBM training set.
# (A validation set could be created the same way:
#  lgb.Dataset(x_test, y_test, reference=lgb_train).)
lgb_train = lgb.Dataset(data=x_train, label=y_train)


class MultiLoglossForLGBM:
    """Self-made multi-class log loss objective for LightGBM.

    ``return_grad_and_hess`` is the callable to hand to LightGBM as the custom
    objective. Labels are expected to be integer class indices in
    ``[0, n_class)``.
    """

    def __init__(self, n_class: int = 3, use_softmax: bool = True, epsilon: float = 1e-32) -> None:
        """Initialize.

        Args:
            n_class: Number of classes.
            use_softmax: If True, raw scores are converted to probabilities
                with a softmax before computing grad/hess; if False they are
                used as-is.
            epsilon: Stored on the instance; not used by the computations in
                this class.
        """
        self.name = "my_mlnloss"
        self.n_class = n_class
        self.prob_func = self._get_prob_value if use_softmax else lambda x: x
        self.epsilon = epsilon

    @staticmethod
    def _get_prob_value(preds: np.ndarray) -> np.ndarray:
        """Row-wise softmax of ``preds`` with shape (n_example, n_class)."""
        # Subtracting the row maximum leaves the softmax unchanged but keeps
        # np.exp from overflowing on large raw scores.
        shifted = preds - preds.max(axis=1, keepdims=True)
        exp_scores = np.exp(shifted)
        return exp_scores / exp_scores.sum(axis=1, keepdims=True)

    def _calc_grad_and_hess(
            self, preds: np.ndarray, labels: np.ndarray, weight: Optional[np.ndarray] = None
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Calc grad and hess for preds of shape (n_example, n_class).

        Args:
            preds: Raw scores, shape (n_example, n_class).
            labels: Class indices, shape (n_example,).
            weight: Optional per-example weights, shape (n_example,).

        Returns:
            ``(grad, hess)``, each of shape (n_example, n_class).
        """
        # Convert margins to probability values.
        prob = self.prob_func(preds)
        # Convert labels (1-D class indices) to 1-hot (2-D). Indexing an
        # identity matrix always yields n_class columns, even when some class
        # does not occur in ``labels``.
        class_idx = np.asarray(labels).astype(int)
        one_hot = np.eye(self.n_class)[class_idx]

        grad = prob - one_hot
        hess = prob * (1 - prob)
        if weight is not None:
            weight = np.asarray(weight)
            grad = grad * weight[:, None]
            hess = hess * weight[:, None]
        return grad, hess

    def return_grad_and_hess(self, preds: np.ndarray, data: "lgb.Dataset") -> Tuple[np.ndarray, np.ndarray]:
        """Return grad and hess for LightGBM.

        Args:
            preds: Raw scores, either flattened class-major with shape
                (n_class * n_example,) or 2-D with shape (n_example, n_class).
            data: Object providing ``get_label()`` and ``get_weight()``
                (a ``lgb.Dataset`` when called by LightGBM).

        Returns:
            ``(grad, hess)`` laid out like ``preds``: flattened class-major
            for 1-D input, (n_example, n_class) for 2-D input.
        """
        labels = data.get_label()
        weight = data.get_weight()
        n_example = len(labels)

        preds = np.asarray(preds)
        if preds.ndim == 2:
            # Already (n_example, n_class); no reshaping needed either way.
            # NOTE(review): LightGBM >= 4.0 is expected to pass this layout.
            return self._calc_grad_and_hess(preds, labels, weight)

        # Reshape preds: (n_class * n_example,) => (n_class, n_example) => (n_example, n_class)
        preds = preds.reshape(self.n_class, n_example).T
        grad, hess = self._calc_grad_and_hess(preds, labels, weight)

        # Reshape grad, hess: (n_example, n_class) => (n_class, n_example) => (n_class * n_example,)
        grad = grad.T.reshape(n_example * self.n_class)
        hess = hess.T.reshape(n_example * self.n_class)

        return grad, hess


# Booster configuration for the 3-class iris problem.
params = dict(boosting='gbdt', num_leaves=10, num_class=3)

# NOTE(review): use_softmax=False feeds the raw scores straight into the
# grad/hess formulas -- confirm this is intended.
my_mlnloss = MultiLoglossForLGBM(n_class=3, use_softmax=False)
custom_objective = my_mlnloss.return_grad_and_hess

# Train with the self-made objective.
# NOTE(review): LightGBM >= 4.0 dropped ``fobj``; there the callable is passed
# as params["objective"] instead -- confirm against the installed version.
model = lgb.train(params, train_set=lgb_train, fobj=custom_objective)

# Predict per-class scores on the held-out rows and take the best class.
class_scores = model.predict(x_test)
y_pred = argmax(class_scores, axis=1)

# Report the per-class metrics and the confusion matrix.
cr = classification_report(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
for report in (cr, cm):
    print(report)

コードはここのコードに強く依存しています

Discussion

Fehde

def loglikelihood(preds: np.ndarray, train_data: lgb.Dataset) -> Tuple[np.ndarray]:
    """Return the gradient and hessian of the binary log loss for LightGBM.

    ``preds`` is passed through a sigmoid, so it is treated as a raw score;
    the labels are read from ``train_data``.
    """
    labels = train_data.get_label()
    # Sigmoid: map the raw scores into (0, 1).
    preds = 1. / (1. + np.exp(-preds))
    # First derivative of the log loss w.r.t. the raw score: p - y.
    grad = preds - labels
    # Second derivative: p * (1 - p).
    hess = preds * (1. - preds)
    return grad, hess

ちゃんと理解していないかもしれませんが、grad は誤ってませんか？
誤：grad = preds - labels
正：grad = 1. - preds

ymd

コメントありがとうございます。

たぶん今のであっていて、単純な2乗誤差なら
(preds - 1)でいいのですが、今回は multi class regressionなので
(pred - labels)になっています。 labelsはクラスの方向があり、正解のところだけ1になる1hot のベクトルです。
なので1 classの regressionだと labelsが常に１になりよく見る形になります。

Fehde

ご教授いただき、ありがとうございます！
自分の理解が追いついていないので、また読み込んだ後に分からないところ教えて頂ければと思います！