
YOLOv9 (not MIT)

PINTO
Optional:
sudo apt-get update \
&& sudo apt-get install -y nano libgl1-mesa-dev libopencv-dev

pip install -r requirements.txt
data/original.yaml
train: /home/xxxx/git/yolov9/dataset/images/train
val: /home/xxxx/git/yolov9/dataset/images/val

# Classes
names:
  0: Body
  1: BodyWithWheelchair
  2: Head
  3: Face
  4: Eye
  5: Nose
  6: Mouth
  7: Ear
  8: Hand
  9: Hand-Left
  10: Hand-Right
  11: Foot
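
Before training, it is worth confirming that every image has a matching YOLO-format label file. A minimal sketch, assuming the images/ -> labels/ directory convention the repo's dataloader expects:

# Check that each training image has a corresponding label .txt (paths as above).
from pathlib import Path

img_dir = Path("/home/xxxx/git/yolov9/dataset/images/train")
lbl_dir = Path(str(img_dir).replace("/images/", "/labels/"))
missing = [p.name for p in img_dir.glob("*.jpg") if not (lbl_dir / f"{p.stem}.txt").exists()]
print(f"{len(missing)} images without labels")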
data/hyps/hyp.scratch-high_original.yaml
lr0: 0.01  # initial learning rate (SGD=1E-2, Adam=1E-3)
lrf: 0.01  # final OneCycleLR learning rate (lr0 * lrf)
momentum: 0.937  # SGD momentum/Adam beta1
weight_decay: 0.0005  # optimizer weight decay 5e-4
warmup_epochs: 3.0  # warmup epochs (fractions ok)
warmup_momentum: 0.8  # warmup initial momentum
warmup_bias_lr: 0.1  # warmup initial bias lr
box: 7.5  # box loss gain
cls: 0.5  # cls loss gain
cls_pw: 1.0  # cls BCELoss positive_weight
obj: 0.7  # obj loss gain (scale with pixels)
obj_pw: 1.0  # obj BCELoss positive_weight
dfl: 1.5  # dfl loss gain
iou_t: 0.20  # IoU training threshold
anchor_t: 5.0  # anchor-multiple threshold
# anchors: 3  # anchors per output layer (0 to ignore)
fl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)
hsv_h: 0.015  # image HSV-Hue augmentation (fraction)
hsv_s: 0.7  # image HSV-Saturation augmentation (fraction)
hsv_v: 0.4  # image HSV-Value augmentation (fraction)
degrees: 0.0  # image rotation (+/- deg)
translate: 0.1  # image translation (+/- fraction)
scale: 0.9  # image scale (+/- gain)
shear: 0.0  # image shear (+/- deg)
perspective: 0.0  # image perspective (+/- fraction), range 0-0.001
flipud: 0.0  # image flip up-down (probability)
fliplr: 0.0  # image flip left-right (probability)
mosaic: 1.0  # image mosaic (probability)
mixup: 0.15  # image mixup (probability)
copy_paste: 0.3  # segment copy-paste (probability)
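
Note that fliplr is 0.0 here, which disables horizontal flip; this matters later because flipping swaps the Hand-Left / Hand-Right classes. A quick sanity check, as a sketch assuming PyYAML:

# Confirm horizontal flip stays disabled so left/right hand labels are not corrupted.
import yaml

with open("data/hyps/hyp.scratch-high_original.yaml") as f:
    hyp = yaml.safe_load(f)
assert hyp["fliplr"] == 0.0, "fliplr must be 0.0 to keep Hand-Left/Hand-Right consistent"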
models/detect/yolov9-t_original.yaml
# YOLOv9

# parameters
nc: 8  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple
#activation: nn.LeakyReLU(0.1)
#activation: nn.ReLU()

# anchors
anchors: 3

# gelan backbone
backbone:
  [
   # conv down
   [-1, 1, Conv, [16, 3, 2]],  # 0-P1/2

   # conv down
   [-1, 1, Conv, [32, 3, 2]],  # 1-P2/4

   # elan-1 block
   [-1, 1, ELAN1, [32, 32, 16]],  # 2

   # avg-conv down
   [-1, 1, AConv, [64]],  # 3-P3/8

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [64, 64, 32, 3]],  # 4

   # avg-conv down
   [-1, 1, AConv, [96]],  # 5-P4/16

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [96, 96, 48, 3]],  # 6

   # avg-conv down
   [-1, 1, AConv, [128]],  # 7-P5/32

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [128, 128, 64, 3]],  # 8
  ]

# elan head
head:
  [
   # elan-spp block
   [-1, 1, SPPELAN, [128, 64]],  # 9

   # up-concat merge
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [96, 96, 48, 3]],  # 12

   # up-concat merge
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [64, 64, 32, 3]],  # 15

   # avg-conv-down merge
   [-1, 1, AConv, [48]],
   [[-1, 12], 1, Concat, [1]],  # cat head P4

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [96, 96, 48, 3]],  # 18 (P4/16-medium)

   # avg-conv-down merge
   [-1, 1, AConv, [64]],
   [[-1, 9], 1, Concat, [1]],  # cat head P5

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [128, 128, 64, 3]],  # 21 (P5/32-large)

   # elan-spp block
   [8, 1, SPPELAN, [128, 64]],  # 22

   # up-concat merge
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [96, 96, 48, 3]],  # 25

   # up-concat merge
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [64, 64, 32, 3]],  # 28

   # detect
   [[28, 25, 22, 15, 18, 21], 1, DualDDetect, [nc]],  # Detect(P3, P4, P5)
  ]
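
To sanity-check the edited config before launching a long run, the model can be built directly; a sketch assuming it is executed from the yolov9 repo root:

# Instantiate the custom config and run one dummy forward through the DualDDetect head.
import torch
from models.yolo import Model  # WongKinYiu/yolov9

model = Model("models/detect/yolov9-t_original.yaml", ch=3, nc=8, anchors=3).eval()
with torch.no_grad():
    _ = model(torch.zeros(1, 3, 640, 640))
print(sum(p.numel() for p in model.parameters()), "parameters")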
models/detect/yolov9-t_original-relu.yaml
# YOLOv9

# parameters
nc: 8  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple
#activation: nn.LeakyReLU(0.1)
activation: nn.ReLU()

# anchors
anchors: 3

# gelan backbone
backbone:
  [
   # conv down
   [-1, 1, Conv, [16, 3, 2]],  # 0-P1/2

   # conv down
   [-1, 1, Conv, [32, 3, 2]],  # 1-P2/4

   # elan-1 block
   [-1, 1, ELAN1, [32, 32, 16]],  # 2

   # avg-conv down
   [-1, 1, AConv, [64]],  # 3-P3/8

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [64, 64, 32, 3]],  # 4

   # avg-conv down
   [-1, 1, AConv, [96]],  # 5-P4/16

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [96, 96, 48, 3]],  # 6

   # avg-conv down
   [-1, 1, AConv, [128]],  # 7-P5/32

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [128, 128, 64, 3]],  # 8
  ]

# elan head
head:
  [
   # elan-spp block
   [-1, 1, SPPELAN, [128, 64]],  # 9

   # up-concat merge
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [96, 96, 48, 3]],  # 12

   # up-concat merge
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [64, 64, 32, 3]],  # 15

   # avg-conv-down merge
   [-1, 1, AConv, [48]],
   [[-1, 12], 1, Concat, [1]],  # cat head P4

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [96, 96, 48, 3]],  # 18 (P4/16-medium)

   # avg-conv-down merge
   [-1, 1, AConv, [64]],
   [[-1, 9], 1, Concat, [1]],  # cat head P5

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [128, 128, 64, 3]],  # 21 (P5/32-large)

   # elan-spp block
   [8, 1, SPPELAN, [128, 64]],  # 22

   # up-concat merge
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [96, 96, 48, 3]],  # 25

   # up-concat merge
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [64, 64, 32, 3]],  # 28

   # detect
   [[28, 25, 22, 15, 18, 21], 1, DualDDetect, [nc]],  # Detect(P3, P4, P5)
  ]
  • Before
    utils/plots.py
        def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)):
            # Add one xyxy box to image with label
            if self.pil or not is_ascii(label):
                self.draw.rectangle(box, width=self.lw, outline=color)  # box
                if label:
                    w, h = self.font.getsize(label)  # text width, height
                    outside = box[1] - h >= 0  # label fits outside box
                    self.draw.rectangle(
                        (box[0], box[1] - h if outside else box[1], box[0] + w + 1,
                         box[1] + 1 if outside else box[1] + h + 1),
                        fill=color,
                    )
                    # self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls')  # for PIL>8.0
                    self.draw.text((box[0], box[1] - h if outside else box[1]), label, fill=txt_color, font=self.font)
    
  • After
    utils/plots.py
        def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)):
            # Add one xyxy box to image with label
            if self.pil or not is_ascii(label):
                self.draw.rectangle(box, width=self.lw, outline=color)  # box
                if label:
                    _, _, w, h = self.font.getbbox(label)  # text width, height
                    outside = box[1] - h >= 0  # label fits outside box
                    self.draw.rectangle(
                        (box[0], box[1] - h if outside else box[1], box[0] + w + 1,
                         box[1] + 1 if outside else box[1] + h + 1),
                        fill=color,
                    )
                    # self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls')  # for PIL>8.0
                    self.draw.text((box[0], box[1] - h if outside else box[1]), label, fill=txt_color, font=self.font)
    
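The change is needed because Pillow 10 removed ImageFont.getsize(); getbbox() returns a (left, top, right, bottom) box instead of (width, height). A minimal sketch of the difference, assuming Pillow >= 9.2:

from PIL import ImageFont

font = ImageFont.load_default()
# Pillow < 10:  w, h = font.getsize("label")
# Pillow >= 10: getsize() is gone, so unpack getbbox() instead
left, top, right, bottom = font.getbbox("label")
w, h = right, bottom  # equivalent to the fix above when left/top are 0
print(w, h)
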
PINTO
# Train with Swish (SiLU)
python train_dual.py \
--workers 8 \
--device 0 \
--batch 8 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-t_original.yaml \
--weights yolov9-t-converted.pt \
--name yolov9-t \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 145 \
--close-mosaic 45

# Retrain with ReLU from the Swish (SiLU)-trained weights; mosaic aug disabled (--close-mosaic 100 covers all 100 epochs)
python train_dual.py \
--workers 8 \
--device 0 \
--batch 8 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-t_original-relu.yaml \
--weights best-t.pt \
--name yolov9-t-relu \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 100 \
--close-mosaic 100

# Train with Swish (SiLU), no div-255 normalization, RGB input
python train_dual.py \
--workers 8 \
--device 0 \
--batch 8 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-t_original.yaml \
--weights yolov9-t-converted.pt \
--name yolov9-t-nonorm \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 145 \
--close-mosaic 45
#####################################################

# Train with Swish (SiLU)
python train_dual.py \
--workers 8 \
--device 0 \
--batch 16 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-s-original.yaml \
--weights yolov9-s-converted.pt \
--name yolov9-s \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 145 \
--close-mosaic 45


# Train with Swish (SiLU)
python train_dual.py \
--workers 8 \
--device 0 \
--batch 16 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-m_original.yaml \
--weights yolov9-m-converted.pt \
--name yolov9-m \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 145 \
--close-mosaic 45

# Retrain with ReLU from the Swish (SiLU)-trained weights; mosaic aug disabled
python train_dual.py \
--workers 8 \
--device 0 \
--batch 16 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-m_original-relu.yaml \
--weights best-m.pt \
--name yolov9-m-relu \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 100 \
--close-mosaic 100

# Train with Swish (SiLU)
python train_dual.py \
--workers 8 \
--device 0 \
--batch 16 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-c-original.yaml \
--weights yolov9-c-converted.pt \
--name yolov9-c \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 145 \
--close-mosaic 45

# Retrain with ReLU from the Swish (SiLU)-trained weights; mosaic aug disabled
python train_dual.py \
--workers 8 \
--device 0 \
--batch 16 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-c_original-relu.yaml \
--weights best-c.pt \
--name yolov9-c-relu \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 100 \
--close-mosaic 100


# Train with Swish (SiLU)
python train_dual.py \
--workers 8 \
--device 0 \
--batch 16 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-e_original.yaml \
--weights yolov9-e-converted.pt \
--name yolov9-e \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 145 \
--close-mosaic 45

# Retrain with ReLU from the Swish (SiLU)-trained weights; mosaic aug disabled
python train_dual.py \
--workers 8 \
--device 0 \
--batch 16 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-e_original-relu.yaml \
--weights best-e.pt \
--name yolov9-e-relu \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 100 \
--close-mosaic 100


#==============================================

python train_dual.py \
--workers 8 \
--device 0 \
--batch 8 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-t_original.yaml \
--weights runs/train/yolov9-t/weights/last.pt \
--name yolov9-t \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 245 \
--close-mosaic 45 \
--resume

# Difference between val.py and val_dual.py
# val.py validates the single-branch model after the dual branch has been removed
# val_dual.py validates the model while it still has the dual branch
# i.e. the difference is before vs. after re-parameterization
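
A quick way to tell which script a given checkpoint needs, as a sketch assuming the yolov9 repo is importable so torch.load can unpickle the model classes:

import torch

# Dual-branch checkpoint (before re-parameterization): use val_dual.py.
dual = torch.load("runs/train/yolov9-t/weights/best-t.pt", map_location="cpu")["model"]
print(type(dual.model[-1]).__name__)    # e.g. DualDDetect

# Re-parameterized checkpoint (single branch): use val.py.
single = torch.load("yolov9_wholebody_with_wheelchair_t.pt", map_location="cpu")["model"]
print(type(single.model[-1]).__name__)  # e.g. DDetect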

python val_dual.py \
--data data/original.yaml \
--img 640 \
--batch 32 \
--conf 0.001 \
--iou 0.7 \
--device 0 \
--weights runs/train/yolov9-t/weights/best-t.pt \
--name yolov9_t_640_val

python val_dual.py \
--data data/original.yaml \
--img 640 \
--batch 32 \
--conf 0.001 \
--iou 0.7 \
--device 0 \
--weights runs/train/yolov9-t/weights/best-t-relu.pt \
--name yolov9_t_640_val

python val_dual.py \
--data data/original.yaml \
--img 640 \
--batch 4 \
--conf 0.001 \
--iou 0.7 \
--device 0 \
--weights best-e.pt \
--name yolov9_e_640_val
PINTO
  • YOLOv9-T - left/right hand flip enabled - nothing modified, so the left/right classes are expected to break
             Class Images Instances     P     R mAP50 mAP50-95
               all   2280     69034 0.697 0.536 0.577    0.367
              Body   2280     13031 0.737 0.721 0.763    0.554
BodyWithWheelchair   2280       133 0.759 0.955 0.968    0.858
              Head   2280     11117 0.845 0.757 0.809    0.558
              Face   2280      5563 0.836 0.687 0.739    0.479
               Eye   2280      4748 0.652 0.299 0.343    0.134
              Nose   2280      4607 0.709  0.42 0.462    0.242
             Mouth   2280      3757 0.653 0.373 0.406    0.181
               Ear   2280      4328 0.705 0.417 0.463    0.245
              Hand   2280      7567 0.848 0.537 0.667    0.394
         Hand-Left   2280      3798 0.477 0.369 0.363    0.215
        Hand-Right   2280      3769  0.48 0.358 0.354    0.213
              Foot   2280      6616 0.668 0.544 0.592    0.327
PINTO
  • YOLOv9-T - no left/right hand flip - horizontal-flip augmentation disabled
             Class Images Instances     P     R mAP50 mAP50-95
               all   2280     69034 0.743 0.533 0.596    0.376
              Body   2280     13031 0.735 0.724 0.764    0.554
BodyWithWheelchair   2280       133 0.773  0.94 0.947    0.820
              Head   2280     11117 0.833 0.755 0.803    0.551
              Face   2280      5563 0.837 0.682 0.739    0.496
               Eye   2280      4748 0.679 0.278 0.328    0.126
              Nose   2280      4607 0.721 0.387 0.433    0.222
             Mouth   2280      3757  0.69 0.356 0.393    0.174
               Ear   2280      4328  0.71 0.396 0.447    0.235
              Hand   2280      7567 0.856 0.512 0.655    0.383
         Hand-Left   2280      3798 0.712 0.419 0.535    0.320
        Hand-Right   2280      3769 0.701 0.418 0.521    0.314
              Foot   2280      6616  0.67 0.534 0.585    0.322

Comparison with YOLOX-Tiny

             Class Images Instances       mAP50-95
                                    YOLOX-Tiny YOLOv9-T
               all   2280     69034      0.339    0.376
              Body   2280     13031      0.453    0.554
BodyWithWheelchair   2280       133      0.688    0.820
              head   2280     11117      0.482    0.551
              face   2280      5563      0.434    0.496
               eye   2280      4748      0.120    0.126
              nose   2280      4607      0.215    0.222
             mouth   2280      3757      0.185    0.174
               ear   2280      4328      0.213    0.235
              hand   2280      7567      0.373    0.383
         hand_left   2280      3798      0.307    0.320
        hand_right   2280      3769      0.304    0.314
              foot   2280      6616      0.292    0.322

YOLOv9-E - no left/right hand flip - horizontal-flip augmentation disabled (intermediate result at epoch 84)

             Class Images Instances      mAP50-95
                                    YOLOX-X YOLOv9-E
               all   2117     48988   0.554    0.624
              Body   2117     11624   0.614    0.712
BodyWithWheelchair   2117       153   0.871    0.851
              Head   2117      9936   0.585    0.701
              Face   2117      5653   0.506    0.621
              Hand   2117      7525   0.513    0.570
         Hand-Left   2117      3739   0.456    0.527
        Hand-Right   2117      3786   0.449    0.527
              Foot   2117      6572   0.431    0.485
PINTO
  • YOLOv9-T - Re-parameterization

    reparameterization.py
    import torch
    from models.yolo import Model
    import argparse
    
    def main(args):
        model_type: str = args.type  # avoid shadowing the built-in `type`
        cfg: str = args.cfg
        check_point_file: str = args.weights
        save_pt_file_name = args.save
    
        ckpt = torch.load(check_point_file, map_location='cpu')
        names = ckpt['model'].names
        nc = ckpt['model'].nc
    
        device = torch.device("cpu")
        model = Model(cfg, ch=3, nc=nc, anchors=3)
        print('')
        print(f'@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ nc: {nc}')
        print('')
        model = model.to(device)
        _ = model.eval()
        model.names = names
        model.nc = nc
    
        idx = 0
        # idx tracks the layer index encoded in each state_dict key of the new
        # single-branch model; non-head layers copy 1:1, while detect-head tensors
        # map cv2->cv4, cv3->cv5, dfl->dfl2 at a shifted index in the dual-branch ckpt
        if model_type in ['n', 't', 's']:
            for k, v in model.state_dict().items():
                if "model.{}.".format(idx) in k:
                    if idx < 22:
                        kr = k.replace("model.{}.".format(idx), "model.{}.".format(idx))  # same index: direct copy
                        # zero the tensor in place, then add the checkpoint tensor;
                        # state_dict() tensors share storage with the model, so this
                        # overwrites the weights without rebuilding the dict
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                        print(k, "perfectly matched!!")
                    elif "model.{}.cv2.".format(idx) in k:
                        kr = k.replace("model.{}.cv2.".format(idx), "model.{}.cv4.".format(idx+7))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                        print(k, "perfectly matched!!")
                    elif "model.{}.cv3.".format(idx) in k:
                        kr = k.replace("model.{}.cv3.".format(idx), "model.{}.cv5.".format(idx+7))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                        print(k, "perfectly matched!!")
                    elif "model.{}.dfl.".format(idx) in k:
                        kr = k.replace("model.{}.dfl.".format(idx), "model.{}.dfl2.".format(idx+7))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                        print(k, "perfectly matched!!")
                else:
                    while True:
                        idx += 1
                        if "model.{}.".format(idx) in k:
                            break
                    if idx < 22:
                        kr = k.replace("model.{}.".format(idx), "model.{}.".format(idx))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                        print(k, "perfectly matched!!")
                    elif "model.{}.cv2.".format(idx) in k:
                        kr = k.replace("model.{}.cv2.".format(idx), "model.{}.cv4.".format(idx+7))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                        print(k, "perfectly matched!!")
                    elif "model.{}.cv3.".format(idx) in k:
                        kr = k.replace("model.{}.cv3.".format(idx), "model.{}.cv5.".format(idx+7))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                        print(k, "perfectly matched!!")
                    elif "model.{}.dfl.".format(idx) in k:
                        kr = k.replace("model.{}.dfl.".format(idx), "model.{}.dfl2.".format(idx+7))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                        print(k, "perfectly matched!!")
    
        elif model_type == 'm':
            for k, v in model.state_dict().items():
                if "model.{}.".format(idx) in k:
                    if idx < 22:
                        kr = k.replace("model.{}.".format(idx), "model.{}.".format(idx+1))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                        print(k, "perfectly matched!!")
                    elif "model.{}.cv2.".format(idx) in k:
                        kr = k.replace("model.{}.cv2.".format(idx), "model.{}.cv4.".format(idx+16))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                        print(k, "perfectly matched!!")
                    elif "model.{}.cv3.".format(idx) in k:
                        kr = k.replace("model.{}.cv3.".format(idx), "model.{}.cv5.".format(idx+16))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                        print(k, "perfectly matched!!")
                    elif "model.{}.dfl.".format(idx) in k:
                        kr = k.replace("model.{}.dfl.".format(idx), "model.{}.dfl2.".format(idx+16))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                        print(k, "perfectly matched!!")
                else:
                    while True:
                        idx += 1
                        if "model.{}.".format(idx) in k:
                            break
                    if idx < 22:
                        kr = k.replace("model.{}.".format(idx), "model.{}.".format(idx+1))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                        print(k, "perfectly matched!!")
                    elif "model.{}.cv2.".format(idx) in k:
                        kr = k.replace("model.{}.cv2.".format(idx), "model.{}.cv4.".format(idx+16))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                        print(k, "perfectly matched!!")
                    elif "model.{}.cv3.".format(idx) in k:
                        kr = k.replace("model.{}.cv3.".format(idx), "model.{}.cv5.".format(idx+16))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                        print(k, "perfectly matched!!")
                    elif "model.{}.dfl.".format(idx) in k:
                        kr = k.replace("model.{}.dfl.".format(idx), "model.{}.dfl2.".format(idx+16))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                        print(k, "perfectly matched!!")
    
        elif model_type == 'c':
            for k, v in model.state_dict().items():
                if "model.{}.".format(idx) in k:
                    if idx < 22:
                        kr = k.replace("model.{}.".format(idx), "model.{}.".format(idx+1))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    elif "model.{}.cv2.".format(idx) in k:
                        kr = k.replace("model.{}.cv2.".format(idx), "model.{}.cv4.".format(idx+16))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    elif "model.{}.cv3.".format(idx) in k:
                        kr = k.replace("model.{}.cv3.".format(idx), "model.{}.cv5.".format(idx+16))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    elif "model.{}.dfl.".format(idx) in k:
                        kr = k.replace("model.{}.dfl.".format(idx), "model.{}.dfl2.".format(idx+16))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                else:
                    while True:
                        idx += 1
                        if "model.{}.".format(idx) in k:
                            break
                    if idx < 22:
                        kr = k.replace("model.{}.".format(idx), "model.{}.".format(idx+1))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    elif "model.{}.cv2.".format(idx) in k:
                        kr = k.replace("model.{}.cv2.".format(idx), "model.{}.cv4.".format(idx+16))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    elif "model.{}.cv3.".format(idx) in k:
                        kr = k.replace("model.{}.cv3.".format(idx), "model.{}.cv5.".format(idx+16))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    elif "model.{}.dfl.".format(idx) in k:
                        kr = k.replace("model.{}.dfl.".format(idx), "model.{}.dfl2.".format(idx+16))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
    
        elif model_type == 'e':
            for k, v in model.state_dict().items():
                if "model.{}.".format(idx) in k:
                    if idx < 29:
                        kr = k.replace("model.{}.".format(idx), "model.{}.".format(idx))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                        print(k, "perfectly matched!!")
                    elif idx < 42:
                        kr = k.replace("model.{}.".format(idx), "model.{}.".format(idx+7))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                        print(k, "perfectly matched!!")
                    elif "model.{}.cv2.".format(idx) in k:
                        kr = k.replace("model.{}.cv2.".format(idx), "model.{}.cv4.".format(idx+7))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                        print(k, "perfectly matched!!")
                    elif "model.{}.cv3.".format(idx) in k:
                        kr = k.replace("model.{}.cv3.".format(idx), "model.{}.cv5.".format(idx+7))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                        print(k, "perfectly matched!!")
                    elif "model.{}.dfl.".format(idx) in k:
                        kr = k.replace("model.{}.dfl.".format(idx), "model.{}.dfl2.".format(idx+7))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                        print(k, "perfectly matched!!")
                else:
                    while True:
                        idx += 1
                        if "model.{}.".format(idx) in k:
                            break
                    if idx < 29:
                        kr = k.replace("model.{}.".format(idx), "model.{}.".format(idx))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                        print(k, "perfectly matched!!")
                    elif idx < 42:
                        kr = k.replace("model.{}.".format(idx), "model.{}.".format(idx+7))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                        print(k, "perfectly matched!!")
                    elif "model.{}.cv2.".format(idx) in k:
                        kr = k.replace("model.{}.cv2.".format(idx), "model.{}.cv4.".format(idx+7))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                        print(k, "perfectly matched!!")
                    elif "model.{}.cv3.".format(idx) in k:
                        kr = k.replace("model.{}.cv3.".format(idx), "model.{}.cv5.".format(idx+7))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                        print(k, "perfectly matched!!")
                    elif "model.{}.dfl.".format(idx) in k:
                        kr = k.replace("model.{}.dfl.".format(idx), "model.{}.dfl2.".format(idx+7))
                        model.state_dict()[k] -= model.state_dict()[k]
                        model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                        print(k, "perfectly matched!!")
        _ = model.eval()
    
        m_ckpt = {'model': model.half(),
                'optimizer': None,
                'best_fitness': None,
                'ema': None,
                'updates': None,
                'opt': None,
                'git': None,
                'date': None,
                'epoch': -1}
        torch.save(m_ckpt, save_pt_file_name)
    
    if __name__ == "__main__":
        parser = argparse.ArgumentParser()
        parser.add_argument('--type', type=str, default='t', help='convert model type (n, t, s, m, c or e)')
        parser.add_argument('--cfg', type=str, default='./models/detect/gelan-t.yaml', help='model.yaml path')
        parser.add_argument('--weights', type=str, default='./best-t.pt', help='weights path')
        parser.add_argument('--save', default='./yolov9_wholebody_with_wheelchair_t.pt', type=str, help='save path')
        args = parser.parse_args()
        main(args)
    
  • Re-parameterization and ONNX export

    python reparameterization.py \
    --type t \
    --cfg ./models/detect/gelan-t.yaml \
    --weights ./best-t.pt \
    --save ./yolov9_wholebody_with_wheelchair_t.pt
    
    python export.py \
    --data data/original.yaml \
    --weights yolov9_wholebody_with_wheelchair_t.pt \
    --imgsz 384 672 \
    --batch-size 1 \
    --device cpu \
    --opset 11 \
    --include onnx
    
    onnxsim yolov9_wholebody_with_wheelchair_t.onnx yolov9_wholebody_with_wheelchair_t.onnx
    onnxsim yolov9_wholebody_with_wheelchair_t.onnx yolov9_wholebody_with_wheelchair_t.onnx
    onnxsim yolov9_wholebody_with_wheelchair_t.onnx yolov9_wholebody_with_wheelchair_t.onnx
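
    onnxsim is run three times because a single pass does not always reach a fixed
    point; the same applies to the ReLU model below. A sketch of looping until the
    graph stops changing, using onnxsim's Python API:

    import onnx
    from onnxsim import simplify

    path = "yolov9_wholebody_with_wheelchair_t.onnx"
    model = onnx.load(path)
    prev = None
    while prev != model.SerializeToString():
        prev = model.SerializeToString()
        model, ok = simplify(model)
        assert ok, "simplified model failed the output check"
    onnx.save(model, path)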
    
  • YOLOv9-T-ReLU - Re-parameterization

  • Re-parameterization and ONNX export

    python reparameterization_relu.py \
    --type t \
    --cfg ./models/detect/gelan-t-relu.yaml \
    --weights ./best-t-relu.pt \
    --save ./yolov9_wholebody_with_wheelchair_t_relu.pt
    
    python export.py \
    --data data/original.yaml \
    --weights yolov9_wholebody_with_wheelchair_t_relu.pt \
    --imgsz 384 672 \
    --batch-size 1 \
    --device cpu \
    --opset 11 \
    --include onnx
    
    onnxsim yolov9_wholebody_with_wheelchair_t_relu.onnx yolov9_wholebody_with_wheelchair_t_relu.onnx
    onnxsim yolov9_wholebody_with_wheelchair_t_relu.onnx yolov9_wholebody_with_wheelchair_t_relu.onnx
    onnxsim yolov9_wholebody_with_wheelchair_t_relu.onnx yolov9_wholebody_with_wheelchair_t_relu.onnx
    
PINTO
  • YOLOv9-T - no left/right hand flip + SiLU + QAT

https://github.com/levipereira/yolov9-qat

Fix for export_qat.py
export_qat.py
    if is_model_qat:
        warnings.filterwarnings("ignore")
        LOGGER.info(f'{prefix} Model QAT Detected ...')
        quant_nn.TensorQuantizer.use_fb_fake_quant = True  # so ONNX export emits QuantizeLinear/DequantizeLinear nodes
        model.eval()
        quantize.initialize()
        quantize.replace_custom_module_forward(model)
        with torch.no_grad():
            torch.onnx.export(
                model,
                im,
                f,
                opset_version=13,
                input_names=['images'],
                output_names=output_names)
Fix for quantize.py
quantize.py
def remove_redundant_qdq_model(onnx_model, f):
    check_requirements('onnx')
    import onnx

    domain: str = onnx_model.domain
    ir_version: int = onnx_model.ir_version
    meta_data = {'domain': domain, 'ir_version': ir_version}
    metadata_props = None
    if hasattr(onnx_model, 'metadata_props'):
        metadata_props = onnx_model.metadata_props
    graph = gs.import_onnx(onnx_model)
    nodes = graph.nodes

    mul_nodes = [node for node in nodes if node.op == "Mul" and node.i(0).op == "Conv" and node.i(1).op == "Sigmoid"]
    many_outputs_mul_nodes = []

    for node in mul_nodes:
        try:
            for i in range(99):
                node.o(i)
        except Exception:  # node.o(i) raises once i exceeds the node's output count
            if i > 1:
                mul_nodename_outnum = {"node": node, "out_num": i}
                many_outputs_mul_nodes.append(mul_nodename_outnum)

    for node_dict in many_outputs_mul_nodes:
        if node_dict["out_num"] == 2:
            if node_dict["node"].o(0).op == "QuantizeLinear" and node_dict["node"].o(1).op == "QuantizeLinear":
                if node_dict["node"].o(1).o(0).o(0).op == "Concat":
                    concat_dq_out_name = node_dict["node"].o(1).o(0).outputs[0].name
                    for i, concat_input in enumerate(node_dict["node"].o(1).o(0).o(0).inputs):
                        if concat_input.name == concat_dq_out_name:
                            node_dict["node"].o(1).o(0).o(0).inputs[i] = node_dict["node"].o(0).o(0).outputs[0]
                else:
                    node_dict["node"].o(1).o(0).o(0).inputs[0] = node_dict["node"].o(0).o(0).outputs[0]


            # elif node_dict["node"].o(0).op == "QuantizeLinear" and node_dict["node"].o(1).op == "Concat":
            #     concat_dq_out_name = node_dict["node"].outputs[0].outputs[0].inputs[0].name
            #     for i, concat_input in enumerate(node_dict["node"].outputs[0].outputs[1].inputs):
            #         if concat_input.name == concat_dq_out_name:
            #             #print("elif", concat_input.name, concat_dq_out_name )
            #             #print("will-be", node_dict["node"].outputs[0].outputs[1].inputs[i], node_dict["node"].outputs[0].outputs[0].o().outputs[0]  )
            #             node_dict["node"].outputs[0].outputs[1].inputs[i] = node_dict["node"].outputs[0].outputs[0].o().outputs[0]


    # add_nodes = [node for node in nodes if node.op == "Add"]
    # many_outputs_add_nodes = []
    # for node in add_nodes:
    #     try:
    #         for i in range(99):
    #             node.o(i)
    #     except:
    #         if i > 1 and node.o().op == "QuantizeLinear":
    #             add_nodename_outnum = {"node": node, "out_num": i}
    #             many_outputs_add_nodes.append(add_nodename_outnum)


    # for node_dict in many_outputs_add_nodes:
    #     if node_dict["node"].outputs[0].outputs[0].op == "QuantizeLinear" and node_dict["node"].outputs[0].outputs[1].op == "Concat":
    #         concat_dq_out_name = node_dict["node"].outputs[0].outputs[0].inputs[0].name
    #         for i, concat_input in enumerate(node_dict["node"].outputs[0].outputs[1].inputs):
    #             if concat_input.name == concat_dq_out_name:
    #                 node_dict["node"].outputs[0].outputs[1].inputs[i] = node_dict["node"].outputs[0].outputs[0].o().outputs[0]

    exported_graph = gs.export_onnx(graph, **meta_data)
    if metadata_props is not None:
        exported_graph.metadata_props.extend(metadata_props)
    onnx.save(exported_graph, f)
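
To confirm the fix, a sketch that checks ir_version and metadata_props survive the round trip (both file names here are hypothetical):

import onnx

before = onnx.load("model_qat.onnx")             # graph before QDQ cleanup (hypothetical path)
after = onnx.load("model_qat_qdq_removed.onnx")  # graph written by remove_redundant_qdq_model (hypothetical path)
assert after.ir_version == before.ir_version
before_props = {(p.key, p.value) for p in before.metadata_props}
after_props = {(p.key, p.value) for p in after.metadata_props}
assert before_props <= after_props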
docker pull nvcr.io/nvidia/pytorch:23.02-py3
cd ~
mkdir -p work && cd work
git clone https://github.com/WongKinYiu/yolov9.git && cd yolov9
git checkout 5b1ea9a8b3f0ffe4fe0e203ec6232d788bb3fcff

docker run -it --gpus all  \
--net host  \
--ipc host \
-v `pwd`:/yolov9 \
nvcr.io/nvidia/pytorch:23.02-py3

cd /
# この独自のForkは onnx_graphsurgeon のバグを修正済み
# ir_version や metadata が破壊されるバグの修正
git clone https://github.com/PINTO0309/yolov9-qat.git && cd yolov9-qat
./patch_yolov9.sh /yolov9
./install_dependencies.sh --defaults  --trex

cd /yolov9
Patch the following file by hand; the guard added at lines 268-269 recomputes the bin centers when they outnumber the histogram counts:
/usr/local/lib/python3.8/dist-packages/pytorch_quantization/calib/histogram.py
257 def _compute_amax_mse(calib_hist, calib_bin_edges, num_bits, unsigned, stride=1, start_bin=128):
258     """Returns amax that minimizes MSE of the collected histogram"""
259
260     # If calibrator hasn't collected any data, return none
261     if calib_bin_edges is None and calib_hist is None:
262         return None
263
264     counts = torch.from_numpy(calib_hist[:]).float()
265     edges = torch.from_numpy(calib_bin_edges[:]).float()
266     centers = (edges[1:] + edges[:-1]) / 2
267
268     if len(centers) > len(counts):
269         centers = (edges[2:] + edges[:-2]) / 2

Temporary sanity check with the SiLU activation version (large accuracy degradation, low performance)

data/original.yaml
train: /yolov9/dataset/images/train
val: /yolov9/dataset/images/val

# Classes
names:
  0: Body
  1: BodyWithWheelchair
  2: Head
  3: Face
  4: Hand
  5: Hand-Left
  6: Hand-Right
  7: Foot
data/hyps/hyp.scratch-high_original.yaml
(identical to the hyp.scratch-high_original.yaml shown earlier)
models/detect/yolov9-t_original.yaml
(identical to the yolov9-t_original.yaml shown earlier)
python qat.py quantize \
--device cuda:0 \
--weights yolov9_wholebody_with_wheelchair_t.pt \
--name yolov9_qat \
--exist-ok \
--batch-size 4 \
--data data/original.yaml \
--img 640 \
--hyp data/hyps/hyp.scratch-high_original.yaml
  • QAT was run with SiLU left in place, so the low accuracy is expected
             Class Images Instances     P     R mAP50 mAP50-95
               all   2117     48988 0.696 0.565 0.619    0.395
              Body   2117     11624   0.7 0.715 0.753    0.529
BodyWithWheelchair   2117       153 0.553 0.865 0.823    0.652
              Head   2117      9936 0.796 0.775 0.812    0.539
              Face   2117      5653 0.794 0.583 0.632    0.391
              Hand   2117      7525 0.805 0.453 0.578    0.313
         Hand-Left   2117      3739 0.603 0.348 0.422    0.235
        Hand-Right   2117      3786 0.638 0.347 0.426    0.235
              Foot   2117      6572 0.683 0.439 0.504    0.263

QAT: Epoch-10, weights saved as runs/qat/yolov9_qat/weights/qat_ep_10_ap_0.3946_yolov9_wholebody_with_wheelchair_t.pt (11.4 MB)

Eval Model | AP       | AP50     | Precision  | Recall  
-------------------------------------------------------
Origin     | 0.399    | 0.622    | 0.696      | 0.567   
PTQ        | 0.395    | 0.618    | 0.693      | 0.565   
QAT - Best | 0.395    | 0.618    | 0.699      | 0.562   

QAT: Eval - Epoch 10 | AP: 0.3946  | AP50: 0.6187 | Precision: 0.6964 | Recall: 0.5655

Check with the ReLU activation version (small accuracy degradation, high performance)

python qat.py quantize \
--device cuda:0 \
--weights yolov9_wholebody_with_wheelchair_t_relu.pt \
--name yolov9_qat \
--exist-ok \
--batch-size 4 \
--data data/original.yaml \
--img 640 \
--hyp data/hyps/hyp.scratch-high_original.yaml
  • Because QAT was run after switching to ReLU, the drop is held to 0.2 points
             Class Images Instances mAP50-95
               all   2117     48988    0.469
              Body   2117     11624    0.564
BodyWithWheelchair   2117       153    0.770
              Head   2117      9936    0.580
              Face   2117      5653    0.482
              Hand   2117      7525    0.388
         Hand-Left   2117      3739    0.331
        Hand-Right   2117      3786    0.321
              Foot   2117      6572    0.317

Eval Model | AP       | AP50     | Precision  | Recall  
-------------------------------------------------------
Origin     | 0.471    | 0.7      | 0.774      | 0.628   
PTQ        | 0.467    | 0.696    | 0.773      | 0.624   
QAT - Best | 0.469    | 0.697    | 0.771      | 0.626   

QAT: Eval - Epoch 10 | AP: 0.4685  | AP50: 0.6975 | Precision: 0.7686 | Recall: 0.6251
  • Export to ONNX
python export_qat.py \
--weights runs/qat/yolov9_qat/weights/qat_best_yolov9_wholebody_with_wheelchair_t_relu.pt \
--img-size 384 672 \
--include onnx \
--simplify

  • Deploy to TensorRT
/usr/src/tensorrt/bin/trtexec \
--onnx=runs/qat/yolov9_qat/weights/qat_best_yolov9_wholebody_with_wheelchair_t_relu.onnx \
--int8 --fp16  \
--useCudaGraph \
--saveEngine=runs/qat/yolov9_qat/weights/qat_best_yolov9_wholebody_with_wheelchair_t_relu.engine

Inference performance benchmarks with TensorRT

YOLOv9-T-ReLU

  • Benchmark result: 992 queries/sec
    export filepath_no_ext=runs/qat/yolov9_qat/weights/qat_best_yolov9_wholebody_with_wheelchair_t_relu
    trtexec \
    --onnx=${filepath_no_ext}.onnx \
    --fp16 \
    --int8 \
    --saveEngine=${filepath_no_ext}.engine \
    --timingCacheFile=${filepath_no_ext}.engine.timing.cache \
    --warmUp=500 \
    --duration=10  \
    --useCudaGraph \
    --useSpinWait \
    --noDataTransfers
    
    === Performance summary ===
    Throughput: 992.939 qps
    Latency: min = 0.982422 ms, max = 1.85657 ms, mean = 1.00606 ms, median = 0.986084 ms, percentile(90%) = 0.987305 ms, percentile(95%) = 1.22876 ms, percentile(99%) = 1.43359 ms
    Enqueue Time: min = 0.000976562 ms, max = 0.0273438 ms, mean = 0.0015637 ms, median = 0.00146484 ms, percentile(90%) = 0.00219727 ms, percentile(95%) = 0.00292969 ms, percentile(99%) = 0.00402832 ms
    H2D Latency: min = 0 ms, max = 0 ms, mean = 0 ms, median = 0 ms, percentile(90%) = 0 ms, percentile(95%) = 0 ms, percentile(99%) = 0 ms
    GPU Compute Time: min = 0.982422 ms, max = 1.85657 ms, mean = 1.00606 ms, median = 0.986084 ms, percentile(90%) = 0.987305 ms, percentile(95%) = 1.22876 ms, percentile(99%) = 1.43359 ms
    D2H Latency: min = 0 ms, max = 0 ms, mean = 0 ms, median = 0 ms, percentile(90%) = 0 ms, percentile(95%) = 0 ms, percentile(99%) = 0 ms
    Total Host Walltime: 10.0026 s
    Total GPU Compute Time: 9.99216 s
    * GPU compute time is unstable, with coefficient of variance = 8.17993%.
      If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability.
    Explanations of the performance metrics are printed in the verbose logs.
    

YOLOv9-E-ReLU

  • Benchmark result: 226 queries/sec
    export filepath_no_ext=runs/qat/yolov9_qat_e/weights/qat_best_yolov9_wholebody_with_wheelchair_e_relu
    trtexec \
    --onnx=${filepath_no_ext}.onnx \
    --fp16 \
    --int8 \
    --saveEngine=${filepath_no_ext}.engine \
    --timingCacheFile=${filepath_no_ext}.engine.timing.cache \
    --warmUp=500 \
    --duration=10  \
    --useCudaGraph \
    --useSpinWait \
    --noDataTransfers
    
    === Performance summary ===
    Throughput: 226.477 qps
    Latency: min = 3.9444 ms, max = 6.67444 ms, mean = 4.41436 ms, median = 4.34595 ms, percentile(90%) = 4.98584 ms, percentile(95%) = 5.06982 ms, percentile(99%) = 5.30225 ms
    Enqueue Time: min = 0.000976562 ms, max = 0.0146484 ms, mean = 0.00329979 ms, median = 0.00292969 ms, percentile(90%) = 0.00537109 ms, percentile(95%) = 0.0065918 ms, percentile(99%) = 0.0100098 ms
    H2D Latency: min = 0 ms, max = 0 ms, mean = 0 ms, median = 0 ms, percentile(90%) = 0 ms, percentile(95%) = 0 ms, percentile(99%) = 0 ms
    GPU Compute Time: min = 3.9444 ms, max = 6.67444 ms, mean = 4.41436 ms, median = 4.34595 ms, percentile(90%) = 4.98584 ms, percentile(95%) = 5.06982 ms, percentile(99%) = 5.30225 ms
    D2H Latency: min = 0 ms, max = 0 ms, mean = 0 ms, median = 0 ms, percentile(90%) = 0 ms, percentile(95%) = 0 ms, percentile(99%) = 0 ms
    Total Host Walltime: 10.0099 s
    Total GPU Compute Time: 10.0074 s
    * GPU compute time is unstable, with coefficient of variance = 9.09848%.
      If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability.
    Explanations of the performance metrics are printed in the verbose logs.
    
PINTO

```
Eval Model | AP       | AP50     | Precision  | Recall  
-------------------------------------------------------
Origin     | 0.646    | 0.85     | 0.874      | 0.772   
PTQ        | 0.645    | 0.848    | 0.877      | 0.769   
QAT - Best | 0.647    | 0.85     | 0.88       | 0.768  
```
PINTO

Re-parameterization -> ONNX Export -> Post-Process merge script

Re-parameterization -> ONNX Export script
export_onnx.sh
TYPE=t
# RELU= or RELU=-relu
RELU=
RELUS=$(echo ${RELU} | sed 's/-/_/g')
MODEL_NAME=yolov9_${TYPE}_wholebody_with_wheelchair
SUFFIX="0100_1x3x"
# best-t.pt
# best-t-relu.pt
# best-e.pt
# best-e-relu.pt
MODEL_PATH=best-${TYPE}${RELU}.pt

RESOLUTIONS=(
    # "128 160"
    # "128 256"
    # "192 320"
    # "192 416"
    # "192 640"
    # "192 800"
    # "256 320"
    # "256 416"
    # "256 448"
    # "256 640"
    # "256 800"
    # "256 960"
    # "288 1280"
    # "288 480"
    # "288 640"
    # "288 800"
    # "288 960"
    # "320 320"
    # "384 1280"
    # "384 480"
    # "384 640"
    # "384 800"
    # "384 960"
    # "416 416"
    # "480 1280"
    # "480 640"
    # "480 800"
    # "480 960"
    # "512 512"
    # "512 640"
    # "512 896"
    # "544 1280"
    # "544 800"
    # "544 960"
    # "640 640"
    # "736 1280"
    # "576 1024"
    "384 672"
)

python reparameterization${RELUS}.py \
--type ${TYPE} \
--cfg ./models/detect/gelan-${TYPE}${RELU}.yaml \
--weights ${MODEL_PATH} \
--save ${MODEL_NAME}${RELUS}.pt

for((i=0; i<${#RESOLUTIONS[@]}; i++))
do
    RESOLUTION=(`echo ${RESOLUTIONS[i]}`)
    H=${RESOLUTION[0]}
    W=${RESOLUTION[1]}

    python export.py \
    --data data/original.yaml \
    --weights ${MODEL_NAME}${RELUS}.pt \
    --imgsz ${H} ${W} \
    --batch-size 1 \
    --device cpu \
    --opset 11 \
    --include onnx

    mv ${MODEL_NAME}${RELUS}.onnx ${MODEL_NAME}${RELUS}_${SUFFIX}${H}x${W}.onnx

    sng4onnx \
    --input_onnx_file_path ${MODEL_NAME}${RELUS}_${SUFFIX}${H}x${W}.onnx \
    --output_onnx_file_path ${MODEL_NAME}${RELUS}_${SUFFIX}${H}x${W}.onnx

    onnxsim ${MODEL_NAME}${RELUS}_${SUFFIX}${H}x${W}.onnx ${MODEL_NAME}${RELUS}_${SUFFIX}${H}x${W}.onnx
    onnxsim ${MODEL_NAME}${RELUS}_${SUFFIX}${H}x${W}.onnx ${MODEL_NAME}${RELUS}_${SUFFIX}${H}x${W}.onnx
    onnxsim ${MODEL_NAME}${RELUS}_${SUFFIX}${H}x${W}.onnx ${MODEL_NAME}${RELUS}_${SUFFIX}${H}x${W}.onnx
done

python export.py \
--data data/original.yaml \
--weights ${MODEL_NAME}${RELUS}.pt \
--device cpu \
--opset 11 \
--include onnx \
--dynamic
mv ${MODEL_NAME}${RELUS}.onnx ${MODEL_NAME}${RELUS}_Nx3HxW.onnx
onnxsim ${MODEL_NAME}${RELUS}_Nx3HxW.onnx ${MODEL_NAME}${RELUS}_Nx3HxW.onnx
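
With TYPE=t, RELU= and SUFFIX=0100_1x3x, the loop above writes e.g. yolov9_t_wholebody_with_wheelchair_0100_1x3x384x672.onnx. A quick sketch to confirm the static input shape of an exported graph:

import onnx

m = onnx.load("yolov9_t_wholebody_with_wheelchair_0100_1x3x384x672.onnx")
dims = m.graph.input[0].type.tensor_type.shape.dim
print([d.dim_param or d.dim_value for d in dims])  # expected: [1, 3, 384, 672]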
Post-Process merge script
#!/bin/bash

# pip install -U pip \
# && pip install onnxsim
# && pip install -U simple-onnx-processing-tools \
# && pip install -U onnx \
# && python3 -m pip install -U onnx_graphsurgeon --index-url https://pypi.ngc.nvidia.com \
# && pip install tensorflow==2.14.0

TYPE=t
# RELU= or RELU=-relu
RELU=
RELUS=$(echo ${RELU} | sed 's/-/_/g')
MODEL_NAME=yolov9_${TYPE}_wholebody_with_wheelchair${RELUS}
SUFFIX="0100_1x3x"

OPSET=11
BATCHES=1
CLASSES=8

RESOLUTIONS=(
    # "128 160 420"
    # "128 256 672"
    # "192 320 1260"
    # "192 416 1638"
    # "192 640 2520"
    # "192 800 3150"
    # "256 320 1680"
    # "256 416 2184"
    # "256 448 2352"
    # "256 640 3360"
    # "256 800 4200"
    # "256 960 5040"
    # "288 1280 7560"
    # "288 480 2835"
    # "288 640 3780"
    # "288 800 4725"
    # "288 960 5670"
    # "320 320 2100"
    # "384 1280 10080"
    # "384 480 3780"
    # "384 640 5040"
    # "384 800 6300"
    # "384 960 7560"
    # "416 416 3549"
    # "480 1280 12600"
    # "480 640 6300"
    # "480 800 7875"
    # "480 960 9450"
    # "512 512 5376"
    # "512 640 6720"
    # "512 896 9408"
    # "544 1280 14280"
    # "544 800 8925"
    # "544 960 10710"
    # "640 640 8400"
    # "736 1280 19320"
    # "576 1024 12096"
    "384 672 5292"
)
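
# The third number in each RESOLUTIONS entry is the total candidate-box count over
# the three output strides: sum(H//s * W//s for s in (8, 16, 32));
# e.g. 384x672 -> 48*84 + 24*42 + 12*21 = 5292, and 640x640 -> 8400.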

for((i=0; i<${#RESOLUTIONS[@]}; i++))
do
    RESOLUTION=(`echo ${RESOLUTIONS[i]}`)
    H=${RESOLUTION[0]}
    W=${RESOLUTION[1]}
    BOXES=${RESOLUTION[2]}

    ################################################### Boxes + Scores
    # 02_boxes_scores_${BOXES}.onnx
    python make_boxes_scores.py -o ${OPSET} -b ${BATCHES} -x ${BOXES} -c ${CLASSES}
    # 03_cxcywh_y1x1y2x2_${BOXES}.onnx
    python make_cxcywh_y1x1y2x2.py -o ${OPSET} -b ${BATCHES} -x ${BOXES}

    # 04_boxes_x1y1x2y2_y1x1y2x2_scores_${BOXES}
    snc4onnx \
    --input_onnx_file_paths 02_boxes_scores_${BOXES}.onnx 03_cxcywh_y1x1y2x2_${BOXES}.onnx \
    --srcop_destop boxes_cxcywh cxcywh \
    --op_prefixes_after_merging 02 03 \
    --output_onnx_file_path 04_boxes_x1y1x2y2_y1x1y2x2_scores_${BOXES}.onnx

    ################################################### NonMaxSuppression
    sog4onnx \
    --op_type Constant \
    --opset ${OPSET} \
    --op_name max_output_boxes_per_class_const \
    --output_variables max_output_boxes_per_class int64 [1] \
    --attributes value int64 [20] \
    --output_onnx_file_path 05_Constant_max_output_boxes_per_class.onnx

    sog4onnx \
    --op_type Constant \
    --opset ${OPSET} \
    --op_name iou_threshold_const \
    --output_variables iou_threshold float32 [1] \
    --attributes value float32 [0.40] \
    --output_onnx_file_path 06_Constant_iou_threshold.onnx

    sog4onnx \
    --op_type Constant \
    --opset ${OPSET} \
    --op_name score_threshold_const \
    --output_variables score_threshold float32 [1] \
    --attributes value float32 [0.25] \
    --output_onnx_file_path 07_Constant_score_threshold.onnx


    OP=NonMaxSuppression
    LOWEROP=${OP,,}
    sog4onnx \
    --op_type ${OP} \
    --opset ${OPSET} \
    --op_name ${LOWEROP}${OPSET} \
    --input_variables boxes_var float32 [${BATCHES},${BOXES},4] \
    --input_variables scores_var float32 [${BATCHES},${CLASSES},${BOXES}] \
    --input_variables max_output_boxes_per_class_var int64 [1] \
    --input_variables iou_threshold_var float32 [1] \
    --input_variables score_threshold_var float32 [1] \
    --output_variables selected_indices int64 [\'N\',3] \
    --attributes center_point_box int64 0 \
    --output_onnx_file_path 08_${OP}${OPSET}.onnx


    snc4onnx \
    --input_onnx_file_paths 05_Constant_max_output_boxes_per_class.onnx 08_${OP}${OPSET}.onnx \
    --srcop_destop max_output_boxes_per_class max_output_boxes_per_class_var \
    --output_onnx_file_path 08_${OP}${OPSET}.onnx

    snc4onnx \
    --input_onnx_file_paths 06_Constant_iou_threshold.onnx 08_${OP}${OPSET}.onnx \
    --srcop_destop iou_threshold iou_threshold_var \
    --output_onnx_file_path 08_${OP}${OPSET}.onnx

    snc4onnx \
    --input_onnx_file_paths 07_Constant_score_threshold.onnx 08_${OP}${OPSET}.onnx \
    --srcop_destop score_threshold score_threshold_var \
    --output_onnx_file_path 08_${OP}${OPSET}.onnx

    ################################################### Boxes + Scores + NonMaxSuppression
    snc4onnx \
    --input_onnx_file_paths 04_boxes_x1y1x2y2_y1x1y2x2_scores_${BOXES}.onnx 08_${OP}${OPSET}.onnx \
    --srcop_destop scores scores_var y1x1y2x2 boxes_var \
    --output_onnx_file_path 09_nms_yolox_${BOXES}.onnx


    ################################################### Myriad workaround Mul
    OP=Mul
    LOWEROP=${OP,,}
    OPSET=${OPSET}
    sog4onnx \
    --op_type ${OP} \
    --opset ${OPSET} \
    --op_name ${LOWEROP}${OPSET} \
    --input_variables workaround_mul_a int64 [\'N\',3] \
    --input_variables workaround_mul_b int64 [1] \
    --output_variables workaround_mul_out int64 [\'N\',3] \
    --output_onnx_file_path 10_${OP}${OPSET}_workaround.onnx


    ############ Myriad workaround Constant
    sog4onnx \
    --op_type Constant \
    --opset ${OPSET} \
    --op_name workaround_mul_const_op \
    --output_variables workaround_mul_const int64 [1] \
    --attributes value int64 [1] \
    --output_onnx_file_path 11_Constant_workaround_mul.onnx

    ############ Myriad workaround Mul + Myriad workaround Constant
    snc4onnx \
    --input_onnx_file_paths 11_Constant_workaround_mul.onnx 10_${OP}${OPSET}_workaround.onnx \
    --srcop_destop workaround_mul_const workaround_mul_b \
    --output_onnx_file_path 11_Constant_workaround_mul.onnx



    ################################################### NonMaxSuppression + Myriad workaround Mul
    snc4onnx \
    --input_onnx_file_paths 09_nms_yolox_${BOXES}.onnx 11_Constant_workaround_mul.onnx \
    --srcop_destop selected_indices workaround_mul_a \
    --output_onnx_file_path 09_nms_yolox_${BOXES}.onnx



    ################################################### Score GatherND
    python make_score_gather_nd.py -b ${BATCHES} -x ${BOXES} -c ${CLASSES}

    python -m tf2onnx.convert \
    --opset ${OPSET} \
    --tflite saved_model_postprocess/nms_score_gather_nd.tflite \
    --output 12_nms_score_gather_nd.onnx

    sor4onnx \
    --input_onnx_file_path 12_nms_score_gather_nd.onnx \
    --old_new ":0" "" \
    --search_mode "suffix_match" \
    --output_onnx_file_path 12_nms_score_gather_nd.onnx
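    # tf2onnx carries over TFLite tensor names, which end in ":0"; the
    # suffix_match replacement above strips that suffix so the renames and
    # merges that follow can use clean identifiers.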

    sor4onnx \
    --input_onnx_file_path 12_nms_score_gather_nd.onnx \
    --old_new "serving_default_input_1" "gn_scores" \
    --output_onnx_file_path 12_nms_score_gather_nd.onnx \
    --mode inputs

    sor4onnx \
    --input_onnx_file_path 12_nms_score_gather_nd.onnx \
    --old_new "serving_default_input_2" "gn_selected_indices" \
    --output_onnx_file_path 12_nms_score_gather_nd.onnx \
    --mode inputs

    sor4onnx \
    --input_onnx_file_path 12_nms_score_gather_nd.onnx \
    --old_new "PartitionedCall" "final_scores" \
    --output_onnx_file_path 12_nms_score_gather_nd.onnx \
    --mode outputs

    python make_input_output_shape_update.py \
    --input_onnx_file_path 12_nms_score_gather_nd.onnx \
    --output_onnx_file_path 12_nms_score_gather_nd.onnx \
    --input_names gn_scores \
    --input_names gn_selected_indices \
    --input_shapes ${BATCHES} ${CLASSES} ${BOXES} \
    --input_shapes N 3 \
    --output_names final_scores \
    --output_shapes N 1

    onnxsim 12_nms_score_gather_nd.onnx 12_nms_score_gather_nd.onnx
    onnxsim 12_nms_score_gather_nd.onnx 12_nms_score_gather_nd.onnx
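    # The GatherND graph converted above looks up scores[batch, class, box] for
    # every row of selected_indices, yielding final_scores with shape [N, 1]
    # after the shape update. onnxsim is run twice, presumably so that shapes
    # folded by the first pass can be simplified further by the second.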


    ################################################### NonMaxSuppression + Score GatherND
    snc4onnx \
    --input_onnx_file_paths 09_nms_yolox_${BOXES}.onnx 12_nms_score_gather_nd.onnx \
    --srcop_destop scores gn_scores workaround_mul_out gn_selected_indices \
    --output_onnx_file_path 09_nms_yolox_${BOXES}_nd.onnx

    onnxsim 09_nms_yolox_${BOXES}_nd.onnx 09_nms_yolox_${BOXES}_nd.onnx
    onnxsim 09_nms_yolox_${BOXES}_nd.onnx 09_nms_yolox_${BOXES}_nd.onnx


    ################################################### Final Batch Nums
    python make_final_batch_nums_final_class_nums_final_box_nums.py
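    # make_final_batch_nums_final_class_nums_final_box_nums.py presumably emits
    # 13_nms_final_batch_nums_final_class_nums_final_box_nums.onnx, which takes
    # selected_indices [N, 3] (fed in below as bc_input) and splits it into the
    # batch / class / box columns; final_box_nums pairs the batch index with the
    # box index to give the [N, 2] indices consumed by the box GatherND below.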


    ################################################### Boxes GatherND
    python make_box_gather_nd.py

    python -m tf2onnx.convert \
    --opset ${OPSET} \
    --tflite saved_model_postprocess/nms_box_gather_nd.tflite \
    --output 14_nms_box_gather_nd.onnx

    sor4onnx \
    --input_onnx_file_path 14_nms_box_gather_nd.onnx \
    --old_new ":0" "" \
    --search_mode "suffix_match" \
    --output_onnx_file_path 14_nms_box_gather_nd.onnx

    sor4onnx \
    --input_onnx_file_path 14_nms_box_gather_nd.onnx \
    --old_new "serving_default_input_1" "gn_boxes" \
    --output_onnx_file_path 14_nms_box_gather_nd.onnx \
    --mode inputs

    sor4onnx \
    --input_onnx_file_path 14_nms_box_gather_nd.onnx \
    --old_new "serving_default_input_2" "gn_box_selected_indices" \
    --output_onnx_file_path 14_nms_box_gather_nd.onnx \
    --mode inputs

    sor4onnx \
    --input_onnx_file_path 14_nms_box_gather_nd.onnx \
    --old_new "PartitionedCall" "final_boxes" \
    --output_onnx_file_path 14_nms_box_gather_nd.onnx \
    --mode outputs

    python make_input_output_shape_update.py \
    --input_onnx_file_path 14_nms_box_gather_nd.onnx \
    --output_onnx_file_path 14_nms_box_gather_nd.onnx \
    --input_names gn_boxes \
    --input_names gn_box_selected_indices \
    --input_shapes ${BATCHES} ${BOXES} 4 \
    --input_shapes N 2 \
    --output_names final_boxes \
    --output_shapes N 4

    onnxsim 14_nms_box_gather_nd.onnx 14_nms_box_gather_nd.onnx
    onnxsim 14_nms_box_gather_nd.onnx 14_nms_box_gather_nd.onnx
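    # Same pattern as the score GatherND: this graph gathers boxes[batch, box]
    # from the x1y1x2y2 tensor using the [N, 2] index pairs, producing
    # final_boxes with shape [N, 4].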


    ################################################### nms_yolox_xxx_nd + nms_final_batch_nums_final_class_nums_final_box_nums
    snc4onnx \
    --input_onnx_file_paths 09_nms_yolox_${BOXES}_nd.onnx 13_nms_final_batch_nums_final_class_nums_final_box_nums.onnx \
    --srcop_destop workaround_mul_out bc_input \
    --op_prefixes_after_merging main01 sub01 \
    --output_onnx_file_path 15_nms_yolox_${BOXES}_split.onnx
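    # --op_prefixes_after_merging prefixes every op name in each input graph
    # (main01 / sub01 here) so that identically named ops in the two graphs
    # cannot collide after the merge.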



    ################################################### nms_yolox_${BOXES}_split + nms_box_gather_nd
    snc4onnx \
    --input_onnx_file_paths 15_nms_yolox_${BOXES}_split.onnx 14_nms_box_gather_nd.onnx \
    --srcop_destop x1y1x2y2 gn_boxes final_box_nums gn_box_selected_indices \
    --output_onnx_file_path 16_nms_yolox_${BOXES}_merged.onnx

    onnxsim 16_nms_yolox_${BOXES}_merged.onnx 16_nms_yolox_${BOXES}_merged.onnx
    onnxsim 16_nms_yolox_${BOXES}_merged.onnx 16_nms_yolox_${BOXES}_merged.onnx



    ################################################### nms output merge
    python make_nms_outputs_merge.py

    onnxsim 17_nms_batchno_classid_x1y1x2y2_cat.onnx 17_nms_batchno_classid_x1y1x2y2_cat.onnx


    ################################################### merge
    snc4onnx \
    --input_onnx_file_paths 16_nms_yolox_${BOXES}_merged.onnx 17_nms_batchno_classid_x1y1x2y2_cat.onnx \
    --srcop_destop final_batch_nums cat_batch final_class_nums cat_classid final_scores cat_score final_boxes cat_x1y1x2y2 \
    --output_onnx_file_path 18_nms_yolox_${BOXES}.onnx

    onnxsim 18_nms_yolox_${BOXES}.onnx 18_nms_yolox_${BOXES}.onnx


    ################################################### yolox + Post-Process
    snc4onnx \
    --input_onnx_file_paths ${MODEL_NAME}_${SUFFIX}${H}x${W}.onnx 18_nms_yolox_${BOXES}.onnx \
    --srcop_destop output0 boxes_scores_input \
    --output_onnx_file_path ${MODEL_NAME}_post_${SUFFIX}${H}x${W}.onnx
    onnxsim ${MODEL_NAME}_post_${SUFFIX}${H}x${W}.onnx ${MODEL_NAME}_post_${SUFFIX}${H}x${W}.onnx
    onnxsim ${MODEL_NAME}_post_${SUFFIX}${H}x${W}.onnx ${MODEL_NAME}_post_${SUFFIX}${H}x${W}.onnx
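    # The merged model now runs YOLOv9 inference and post-processing in a single
    # graph: output0 (boxes + scores) flows straight into the NMS subgraph, and
    # the final output concatenates batchno, classid, score and x1y1x2y2 for
    # each detected box.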

    # ################################################### cleaning
    rm 0*_*.onnx
    rm 1*_*.onnx
done
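
A quick smoke test for the merged model is to run it once through onnxruntime. The sketch below is illustrative only: the file name is hypothetical, and the assumption that the graph has a fixed-size input and a single [N, 7] output laid out as (batchno, classid, score, x1, y1, x2, y2) is inferred from the merge steps above, not verified.

# Hedged sketch: smoke-test the merged YOLOv9 + post-process ONNX model.
# Assumptions (inferred from the script above, not verified): a fixed
# [1, 3, H, W] float32 input and one [N, 7] output whose columns are
# (batchno, classid, score, x1, y1, x2, y2).
import numpy as np
import onnxruntime as ort

MODEL = "yolov9_post_640x640.onnx"  # hypothetical file name

session = ort.InferenceSession(MODEL, providers=["CPUExecutionProvider"])
inp = session.get_inputs()[0]
_, _, h, w = inp.shape  # assumes all four dims are fixed integers

dummy = np.random.rand(1, 3, h, w).astype(np.float32)
(detections,) = session.run(None, {inp.name: dummy})  # assumes one output

print(detections.shape)  # expected: (N, 7)
for batchno, classid, score, x1, y1, x2, y2 in detections:
    print(int(batchno), int(classid), round(float(score), 3), x1, y1, x2, y2)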
PINTOPINTO
gelan-n-original.yaml
# YOLOv9

# parameters
nc: 8  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple
#activation: nn.LeakyReLU(0.1)
#activation: nn.ReLU()

# anchors
anchors: 3

# gelan backbone
backbone:
  [
   # conv down
   [-1, 1, Conv, [8, 3, 2]],  # 0-P1/2

   # conv down
   [-1, 1, Conv, [16, 3, 2]],  # 1-P2/4

   # elan-1 block
   [-1, 1, ELAN1, [16, 16, 8]],  # 2

   # avg-conv down
   [-1, 1, AConv, [32]],  # 3-P3/8

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [32, 32, 16, 3]],  # 4

   # avg-conv down
   [-1, 1, AConv, [48]],  # 5-P4/16

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [48, 48, 24, 3]],  # 6

   # avg-conv down
   [-1, 1, AConv, [64]],  # 7-P5/32

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [64, 64, 32, 3]],  # 8
  ]

# elan head
head:
  [
   # elan-spp block
   [-1, 1, SPPELAN, [64, 32]],  # 9

   # up-concat merge
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [48, 48, 24, 3]],  # 12

   # up-concat merge
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [32, 32, 16, 3]],  # 15

   # avg-conv-down merge
   [-1, 1, AConv, [24]],
   [[-1, 12], 1, Concat, [1]],  # cat head P4

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [48, 48, 24, 3]],  # 18 (P4/16-medium)

   # avg-conv-down merge
   [-1, 1, AConv, [32]],
   [[-1, 9], 1, Concat, [1]],  # cat head P5

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [64, 64, 32, 3]],  # 21 (P5/32-large)

   # detect
   [[15, 18, 21], 1, DDetect, [nc]],  # DDetect(P3, P4, P5)
  ]
yolov9-n_original.yaml
# YOLOv9

# parameters
nc: 8  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple
#activation: nn.LeakyReLU(0.1)
#activation: nn.ReLU()

# anchors
anchors: 3

# gelan backbone
backbone:
  [
   # conv down
   [-1, 1, Conv, [8, 3, 2]],  # 0-P1/2

   # conv down
   [-1, 1, Conv, [16, 3, 2]],  # 1-P2/4

   # elan-1 block
   [-1, 1, ELAN1, [16, 16, 8]],  # 2

   # avg-conv down
   [-1, 1, AConv, [32]],  # 3-P3/8

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [32, 32, 16, 3]],  # 4

   # avg-conv down
   [-1, 1, AConv, [48]],  # 5-P4/16

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [48, 48, 24, 3]],  # 6

   # avg-conv down
   [-1, 1, AConv, [64]],  # 7-P5/32

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [64, 64, 32, 3]],  # 8
  ]

# elan head
head:
  [
   # elan-spp block
   [-1, 1, SPPELAN, [64, 32]],  # 9

   # up-concat merge
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [48, 48, 24, 3]],  # 12

   # up-concat merge
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [32, 32, 16, 3]],  # 15

   # avg-conv-down merge
   [-1, 1, AConv, [24]],
   [[-1, 12], 1, Concat, [1]],  # cat head P4

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [48, 48, 24, 3]],  # 18 (P4/16-medium)

   # avg-conv-down merge
   [-1, 1, AConv, [32]],
   [[-1, 9], 1, Concat, [1]],  # cat head P5

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [64, 64, 32, 3]],  # 21 (P5/32-large)

   # elan-spp block
   [8, 1, SPPELAN, [64, 32]],  # 22

   # up-concat merge
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [48, 48, 24, 3]],  # 25

   # up-concat merge
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3

   # elan-2 block
   [-1, 1, RepNCSPELAN4, [32, 32, 16, 3]],  # 28

   # detect
   [[28, 25, 22, 15, 18, 21], 1, DualDDetect, [nc]],  # Detect(P3, P4, P5)
  ]
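The backbone above is identical to gelan-n-original.yaml; the difference is in the head, where a second SPPELAN/upsample branch (layers 22-28) is appended and all six feature maps feed DualDDetect, which appears to provide the auxiliary predictions used when training with train_dual.py. gelan-n, with its single DDetect, is the corresponding deploy-time configuration.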
  • Wholebody12
python train_dual.py \
--workers 8 \
--device 0 \
--batch 8 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-n_original.yaml \
--name yolov9-n \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 300 \
--close-mosaic 45

# Retrain with ReLU starting from the weights trained with Swish (SiLU); mosaic aug disabled
python train_dual.py \
--workers 8 \
--device 0 \
--batch 8 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-n_original-relu.yaml \
--weights best-n.pt \
--name yolov9-n-relu \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 100 \
--close-mosaic 100
  • Wholebody13
Train images     : 9,541
Validation images: 2,386
Total images     :11,927
===================================================
class_id: 0 name:                body count: 64,776
class_id: 1 name:body_with_wheelchair count:    925
class_id: 2 name:  body_with_crutches count:    539
class_id: 3 name:                head count: 55,190
class_id: 4 name:                face count: 30,153
class_id: 5 name:                 eye count: 26,550
class_id: 6 name:                nose count: 25,005
class_id: 7 name:               mouth count: 20,197
class_id: 8 name:                 ear count: 24,471
class_id: 9 name:                hand count: 40,129
class_id:10 name:           hand_left count: 20,150
class_id:11 name:          hand_right count: 19,979
class_id:12 name:                foot count: 34,997
---------------------------------------------------
                                Total count:363,061
===================================================
#!/bin/bash

while true; do
    python train_dual.py \
        --workers 8 \
        --device 0 \
        --batch 8 \
        --data data/original.yaml \
        --img 640 \
        --cfg models/detect/yolov9-n_original.yaml \
        --weights best-n.pt \
        --name yolov9-wholebody13-n \
        --hyp hyp.scratch-high_original.yaml \
        --min-items 0 \
        --epochs 245 \
        --close-mosaic 45 \
        --resume runs/train/yolov9-wholebody13-n/weights/last.pt
    
    if [ $? -eq 0 ]; then
        echo "Command succeeded."
        break
    else
        echo "Command failed. Retrying..."
    fi
done

# Retrain with ReLU starting from the weights trained with Swish (SiLU); mosaic aug disabled
python train_dual.py \
--workers 8 \
--device 0 \
--batch 8 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-n_original-relu.yaml \
--weights best-n-wholebody13.pt \
--name yolov9-wholebody13-n-relu \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 100 \
--close-mosaic 100

#=======================================

#!/bin/bash

while true; do
    python train_dual.py \
        --workers 8 \
        --device 0 \
        --batch 16 \
        --data data/original.yaml \
        --img 640 \
        --cfg models/detect/yolov9-t_original.yaml \
        --weights best-t.pt \
        --name yolov9-wholebody13-t \
        --hyp hyp.scratch-high_original.yaml \
        --min-items 0 \
        --epochs 245 \
        --close-mosaic 45 \
        --resume runs/train/yolov9-wholebody13-t/weights/last.pt
    
    if [ $? -eq 0 ]; then
        echo "Command succeeded."
        break
    else
        echo "Command failed. Retrying..."
    fi
done

# Retrain with ReLU starting from the weights trained with Swish (SiLU); mosaic aug disabled
python train_dual.py \
--workers 8 \
--device 0 \
--batch 16 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-t_original-relu.yaml \
--weights best-t-wholebody13.pt \
--name yolov9-wholebody13-t-relu \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 100 \
--close-mosaic 100

#=======================================

python train_dual.py \
--workers 8 \
--device 0 \
--batch 16 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-s_original.yaml \
--weights best-s.pt \
--name yolov9-wholebody13-s \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 245 \
--close-mosaic 45 \
--resume runs/train/yolov9-wholebody13-s/weights/last.pt

python train_dual.py \
--workers 8 \
--device 0 \
--batch 16 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-e_original.yaml \
--weights best-e.pt \
--name yolov9-wholebody13-e \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 245 \
--close-mosaic 45 \
--resume runs/train/yolov9-wholebody13-e/weights/last.pt
PINTOPINTO
  • YOLOX-X (Swish) vs YOLOv9-E (Swish)
                 Class Images Instances     mAP50-95
                                        YOLOX-X YOLOv9-E
                   all   2117     48988   0.554    0.660
                  Body   2117     11624   0.614    0.748
    BodyWithWheelchair   2117       153   0.871    0.865
                  Head   2117      9936   0.585    0.723
                  Face   2117      5653   0.506    0.675
                  Hand   2117      7525   0.513    0.606
             Hand-Left   2117      3739   0.456    0.571
            Hand-Right   2117      3786   0.449    0.573
                  Foot   2117      6572   0.431    0.523
    
  • YOLOX-X (Swish) vs YOLOv9-E (ReLU)
                 Class Images Instances     mAP50-95
                                        YOLOX-X YOLOv9-E
                   all   2117     48988   0.554    0.647
                  Body   2117     11624   0.614    0.731
    BodyWithWheelchair   2117       153   0.871    0.850
                  Head   2117      9936   0.585    0.719
                  Face   2117      5653   0.506    0.673
                  Hand   2117      7525   0.513    0.589
             Hand-Left   2117      3739   0.456    0.556
            Hand-Right   2117      3786   0.449    0.552
                  Foot   2117      6572   0.431    0.508
    
PINTOPINTO
  • YOLOX-X (Swish) vs YOLOv9-C (Swish)
                 Class Images Instances       mAP50-95
                                          YOLOX-X YOLOv9-C
                   all   2117     48988     0.554    0.592
                  Body   2117     11624     0.614    0.687
    BodyWithWheelchair   2117       153     0.871    0.822
                  Head   2117      9936     0.585    0.674
                  Face   2117      5653     0.506    0.608
                  Hand   2117      7525     0.513    0.527
             Hand-Left   2117      3739     0.456    0.491
            Hand-Right   2117      3786     0.449    0.489
                  Foot   2117      6572     0.431    0.436
    
  • YOLOX-X (Swish) vs YOLOv9-C (ReLU)
                 Class Images  Instances     mAP50-95
                                         YOLOX-X YOLOv9-C
                   all   2117      48988   0.554    0.593
                  Body   2117      11624   0.614    0.679
    BodyWithWheelchair   2117        153   0.871    0.825
                  Head   2117       9936   0.585    0.676
                  Face   2117       5653   0.506    0.617
                  Hand   2117       7525   0.513    0.529
             Hand-Left   2117       3739   0.456    0.491
            Hand-Right   2117       3786   0.449    0.491
                  Foot   2117       6572   0.431    0.437
    
PINTOPINTO
  • YOLOX-X (Swish) vs YOLOv9-M (Swish)
                 Class Images  Instances      mAP50-95
                                          YOLOX-X YOLOv9-M
                   all   2117     48988     0.554    0.577
                  Body   2117     11624     0.614    0.668
    BodyWithWheelchair   2117       153     0.871    0.814
                  Head   2117      9936     0.585    0.666
                  Face   2117      5653     0.506    0.601
                  Hand   2117      7525     0.513    0.509
             Hand-Left   2117      3739     0.456    0.475
            Hand-Right   2117      3786     0.449    0.467
                  Foot   2117      6572     0.431    0.417
    
  • YOLOX-X (Swish) vs YOLOv9-M (ReLU)
                 Class Images  Instances      mAP50-95
                                         YOLOX-X YOLOv9-M
                   all   2117      48988   0.554    0.575
                  Body   2117      11624   0.614    0.663
    BodyWithWheelchair   2117        153   0.871    0.788
                  Head   2117       9936   0.585    0.664
                  Face   2117       5653   0.506    0.605
                  Hand   2117       7525   0.513    0.513
             Hand-Left   2117       3739   0.456    0.477
            Hand-Right   2117       3786   0.449    0.475
                  Foot   2117       6572   0.431    0.414
    
PINTOPINTO
  • YOLOX-X (Swish) vs YOLOv9-S (Swish)
                 Class Images  Instances     mAP50-95
                                         YOLOX-X YOLOv9-S
                   all   2117      48988   0.554    0.560
                  Body   2117      11624   0.614    0.672
    BodyWithWheelchair   2117        153   0.871    0.844
                  Head   2117       9936   0.585    0.646
                  Face   2117       5653   0.506    0.555
                  Hand   2117       7525   0.513    0.486
             Hand-Left   2117       3739   0.456    0.432
            Hand-Right   2117       3786   0.449    0.431
                  Foot   2117       6572   0.431    0.411
    
  • YOLOX-X (Swish) vs YOLOv9-S (ReLU)
                 Class Images  Instances      mAP50-95
                                         YOLOX-X YOLOv9-S
                   all   2117      48988   0.554    0.556
                  Body   2117      11624   0.614    0.659
    BodyWithWheelchair   2117        153   0.871    0.835
                  Head   2117       9936   0.585    0.640
                  Face   2117       5653   0.506    0.561
                  Hand   2117       7525   0.513    0.480
             Hand-Left   2117       3739   0.456    0.430
            Hand-Right   2117       3786   0.449    0.436
                  Foot   2117       6572   0.431    0.404
    
PINTOPINTO
  • YOLOX-Tiny (Swish) vs YOLOv9-T (Swish)
                 Class Images Instances     mAP50-95
                                        YOLOX-T YOLOv9-T
                   all   2117     48988   0.421    0.477
                  Body   2117     11624   0.463    0.590
    BodyWithWheelchair   2117       153   0.674    0.771
                  Head   2117      9936   0.507    0.591
                  Face   2117      5653   0.417    0.468
                  Hand   2117      7525   0.372    0.401
             Hand-Left   2117      3739   0.325    0.335
            Hand-Right   2117      3786   0.318    0.327
                  Foot   2117      6572   0.291    0.332
    
  • YOLOX-Tiny (Swish) vs YOLOv9-T (ReLU)
                 Class Images Instances     mAP50-95
                                        YOLOX-T YOLOv9-T
                   all   2117     48988   0.421    0.471
                  Body   2117     11624   0.463    0.567
    BodyWithWheelchair   2117       153   0.674    0.764
                  Head   2117      9936   0.507    0.584
                  Face   2117      5653   0.417    0.486
                  Hand   2117      7525   0.372    0.390
             Hand-Left   2117      3739   0.325    0.333
            Hand-Right   2117      3786   0.318    0.323
                  Foot   2117      6572   0.291    0.320
    
PINTOPINTO
  • YOLOX-Nano (Swish) vs YOLOv9-Nano (Swish)
                 Class Images Instances     mAP50-95
                                        YOLOX-N YOLOv9-N
                   all   2117     48988   0.353    0.352
                  Body   2117     11624   0.392    0.444
    BodyWithWheelchair   2117       153   0.587    0.571
                  Head   2117      9936   0.467    0.516
                  Face   2117      5653   0.362    0.397
                  Hand   2117      7525   0.306    0.266
             Hand-Left   2117      3739   0.237    0.198
            Hand-Right   2117      3786   0.241    0.198
                  Foot   2117      6572   0.231    0.225
    
  • YOLOX-Nano (Swish) vs YOLOv9-Nano (ReLU)
                 Class Images Instances     mAP50-95
                                        YOLOX-N YOLOv9-N
                   all   2117     48988   0.353    0.351
                  Body   2117     11624   0.392    0.435
    BodyWithWheelchair   2117       153   0.587    0.590
                  Head   2117      9936   0.467    0.505
                  Face   2117      5653   0.362    0.397
                  Hand   2117      7525   0.306    0.257
             Hand-Left   2117      3739   0.237    0.200
            Hand-Right   2117      3786   0.241    0.201
                  Foot   2117      6572   0.231    0.222
    