YOLOv9 (not MIT)
Optional:
sudo apt-get update \
&& sudo apt-get install -y nano libgl1-mesa-dev libopencv-dev
pip install -r requirements.txt
data/original.yaml
train: /home/xxxx/git/yolov9/dataset/images/train
val: /home/xxxx/git/yolov9/dataset/images/val
# Classes
names:
0: Body
1: BodyWithWheelchair
2: Head
3: Face
4: Eye
5: Nose
6: Mouth
7: Ear
8: Hand
9: Hand-Left
10: Hand-Right
11: Foot
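The yaml only lists the image directories; the YOLOv5-style dataloader that YOLOv9 inherits derives each label path by swapping /images/ for /labels/ in the image path. A minimal sketch of that convention (mirroring YOLOv5's img2label_paths; the example path is illustrative):

import os

def img2label_paths(img_paths):
    # YOLOv5/YOLOv9 convention: .../images/train/x.jpg -> .../labels/train/x.txt
    sa, sb = f'{os.sep}images{os.sep}', f'{os.sep}labels{os.sep}'
    return [sb.join(p.rsplit(sa, 1)).rsplit('.', 1)[0] + '.txt' for p in img_paths]

# Each label file holds one "class cx cy w h" row per box, normalized to 0-1, e.g.
#   2 0.5132 0.2481 0.0934 0.1125   -> one "Head" box
print(img2label_paths(['/home/xxxx/git/yolov9/dataset/images/train/000001.jpg']))
# ['/home/xxxx/git/yolov9/dataset/labels/train/000001.txt']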
data/hyps/hyp.scratch-high_original.yaml
lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf)
momentum: 0.937 # SGD momentum/Adam beta1
weight_decay: 0.0005 # optimizer weight decay 5e-4
warmup_epochs: 3.0 # warmup epochs (fractions ok)
warmup_momentum: 0.8 # warmup initial momentum
warmup_bias_lr: 0.1 # warmup initial bias lr
box: 7.5 # box loss gain
cls: 0.5 # cls loss gain
cls_pw: 1.0 # cls BCELoss positive_weight
obj: 0.7 # obj loss gain (scale with pixels)
obj_pw: 1.0 # obj BCELoss positive_weight
dfl: 1.5 # dfl loss gain
iou_t: 0.20 # IoU training threshold
anchor_t: 5.0 # anchor-multiple threshold
# anchors: 3 # anchors per output layer (0 to ignore)
fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
hsv_v: 0.4 # image HSV-Value augmentation (fraction)
degrees: 0.0 # image rotation (+/- deg)
translate: 0.1 # image translation (+/- fraction)
scale: 0.9 # image scale (+/- gain)
shear: 0.0 # image shear (+/- deg)
perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
flipud: 0.0 # image flip up-down (probability)
fliplr: 0.0 # image flip left-right (probability)
mosaic: 1.0 # image mosaic (probability)
mixup: 0.15 # image mixup (probability)
copy_paste: 0.3 # segment copy-paste (probability)
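For reference, lr0 and lrf define a cosine one-cycle schedule: the learning rate decays from lr0 to lr0 * lrf over training. A small sketch of the lambda used by the YOLOv5/YOLOv9 family (one_cycle in utils/general.py):

import math

def one_cycle(y1=0.0, y2=1.0, steps=100):
    # cosine ramp from y1 to y2 over `steps` epochs (as in YOLOv5/YOLOv9 utils)
    return lambda x: ((1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1

lr0, lrf, epochs = 0.01, 0.01, 145
lf = one_cycle(1, lrf, epochs)
print(lr0 * lf(0))       # 0.01   at the start of training
print(lr0 * lf(epochs))  # 0.0001 = lr0 * lrf at the end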
models/detect/yolov9-t_original.yaml
# YOLOv9
# parameters
nc: 8 # number of classes (train.py overrides this with the class count from the data yaml)
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
#activation: nn.LeakyReLU(0.1)
#activation: nn.ReLU()
# anchors
anchors: 3
# gelan backbone
backbone:
[
# conv down
[-1, 1, Conv, [16, 3, 2]], # 0-P1/2
# conv down
[-1, 1, Conv, [32, 3, 2]], # 1-P2/4
# elan-1 block
[-1, 1, ELAN1, [32, 32, 16]], # 2
# avg-conv down
[-1, 1, AConv, [64]], # 3-P3/8
# elan-2 block
[-1, 1, RepNCSPELAN4, [64, 64, 32, 3]], # 4
# avg-conv down
[-1, 1, AConv, [96]], # 5-P4/16
# elan-2 block
[-1, 1, RepNCSPELAN4, [96, 96, 48, 3]], # 6
# avg-conv down
[-1, 1, AConv, [128]], # 7-P5/32
# elan-2 block
[-1, 1, RepNCSPELAN4, [128, 128, 64, 3]], # 8
]
# elan head
head:
[
# elan-spp block
[-1, 1, SPPELAN, [128, 64]], # 9
# up-concat merge
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
# elan-2 block
[-1, 1, RepNCSPELAN4, [96, 96, 48, 3]], # 12
# up-concat merge
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
# elan-2 block
[-1, 1, RepNCSPELAN4, [64, 64, 32, 3]], # 15
# avg-conv-down merge
[-1, 1, AConv, [48]],
[[-1, 12], 1, Concat, [1]], # cat head P4
# elan-2 block
[-1, 1, RepNCSPELAN4, [96, 96, 48, 3]], # 18 (P4/16-medium)
# avg-conv-down merge
[-1, 1, AConv, [64]],
[[-1, 9], 1, Concat, [1]], # cat head P5
# elan-2 block
[-1, 1, RepNCSPELAN4, [128, 128, 64, 3]], # 21 (P5/32-large)
# elan-spp block
[8, 1, SPPELAN, [128, 64]], # 22
# up-concat merge
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
# elan-2 block
[-1, 1, RepNCSPELAN4, [96, 96, 48, 3]], # 25
# up-concat merge
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
# elan-2 block
[-1, 1, RepNCSPELAN4, [64, 64, 32, 3]], # 28
# detect
[[28, 25, 22, 15, 18, 21], 1, DualDDetect, [nc]], # Detect(P3, P4, P5)
]
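A note on the detect layer above: DualDDetect consumes six feature maps. Judging from the re-parameterization script later in this article, which copies only cv4/cv5/dfl2 into the single-branch DDetect, the first triple (28, 25, 22) feeds the auxiliary branch that is discarded at inference time, while the second triple (15, 18, 21) feeds the branch whose weights survive re-parameterization.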
models/detect/yolov9-t_original-relu.yaml
# YOLOv9
# parameters
nc: 8 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
#activation: nn.LeakyReLU(0.1)
activation: nn.ReLU()
# anchors
anchors: 3
# gelan backbone
backbone:
[
# conv down
[-1, 1, Conv, [16, 3, 2]], # 0-P1/2
# conv down
[-1, 1, Conv, [32, 3, 2]], # 1-P2/4
# elan-1 block
[-1, 1, ELAN1, [32, 32, 16]], # 2
# avg-conv down
[-1, 1, AConv, [64]], # 3-P3/8
# elan-2 block
[-1, 1, RepNCSPELAN4, [64, 64, 32, 3]], # 4
# avg-conv down
[-1, 1, AConv, [96]], # 5-P4/16
# elan-2 block
[-1, 1, RepNCSPELAN4, [96, 96, 48, 3]], # 6
# avg-conv down
[-1, 1, AConv, [128]], # 7-P5/32
# elan-2 block
[-1, 1, RepNCSPELAN4, [128, 128, 64, 3]], # 8
]
# elan head
head:
[
# elan-spp block
[-1, 1, SPPELAN, [128, 64]], # 9
# up-concat merge
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
# elan-2 block
[-1, 1, RepNCSPELAN4, [96, 96, 48, 3]], # 12
# up-concat merge
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
# elan-2 block
[-1, 1, RepNCSPELAN4, [64, 64, 32, 3]], # 15
# avg-conv-down merge
[-1, 1, AConv, [48]],
[[-1, 12], 1, Concat, [1]], # cat head P4
# elan-2 block
[-1, 1, RepNCSPELAN4, [96, 96, 48, 3]], # 18 (P4/16-medium)
# avg-conv-down merge
[-1, 1, AConv, [64]],
[[-1, 9], 1, Concat, [1]], # cat head P5
# elan-2 block
[-1, 1, RepNCSPELAN4, [128, 128, 64, 3]], # 21 (P5/32-large)
# elan-spp block
[8, 1, SPPELAN, [128, 64]], # 22
# up-concat merge
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
# elan-2 block
[-1, 1, RepNCSPELAN4, [96, 96, 48, 3]], # 25
# up-concat merge
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
# elan-2 block
[-1, 1, RepNCSPELAN4, [64, 64, 32, 3]], # 28
# detect
[[28, 25, 22, 15, 18, 21], 1, DualDDetect, [nc]], # Detect(P3, P4, P5)
]
- Before
utils/plots.py
def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)):
    # Add one xyxy box to image with label
    if self.pil or not is_ascii(label):
        self.draw.rectangle(box, width=self.lw, outline=color)  # box
        if label:
            w, h = self.font.getsize(label)  # text width, height
            outside = box[1] - h >= 0  # label fits outside box
            self.draw.rectangle(
                (box[0], box[1] - h if outside else box[1], box[0] + w + 1,
                 box[1] + 1 if outside else box[1] + h + 1),
                fill=color,
            )
            # self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls')  # for PIL>8.0
            self.draw.text((box[0], box[1] - h if outside else box[1]), label, fill=txt_color, font=self.font)
- After
utils/plots.py
def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)):
    # Add one xyxy box to image with label
    if self.pil or not is_ascii(label):
        self.draw.rectangle(box, width=self.lw, outline=color)  # box
        if label:
            _, _, w, h = self.font.getbbox(label)  # text width, height
            outside = box[1] - h >= 0  # label fits outside box
            self.draw.rectangle(
                (box[0], box[1] - h if outside else box[1], box[0] + w + 1,
                 box[1] + 1 if outside else box[1] + h + 1),
                fill=color,
            )
            # self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls')  # for PIL>8.0
            self.draw.text((box[0], box[1] - h if outside else box[1]), label, fill=txt_color, font=self.font)
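For context: FreeTypeFont.getsize was deprecated in Pillow 9.2 and removed in Pillow 10, which is why the old code crashes on newer Pillow. getbbox returns an (x0, y0, x1, y1) box; since x0 and y0 sit at or near the origin, the patch simply takes the last two values as width and height. A minimal sketch (the font path is illustrative):

from PIL import ImageFont

font = ImageFont.truetype('/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf', 12)  # illustrative path
label = 'Head 0.87'

# Pillow < 10:  w, h = font.getsize(label)
# Pillow >= 8:  getbbox returns (x0, y0, x1, y1)
x0, y0, x1, y1 = font.getbbox(label)
w, h = x1 - x0, y1 - y0
# the patch above uses `_, _, w, h = font.getbbox(label)`, which gives the same
# result as long as the bbox starts at (or very near) the origin
print(w, h)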
# Train with Swish (SiLU)
python train_dual.py \
--workers 8 \
--device 0 \
--batch 8 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-t_original.yaml \
--weights yolov9-t-converted.pt \
--name yolov9-t \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 145 \
--close-mosaic 45
# Retrain with ReLU starting from the Swish(SiLU)-trained weights; mosaic augmentation disabled
# (--close-mosaic equals --epochs, so mosaic is off for the whole run)
python train_dual.py \
--workers 8 \
--device 0 \
--batch 8 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-t_original-relu.yaml \
--weights best-t.pt \
--name yolov9-t-relu \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 100 \
--close-mosaic 100
# Train with Swish (SiLU), without /255 normalization, with RGB input
python train_dual.py \
--workers 8 \
--device 0 \
--batch 8 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-t_original.yaml \
--weights yolov9-t-converted.pt \
--name yolov9-t-nonorm \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 145 \
--close-mosaic 45
#####################################################
# Train with Swish (SiLU)
python train_dual.py \
--workers 8 \
--device 0 \
--batch 16 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-s-original.yaml \
--weights yolov9-s-converted.pt \
--name yolov9-s \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 145 \
--close-mosaic 45
# Train with Swish (SiLU)
python train_dual.py \
--workers 8 \
--device 0 \
--batch 16 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-m_original.yaml \
--weights yolov9-m-converted.pt \
--name yolov9-m \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 145 \
--close-mosaic 45
# Retrain with ReLU starting from the Swish(SiLU)-trained weights; mosaic augmentation disabled
python train_dual.py \
--workers 8 \
--device 0 \
--batch 16 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-m_original-relu.yaml \
--weights best-m.pt \
--name yolov9-m-relu \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 100 \
--close-mosaic 100
# Train with Swish (SiLU)
python train_dual.py \
--workers 8 \
--device 0 \
--batch 16 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-c-original.yaml \
--weights yolov9-c-converted.pt \
--name yolov9-c \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 145 \
--close-mosaic 45
# Retrain with ReLU starting from the Swish(SiLU)-trained weights; mosaic augmentation disabled
python train_dual.py \
--workers 8 \
--device 0 \
--batch 16 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-c_original-relu.yaml \
--weights best-c.pt \
--name yolov9-c-relu \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 100 \
--close-mosaic 100
# Train with Swish (SiLU)
python train_dual.py \
--workers 8 \
--device 0 \
--batch 16 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-e_original.yaml \
--weights yolov9-e-converted.pt \
--name yolov9-e \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 145 \
--close-mosaic 45
# Retrain with ReLU starting from the Swish(SiLU)-trained weights; mosaic augmentation disabled
python train_dual.py \
--workers 8 \
--device 0 \
--batch 16 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-e_original-relu.yaml \
--weights best-e.pt \
--name yolov9-e-relu \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 100 \
--close-mosaic 100
#==============================================
python train_dual.py \
--workers 8 \
--device 0 \
--batch 8 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-t_original.yaml \
--weights runs/train/yolov9-t/weights/last.pt \
--name yolov9-t \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 245 \
--close-mosaic 45 \
--resume
# Difference between val.py and val_dual.py
# val.py validates models in the single-branch state, after the dual branch has been removed
# val_dual.py validates models still in the dual-branch state
# i.e. the difference is before vs. after re-parameterization
python val_dual.py \
--data data/original.yaml \
--img 640 \
--batch 32 \
--conf 0.001 \
--iou 0.7 \
--device 0 \
--weights runs/train/yolov9-t/weights/best-t.pt \
--name yolov9_t_640_val
python val_dual.py \
--data data/original.yaml \
--img 640 \
--batch 32 \
--conf 0.001 \
--iou 0.7 \
--device 0 \
--weights runs/train/yolov9-t/weights/best-t-relu.pt \
--name yolov9_t_640_val
python val_dual.py \
--data data/original.yaml \
--img 640 \
--batch 4 \
--conf 0.001 \
--iou 0.7 \
--device 0 \
--weights best-e.pt \
--name yolov9_e_640_val
- YOLOv9-T - with left/right-hand Flip - the code is unmodified, so the Hand-Left/Hand-Right classes are expected to break
Class Images Instances P R mAP50 mAP50-95
all 2280 69034 0.697 0.536 0.577 0.367
Body 2280 13031 0.737 0.721 0.763 0.554
BodyWithWheelchair 2280 133 0.759 0.955 0.968 0.858
Head 2280 11117 0.845 0.757 0.809 0.558
Face 2280 5563 0.836 0.687 0.739 0.479
Eye 2280 4748 0.652 0.299 0.343 0.134
Nose 2280 4607 0.709 0.42 0.462 0.242
Mouth 2280 3757 0.653 0.373 0.406 0.181
Ear 2280 4328 0.705 0.417 0.463 0.245
Hand 2280 7567 0.848 0.537 0.667 0.394
Hand-Left 2280 3798 0.477 0.369 0.363 0.215
Hand-Right 2280 3769 0.48 0.358 0.354 0.213
Foot 2280 6616 0.668 0.544 0.592 0.327
- YOLOv9-T - without left/right-hand Flip - with the horizontal-flip augmentation disabled
Class Images Instances P R mAP50 mAP50-95
all 2280 69034 0.743 0.533 0.596 0.376
Body 2280 13031 0.735 0.724 0.764 0.554
BodyWithWheelchair 2280 133 0.773 0.94 0.947 0.820
Head 2280 11117 0.833 0.755 0.803 0.551
Face 2280 5563 0.837 0.682 0.739 0.496
Eye 2280 4748 0.679 0.278 0.328 0.126
Nose 2280 4607 0.721 0.387 0.433 0.222
Mouth 2280 3757 0.69 0.356 0.393 0.174
Ear 2280 4328 0.71 0.396 0.447 0.235
Hand 2280 7567 0.856 0.512 0.655 0.383
Hand-Left 2280 3798 0.712 0.419 0.535 0.320
Hand-Right 2280 3769 0.701 0.418 0.521 0.314
Foot 2280 6616 0.67 0.534 0.585 0.322
Comparison with YOLOX-Tiny
Class Images Instances mAP50-95(YOLOX-Tiny) mAP50-95(YOLOv9-T)
all 2280 69034 0.339 0.376
Body 2280 13031 0.453 0.554
BodyWithWheelchair 2280 133 0.688 0.820
head 2280 11117 0.482 0.551
face 2280 5563 0.434 0.496
eye 2280 4748 0.120 0.126
nose 2280 4607 0.215 0.222
mouth 2280 3757 0.185 0.174
ear 2280 4328 0.213 0.235
hand 2280 7567 0.373 0.383
hand_left 2280 3798 0.307 0.320
hand_right 2280 3769 0.304 0.314
foot 2280 6616 0.292 0.322
YOLOv9-E - without left/right-hand Flip - horizontal-flip augmentation disabled (intermediate result at epoch 84)
Class Images Instances mAP50-95(YOLOX-X) mAP50-95(YOLOv9-E)
all 2117 48988 0.554 0.624
Body 2117 11624 0.614 0.712
BodyWithWheelchair 2117 153 0.871 0.851
Head 2117 9936 0.585 0.701
Face 2117 5653 0.506 0.621
Hand 2117 7525 0.513 0.570
Hand-Left 2117 3739 0.456 0.527
Hand-Right 2117 3786 0.449 0.527
Foot 2117 6572 0.431 0.485
- YOLOv9-T - Re-parameterization
reparameterization.py
import torch
from models.yolo import Model
import argparse


def main(args):
    type: str = args.type
    cfg: str = args.cfg
    check_point_file: str = args.weights
    save_pt_file_name = args.save
    ckpt = torch.load(check_point_file, map_location='cpu')
    names = ckpt['model'].names
    nc = ckpt['model'].nc
    device = torch.device("cpu")
    # build the single-branch (converted) model from the gelan-*.yaml config
    model = Model(cfg, ch=3, nc=nc, anchors=3)
    print('')
    print(f'@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ nc: {nc}')
    print('')
    model = model.to(device)
    _ = model.eval()
    model.names = names
    model.nc = nc
    idx = 0
    if type in ['n', 't', 's']:
        # t/s/n: layers 0-21 map one-to-one; the DDetect head (cv2/cv3/dfl)
        # takes DualDDetect's cv4/cv5/dfl2 weights from layer idx+7
        for k, v in model.state_dict().items():
            if "model.{}.".format(idx) in k:
                if idx < 22:
                    kr = k.replace("model.{}.".format(idx), "model.{}.".format(idx))
                    # zero the destination tensor in place, then add the source weights
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    print(k, "perfectly matched!!")
                elif "model.{}.cv2.".format(idx) in k:
                    kr = k.replace("model.{}.cv2.".format(idx), "model.{}.cv4.".format(idx+7))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    print(k, "perfectly matched!!")
                elif "model.{}.cv3.".format(idx) in k:
                    kr = k.replace("model.{}.cv3.".format(idx), "model.{}.cv5.".format(idx+7))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    print(k, "perfectly matched!!")
                elif "model.{}.dfl.".format(idx) in k:
                    kr = k.replace("model.{}.dfl.".format(idx), "model.{}.dfl2.".format(idx+7))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    print(k, "perfectly matched!!")
            else:
                while True:
                    idx += 1
                    if "model.{}.".format(idx) in k:
                        break
                if idx < 22:
                    kr = k.replace("model.{}.".format(idx), "model.{}.".format(idx))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    print(k, "perfectly matched!!")
                elif "model.{}.cv2.".format(idx) in k:
                    kr = k.replace("model.{}.cv2.".format(idx), "model.{}.cv4.".format(idx+7))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    print(k, "perfectly matched!!")
                elif "model.{}.cv3.".format(idx) in k:
                    kr = k.replace("model.{}.cv3.".format(idx), "model.{}.cv5.".format(idx+7))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    print(k, "perfectly matched!!")
                elif "model.{}.dfl.".format(idx) in k:
                    kr = k.replace("model.{}.dfl.".format(idx), "model.{}.dfl2.".format(idx+7))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    print(k, "perfectly matched!!")
    elif type == 'm':
        # m: converted layers shift by +1; detect head weights come from layer idx+16
        for k, v in model.state_dict().items():
            if "model.{}.".format(idx) in k:
                if idx < 22:
                    kr = k.replace("model.{}.".format(idx), "model.{}.".format(idx+1))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    print(k, "perfectly matched!!")
                elif "model.{}.cv2.".format(idx) in k:
                    kr = k.replace("model.{}.cv2.".format(idx), "model.{}.cv4.".format(idx+16))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    print(k, "perfectly matched!!")
                elif "model.{}.cv3.".format(idx) in k:
                    kr = k.replace("model.{}.cv3.".format(idx), "model.{}.cv5.".format(idx+16))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    print(k, "perfectly matched!!")
                elif "model.{}.dfl.".format(idx) in k:
                    kr = k.replace("model.{}.dfl.".format(idx), "model.{}.dfl2.".format(idx+16))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    print(k, "perfectly matched!!")
            else:
                while True:
                    idx += 1
                    if "model.{}.".format(idx) in k:
                        break
                if idx < 22:
                    kr = k.replace("model.{}.".format(idx), "model.{}.".format(idx+1))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    print(k, "perfectly matched!!")
                elif "model.{}.cv2.".format(idx) in k:
                    kr = k.replace("model.{}.cv2.".format(idx), "model.{}.cv4.".format(idx+16))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    print(k, "perfectly matched!!")
                elif "model.{}.cv3.".format(idx) in k:
                    kr = k.replace("model.{}.cv3.".format(idx), "model.{}.cv5.".format(idx+16))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    print(k, "perfectly matched!!")
                elif "model.{}.dfl.".format(idx) in k:
                    kr = k.replace("model.{}.dfl.".format(idx), "model.{}.dfl2.".format(idx+16))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    print(k, "perfectly matched!!")
    elif type == 'c':
        # c: same offsets as 'm' (+1 / +16), without the progress prints
        for k, v in model.state_dict().items():
            if "model.{}.".format(idx) in k:
                if idx < 22:
                    kr = k.replace("model.{}.".format(idx), "model.{}.".format(idx+1))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                elif "model.{}.cv2.".format(idx) in k:
                    kr = k.replace("model.{}.cv2.".format(idx), "model.{}.cv4.".format(idx+16))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                elif "model.{}.cv3.".format(idx) in k:
                    kr = k.replace("model.{}.cv3.".format(idx), "model.{}.cv5.".format(idx+16))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                elif "model.{}.dfl.".format(idx) in k:
                    kr = k.replace("model.{}.dfl.".format(idx), "model.{}.dfl2.".format(idx+16))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
            else:
                while True:
                    idx += 1
                    if "model.{}.".format(idx) in k:
                        break
                if idx < 22:
                    kr = k.replace("model.{}.".format(idx), "model.{}.".format(idx+1))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                elif "model.{}.cv2.".format(idx) in k:
                    kr = k.replace("model.{}.cv2.".format(idx), "model.{}.cv4.".format(idx+16))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                elif "model.{}.cv3.".format(idx) in k:
                    kr = k.replace("model.{}.cv3.".format(idx), "model.{}.cv5.".format(idx+16))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                elif "model.{}.dfl.".format(idx) in k:
                    kr = k.replace("model.{}.dfl.".format(idx), "model.{}.dfl2.".format(idx+16))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
    elif type == 'e':
        # e: layers 0-28 map one-to-one, 29-41 shift by +7; detect head from idx+7
        for k, v in model.state_dict().items():
            if "model.{}.".format(idx) in k:
                if idx < 29:
                    kr = k.replace("model.{}.".format(idx), "model.{}.".format(idx))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    print(k, "perfectly matched!!")
                elif idx < 42:
                    kr = k.replace("model.{}.".format(idx), "model.{}.".format(idx+7))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    print(k, "perfectly matched!!")
                elif "model.{}.cv2.".format(idx) in k:
                    kr = k.replace("model.{}.cv2.".format(idx), "model.{}.cv4.".format(idx+7))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    print(k, "perfectly matched!!")
                elif "model.{}.cv3.".format(idx) in k:
                    kr = k.replace("model.{}.cv3.".format(idx), "model.{}.cv5.".format(idx+7))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    print(k, "perfectly matched!!")
                elif "model.{}.dfl.".format(idx) in k:
                    kr = k.replace("model.{}.dfl.".format(idx), "model.{}.dfl2.".format(idx+7))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    print(k, "perfectly matched!!")
            else:
                while True:
                    idx += 1
                    if "model.{}.".format(idx) in k:
                        break
                if idx < 29:
                    kr = k.replace("model.{}.".format(idx), "model.{}.".format(idx))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    print(k, "perfectly matched!!")
                elif idx < 42:
                    kr = k.replace("model.{}.".format(idx), "model.{}.".format(idx+7))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    print(k, "perfectly matched!!")
                elif "model.{}.cv2.".format(idx) in k:
                    kr = k.replace("model.{}.cv2.".format(idx), "model.{}.cv4.".format(idx+7))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    print(k, "perfectly matched!!")
                elif "model.{}.cv3.".format(idx) in k:
                    kr = k.replace("model.{}.cv3.".format(idx), "model.{}.cv5.".format(idx+7))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    print(k, "perfectly matched!!")
                elif "model.{}.dfl.".format(idx) in k:
                    kr = k.replace("model.{}.dfl.".format(idx), "model.{}.dfl2.".format(idx+7))
                    model.state_dict()[k] -= model.state_dict()[k]
                    model.state_dict()[k] += ckpt['model'].state_dict()[kr]
                    print(k, "perfectly matched!!")
    _ = model.eval()

    m_ckpt = {
        'model': model.half(),
        'optimizer': None,
        'best_fitness': None,
        'ema': None,
        'updates': None,
        'opt': None,
        'git': None,
        'date': None,
        'epoch': -1,
    }
    torch.save(m_ckpt, save_pt_file_name)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--type', type=str, default='t', help='convert model type (t or e)')
    parser.add_argument('--cfg', type=str, default='./models/detect/gelan-t.yaml', help='model.yaml path')
    parser.add_argument('--weights', type=str, default='./best-t.pt', help='weights path')
    parser.add_argument('--save', default=f'./yolov9_wholebody_with_wheelchair_t.pt', type=str, help='save path')
    args = parser.parse_args()
    main(args)
- Re-parameterization and ONNX export
python reparameterization.py \
--type t \
--cfg ./models/detect/gelan-t.yaml \
--weights ./best-t.pt \
--save ./yolov9_wholebody_with_wheelchair_t.pt
python export.py \
--data data/original.yaml \
--weights yolov9_wholebody_with_wheelchair_t.pt \
--imgsz 384 672 \
--batch-size 1 \
--device cpu \
--opset 11 \
--include onnx
onnxsim yolov9_wholebody_with_wheelchair_t.onnx yolov9_wholebody_with_wheelchair_t.onnx
onnxsim yolov9_wholebody_with_wheelchair_t.onnx yolov9_wholebody_with_wheelchair_t.onnx
onnxsim yolov9_wholebody_with_wheelchair_t.onnx yolov9_wholebody_with_wheelchair_t.onnx
- YOLOv9-T-ReLU - Re-parameterization
- Re-parameterization and ONNX export
python reparameterization_relu.py \
--type t \
--cfg ./models/detect/gelan-t-relu.yaml \
--weights ./best-t-relu.pt \
--save ./yolov9_wholebody_with_wheelchair_t_relu.pt
python export.py \
--data data/original.yaml \
--weights yolov9_wholebody_with_wheelchair_t_relu.pt \
--imgsz 384 672 \
--batch-size 1 \
--device cpu \
--opset 11 \
--include onnx
onnxsim yolov9_wholebody_with_wheelchair_t_relu.onnx yolov9_wholebody_with_wheelchair_t_relu.onnx
onnxsim yolov9_wholebody_with_wheelchair_t_relu.onnx yolov9_wholebody_with_wheelchair_t_relu.onnx
onnxsim yolov9_wholebody_with_wheelchair_t_relu.onnx yolov9_wholebody_with_wheelchair_t_relu.onnx
- YOLOv9-T - without left/right-hand Flip + SiLU + QAT
Modification to export_qat.py
export_qat.py
if is_model_qat:
    warnings.filterwarnings("ignore")
    LOGGER.info(f'{prefix} Model QAT Detected ...')
    quant_nn.TensorQuantizer.use_fb_fake_quant = True
    model.eval()
    quantize.initialize()
    quantize.replace_custom_module_forward(model)
    with torch.no_grad():
        torch.onnx.export(
            model,
            im,
            f,
            opset_version=13,
            input_names=['images'],
            output_names=output_names)
Modification to quantize.py
quantize.py
def remove_redundant_qdq_model(onnx_model, f):
    check_requirements('onnx')
    import onnx
    # preserve domain / ir_version / metadata so they survive the
    # onnx_graphsurgeon (gs, imported at module level) round trip
    domain: str = onnx_model.domain
    ir_version: int = onnx_model.ir_version
    meta_data = {'domain': domain, 'ir_version': ir_version}
    metadata_props = None
    if hasattr(onnx_model, 'metadata_props'):
        metadata_props = onnx_model.metadata_props
    graph = gs.import_onnx(onnx_model)
    nodes = graph.nodes
    # SiLU pattern: a Mul fed by both a Conv and a Sigmoid
    mul_nodes = [node for node in nodes if node.op == "Mul" and node.i(0).op == "Conv" and node.i(1).op == "Sigmoid"]
    many_outputs_mul_nodes = []
    for node in mul_nodes:
        try:
            for i in range(99):
                node.o(i)
        except:
            if i > 1:
                mul_nodename_outnum = {"node": node, "out_num": i}
                many_outputs_mul_nodes.append(mul_nodename_outnum)
    for node_dict in many_outputs_mul_nodes:
        if node_dict["out_num"] == 2:
            if node_dict["node"].o(0).op == "QuantizeLinear" and node_dict["node"].o(1).op == "QuantizeLinear":
                if node_dict["node"].o(1).o(0).o(0).op == "Concat":
                    concat_dq_out_name = node_dict["node"].o(1).o(0).outputs[0].name
                    for i, concat_input in enumerate(node_dict["node"].o(1).o(0).o(0).inputs):
                        if concat_input.name == concat_dq_out_name:
                            node_dict["node"].o(1).o(0).o(0).inputs[i] = node_dict["node"].o(0).o(0).outputs[0]
                else:
                    node_dict["node"].o(1).o(0).o(0).inputs[0] = node_dict["node"].o(0).o(0).outputs[0]
            # elif node_dict["node"].o(0).op == "QuantizeLinear" and node_dict["node"].o(1).op == "Concat":
            #     concat_dq_out_name = node_dict["node"].outputs[0].outputs[0].inputs[0].name
            #     for i, concat_input in enumerate(node_dict["node"].outputs[0].outputs[1].inputs):
            #         if concat_input.name == concat_dq_out_name:
            #             #print("elif", concat_input.name, concat_dq_out_name )
            #             #print("will-be", node_dict["node"].outputs[0].outputs[1].inputs[i], node_dict["node"].outputs[0].outputs[0].o().outputs[0] )
            #             node_dict["node"].outputs[0].outputs[1].inputs[i] = node_dict["node"].outputs[0].outputs[0].o().outputs[0]
    # add_nodes = [node for node in nodes if node.op == "Add"]
    # many_outputs_add_nodes = []
    # for node in add_nodes:
    #     try:
    #         for i in range(99):
    #             node.o(i)
    #     except:
    #         if i > 1 and node.o().op == "QuantizeLinear":
    #             add_nodename_outnum = {"node": node, "out_num": i}
    #             many_outputs_add_nodes.append(add_nodename_outnum)
    # for node_dict in many_outputs_add_nodes:
    #     if node_dict["node"].outputs[0].outputs[0].op == "QuantizeLinear" and node_dict["node"].outputs[0].outputs[1].op == "Concat":
    #         concat_dq_out_name = node_dict["node"].outputs[0].outputs[0].inputs[0].name
    #         for i, concat_input in enumerate(node_dict["node"].outputs[0].outputs[1].inputs):
    #             if concat_input.name == concat_dq_out_name:
    #                 node_dict["node"].outputs[0].outputs[1].inputs[i] = node_dict["node"].outputs[0].outputs[0].o().outputs[0]
    exported_graph = gs.export_onnx(graph, **meta_data)
    if metadata_props is not None:
        exported_graph.metadata_props.extend(metadata_props)
    onnx.save(exported_graph, f)
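A minimal usage sketch of the patched function; the import path and file names are illustrative, and it assumes quantize.py imports onnx_graphsurgeon as gs at module level:

import onnx
from models.quantize import remove_redundant_qdq_model  # illustrative import path

onnx_model = onnx.load('qat_model.onnx')                 # illustrative file name
remove_redundant_qdq_model(onnx_model, 'qat_model_clean.onnx')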
docker pull nvcr.io/nvidia/pytorch:23.02-py3
cd ~
mkdir -p work && cd work
git clone https://github.com/WongKinYiu/yolov9.git && cd yolov9
git checkout 5b1ea9a8b3f0ffe4fe0e203ec6232d788bb3fcff
docker run -it --gpus all \
--net host \
--ipc host \
-v `pwd`:/yolov9 \
nvcr.io/nvidia/pytorch:23.02-py3
cd /
# This custom fork already fixes the onnx_graphsurgeon bug
# (ir_version and metadata being destroyed on export)
git clone https://github.com/PINTO0309/yolov9-qat.git && cd yolov9-qat
./patch_yolov9.sh /yolov9
./install_dependencies.sh --defaults --trex
cd /yolov9
/usr/local/lib/python3.8/dist-packages/pytorch_quantization/calib/histogram.py
# around line 257 of histogram.py
def _compute_amax_mse(calib_hist, calib_bin_edges, num_bits, unsigned, stride=1, start_bin=128):
    """Returns amax that minimizes MSE of the collected histogram"""

    # If calibrator hasn't collected any data, return none
    if calib_bin_edges is None and calib_hist is None:
        return None

    counts = torch.from_numpy(calib_hist[:]).float()
    edges = torch.from_numpy(calib_bin_edges[:]).float()
    centers = (edges[1:] + edges[:-1]) / 2

    if len(centers) > len(counts):
        centers = (edges[2:] + edges[:-2]) / 2
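The added guard handles histograms whose edge array carries an extra element, which would leave centers one entry longer than counts and break the MSE computation that follows. A small sketch of the shape arithmetic with synthetic arrays:

import torch

counts = torch.ones(4)            # 4 histogram bins
edges = torch.linspace(0, 5, 6)   # 6 edges: one more than the 5 expected for 4 bins

centers = (edges[1:] + edges[:-1]) / 2
print(len(centers), len(counts))  # 5 vs 4 -> length mismatch

if len(centers) > len(counts):
    centers = (edges[2:] + edges[:-2]) / 2
print(len(centers))               # 4, matching counts again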
Temporary check with the SiLU activation-function version (large accuracy degradation, low performance)
data/original.yaml
train: /yolov9/dataset/images/train
val: /yolov9/dataset/images/val
# Classes
names:
0: Body
1: BodyWithWheelchair
2: Head
3: Face
4: Hand
5: Hand-Left
6: Hand-Right
7: Foot
data/hyps/hyp.scratch-high_original.yaml
lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf)
momentum: 0.937 # SGD momentum/Adam beta1
weight_decay: 0.0005 # optimizer weight decay 5e-4
warmup_epochs: 3.0 # warmup epochs (fractions ok)
warmup_momentum: 0.8 # warmup initial momentum
warmup_bias_lr: 0.1 # warmup initial bias lr
box: 7.5 # box loss gain
cls: 0.5 # cls loss gain
cls_pw: 1.0 # cls BCELoss positive_weight
obj: 0.7 # obj loss gain (scale with pixels)
obj_pw: 1.0 # obj BCELoss positive_weight
dfl: 1.5 # dfl loss gain
iou_t: 0.20 # IoU training threshold
anchor_t: 5.0 # anchor-multiple threshold
# anchors: 3 # anchors per output layer (0 to ignore)
fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
hsv_v: 0.4 # image HSV-Value augmentation (fraction)
degrees: 0.0 # image rotation (+/- deg)
translate: 0.1 # image translation (+/- fraction)
scale: 0.9 # image scale (+/- gain)
shear: 0.0 # image shear (+/- deg)
perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
flipud: 0.0 # image flip up-down (probability)
fliplr: 0.0 # image flip left-right (probability)
mosaic: 1.0 # image mosaic (probability)
mixup: 0.15 # image mixup (probability)
copy_paste: 0.3 # segment copy-paste (probability)
models/detect/yolov9-t_original.yaml
# YOLOv9
# parameters
nc: 8 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
#activation: nn.LeakyReLU(0.1)
#activation: nn.ReLU()
# anchors
anchors: 3
# gelan backbone
backbone:
[
# conv down
[-1, 1, Conv, [16, 3, 2]], # 0-P1/2
# conv down
[-1, 1, Conv, [32, 3, 2]], # 1-P2/4
# elan-1 block
[-1, 1, ELAN1, [32, 32, 16]], # 2
# avg-conv down
[-1, 1, AConv, [64]], # 3-P3/8
# elan-2 block
[-1, 1, RepNCSPELAN4, [64, 64, 32, 3]], # 4
# avg-conv down
[-1, 1, AConv, [96]], # 5-P4/16
# elan-2 block
[-1, 1, RepNCSPELAN4, [96, 96, 48, 3]], # 6
# avg-conv down
[-1, 1, AConv, [128]], # 7-P5/32
# elan-2 block
[-1, 1, RepNCSPELAN4, [128, 128, 64, 3]], # 8
]
# elan head
head:
[
# elan-spp block
[-1, 1, SPPELAN, [128, 64]], # 9
# up-concat merge
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
# elan-2 block
[-1, 1, RepNCSPELAN4, [96, 96, 48, 3]], # 12
# up-concat merge
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
# elan-2 block
[-1, 1, RepNCSPELAN4, [64, 64, 32, 3]], # 15
# avg-conv-down merge
[-1, 1, AConv, [48]],
[[-1, 12], 1, Concat, [1]], # cat head P4
# elan-2 block
[-1, 1, RepNCSPELAN4, [96, 96, 48, 3]], # 18 (P4/16-medium)
# avg-conv-down merge
[-1, 1, AConv, [64]],
[[-1, 9], 1, Concat, [1]], # cat head P5
# elan-2 block
[-1, 1, RepNCSPELAN4, [128, 128, 64, 3]], # 21 (P5/32-large)
# elan-spp block
[8, 1, SPPELAN, [128, 64]], # 22
# up-concat merge
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
# elan-2 block
[-1, 1, RepNCSPELAN4, [96, 96, 48, 3]], # 25
# up-concat merge
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
# elan-2 block
[-1, 1, RepNCSPELAN4, [64, 64, 32, 3]], # 28
# detect
[[28, 25, 22, 15, 18, 21], 1, DualDDetect, [nc]], # Detect(P3, P4, P5)
]
python qat.py quantize \
--device cuda:0 \
--weights yolov9_wholebody_with_wheelchair_t.pt \
--name yolov9_qat \
--exist-ok \
--batch-size 4 \
--data data/original.yaml \
--img 640 \
--hyp data/hyps/hyp.scratch-high_original.yaml
- QAT was applied with the activation left as SiLU, so the low accuracy is expected
Class Images Instances P R mAP50 mAP50-95
all 2117 48988 0.696 0.565 0.619 0.395
Body 2117 11624 0.7 0.715 0.753 0.529
BodyWithWheelchair 2117 153 0.553 0.865 0.823 0.652
Head 2117 9936 0.796 0.775 0.812 0.539
Face 2117 5653 0.794 0.583 0.632 0.391
Hand 2117 7525 0.805 0.453 0.578 0.313
Hand-Left 2117 3739 0.603 0.348 0.422 0.235
Hand-Right 2117 3786 0.638 0.347 0.426 0.235
Foot 2117 6572 0.683 0.439 0.504 0.263
QAT: Epoch-10, weights saved as runs/qat/yolov9_qat/weights/qat_ep_10_ap_0.3946_yolov9_wholebody_with_wheelchair_t.pt (11.4 MB)
Eval Model | AP | AP50 | Precision | Recall
-------------------------------------------------------
Origin | 0.399 | 0.622 | 0.696 | 0.567
PTQ | 0.395 | 0.618 | 0.693 | 0.565
QAT - Best | 0.395 | 0.618 | 0.699 | 0.562
QAT: Eval - Epoch 10 | AP: 0.3946 | AP50: 0.6187 | Precision: 0.6964 | Recall: 0.5655
Check with the ReLU activation-function version (small accuracy degradation, high performance)
python qat.py quantize \
--device cuda:0 \
--weights yolov9_wholebody_with_wheelchair_t_relu.pt \
--name yolov9_qat \
--exist-ok \
--batch-size 4 \
--data data/original.yaml \
--img 640 \
--hyp data/hyps/hyp.scratch-high_original.yaml
- Switching the activation to ReLU before QAT keeps the drop to about 0.2 points
Class Images Instances mAP50-95
all 2117 48988 0.469
Body 2117 11624 0.564
BodyWithWheelchair 2117 153 0.770
Head 2117 9936 0.580
Face 2117 5653 0.482
Hand 2117 7525 0.388
Hand-Left 2117 3739 0.331
Hand-Right 2117 3786 0.321
Foot 2117 6572 0.317
Eval Model | AP | AP50 | Precision | Recall
-------------------------------------------------------
Origin | 0.471 | 0.7 | 0.774 | 0.628
PTQ | 0.467 | 0.696 | 0.773 | 0.624
QAT - Best | 0.469 | 0.697 | 0.771 | 0.626
QAT: Eval - Epoch 10 | AP: 0.4685 | AP50: 0.6975 | Precision: 0.7686 | Recall: 0.6251
- Export to ONNX
python export_qat.py \
--weights runs/qat/yolov9_qat/weights/qat_best_yolov9_wholebody_with_wheelchair_t_relu.pt \
--img-size 384 672 \
--include onnx \
--simplify
- Deploy with TensorRT
/usr/src/tensorrt/bin/trtexec \
--onnx=runs/qat/yolov9_qat/weights/qat_best_yolov9_wholebody_with_wheelchair_t_relu.onnx \
--int8 --fp16 \
--useCudaGraph \
--saveEngine=runs/qat/yolov9_qat/weights/qat_best_yolov9_wholebody_with_wheelchair_t_relu.engine
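Before building the engine, a quick sanity check that the exported Q/DQ ONNX still runs can save a debugging round trip. A minimal sketch with onnxruntime, assuming the 384x672 export and the 'images' input name from the export_qat.py snippet above:

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession(
    'runs/qat/yolov9_qat/weights/qat_best_yolov9_wholebody_with_wheelchair_t_relu.onnx',
    providers=['CPUExecutionProvider'],
)
x = np.random.rand(1, 3, 384, 672).astype(np.float32)
outs = sess.run(None, {'images': x})
print([o.shape for o in outs])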
Benchmarking inference performance with TensorRT
YOLOv9-T-ReLU
- Benchmark result: 992 queries/sec
export filepath_no_ext=runs/qat/yolov9_qat/weights/qat_best_yolov9_wholebody_with_wheelchair_t_relu
trtexec \
--onnx=${filepath_no_ext}.onnx \
--fp16 \
--int8 \
--saveEngine=${filepath_no_ext}.engine \
--timingCacheFile=${filepath_no_ext}.engine.timing.cache \
--warmUp=500 \
--duration=10 \
--useCudaGraph \
--useSpinWait \
--noDataTransfers
=== Performance summary ===
Throughput: 992.939 qps
Latency: min = 0.982422 ms, max = 1.85657 ms, mean = 1.00606 ms, median = 0.986084 ms, percentile(90%) = 0.987305 ms, percentile(95%) = 1.22876 ms, percentile(99%) = 1.43359 ms
Enqueue Time: min = 0.000976562 ms, max = 0.0273438 ms, mean = 0.0015637 ms, median = 0.00146484 ms, percentile(90%) = 0.00219727 ms, percentile(95%) = 0.00292969 ms, percentile(99%) = 0.00402832 ms
H2D Latency: min = 0 ms, max = 0 ms, mean = 0 ms, median = 0 ms, percentile(90%) = 0 ms, percentile(95%) = 0 ms, percentile(99%) = 0 ms
GPU Compute Time: min = 0.982422 ms, max = 1.85657 ms, mean = 1.00606 ms, median = 0.986084 ms, percentile(90%) = 0.987305 ms, percentile(95%) = 1.22876 ms, percentile(99%) = 1.43359 ms
D2H Latency: min = 0 ms, max = 0 ms, mean = 0 ms, median = 0 ms, percentile(90%) = 0 ms, percentile(95%) = 0 ms, percentile(99%) = 0 ms
Total Host Walltime: 10.0026 s
Total GPU Compute Time: 9.99216 s
* GPU compute time is unstable, with coefficient of variance = 8.17993%. If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability.
Explanations of the performance metrics are printed in the verbose logs.
YOLOv9-E-ReLU
- Benchmark result: 226 queries/sec
export filepath_no_ext=runs/qat/yolov9_qat_e/weights/qat_best_yolov9_wholebody_with_wheelchair_e_relu
trtexec \
--onnx=${filepath_no_ext}.onnx \
--fp16 \
--int8 \
--saveEngine=${filepath_no_ext}.engine \
--timingCacheFile=${filepath_no_ext}.engine.timing.cache \
--warmUp=500 \
--duration=10 \
--useCudaGraph \
--useSpinWait \
--noDataTransfers
=== Performance summary ===
Throughput: 226.477 qps
Latency: min = 3.9444 ms, max = 6.67444 ms, mean = 4.41436 ms, median = 4.34595 ms, percentile(90%) = 4.98584 ms, percentile(95%) = 5.06982 ms, percentile(99%) = 5.30225 ms
Enqueue Time: min = 0.000976562 ms, max = 0.0146484 ms, mean = 0.00329979 ms, median = 0.00292969 ms, percentile(90%) = 0.00537109 ms, percentile(95%) = 0.0065918 ms, percentile(99%) = 0.0100098 ms
H2D Latency: min = 0 ms, max = 0 ms, mean = 0 ms, median = 0 ms, percentile(90%) = 0 ms, percentile(95%) = 0 ms, percentile(99%) = 0 ms
GPU Compute Time: min = 3.9444 ms, max = 6.67444 ms, mean = 4.41436 ms, median = 4.34595 ms, percentile(90%) = 4.98584 ms, percentile(95%) = 5.06982 ms, percentile(99%) = 5.30225 ms
D2H Latency: min = 0 ms, max = 0 ms, mean = 0 ms, median = 0 ms, percentile(90%) = 0 ms, percentile(95%) = 0 ms, percentile(99%) = 0 ms
Total Host Walltime: 10.0099 s
Total GPU Compute Time: 10.0074 s
* GPU compute time is unstable, with coefficient of variance = 9.09848%. If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability.
Explanations of the performance metrics are printed in the verbose logs.
Eval Model | AP | AP50 | Precision | Recall
-------------------------------------------------------
Origin | 0.646 | 0.85 | 0.874 | 0.772
PTQ | 0.645 | 0.848 | 0.877 | 0.769
QAT - Best | 0.647 | 0.85 | 0.88 | 0.768
Re-parameterization -> ONNX Export -> Post-Process merge script
Re-parameterization -> ONNX Export script
export_onnx.sh
TYPE=t
# RELU= or RELU=-relu
RELU=
RELUS=$(echo ${RELU} | sed 's/-/_/g')
MODEL_NAME=yolov9_${TYPE}_wholebody_with_wheelchair
SUFFIX="0100_1x3x"
# best-t.pt
# best-t-relu.pt
# best-e.pt
# best-e-relu.pt
MODEL_PATH=best-${TYPE}${RELU}.pt
RESOLUTIONS=(
# "128 160"
# "128 256"
# "192 320"
# "192 416"
# "192 640"
# "192 800"
# "256 320"
# "256 416"
# "256 448"
# "256 640"
# "256 800"
# "256 960"
# "288 1280"
# "288 480"
# "288 640"
# "288 800"
# "288 960"
# "320 320"
# "384 1280"
# "384 480"
# "384 640"
# "384 800"
# "384 960"
# "416 416"
# "480 1280"
# "480 640"
# "480 800"
# "480 960"
# "512 512"
# "512 640"
# "512 896"
# "544 1280"
# "544 800"
# "544 960"
# "640 640"
# "736 1280"
# "576 1024"
"384 672"
)
python reparameterization${RELUS}.py \
--type ${TYPE} \
--cfg ./models/detect/gelan-${TYPE}${RELU}.yaml \
--weights ${MODEL_PATH} \
--save ${MODEL_NAME}${RELUS}.pt
for((i=0; i<${#RESOLUTIONS[@]}; i++))
do
RESOLUTION=(`echo ${RESOLUTIONS[i]}`)
H=${RESOLUTION[0]}
W=${RESOLUTION[1]}
python export.py \
--data data/original.yaml \
--weights ${MODEL_NAME}${RELUS}.pt \
--imgsz ${H} ${W} \
--batch-size 1 \
--device cpu \
--opset 11 \
--include onnx
mv ${MODEL_NAME}${RELUS}.onnx ${MODEL_NAME}${RELUS}_${SUFFIX}${H}x${W}.onnx
sng4onnx \
--input_onnx_file_path ${MODEL_NAME}${RELUS}_${SUFFIX}${H}x${W}.onnx \
--output_onnx_file_path ${MODEL_NAME}${RELUS}_${SUFFIX}${H}x${W}.onnx
onnxsim ${MODEL_NAME}${RELUS}_${SUFFIX}${H}x${W}.onnx ${MODEL_NAME}${RELUS}_${SUFFIX}${H}x${W}.onnx
onnxsim ${MODEL_NAME}${RELUS}_${SUFFIX}${H}x${W}.onnx ${MODEL_NAME}${RELUS}_${SUFFIX}${H}x${W}.onnx
onnxsim ${MODEL_NAME}${RELUS}_${SUFFIX}${H}x${W}.onnx ${MODEL_NAME}${RELUS}_${SUFFIX}${H}x${W}.onnx
done
python export.py \
--data data/original.yaml \
--weights ${MODEL_NAME}${RELUS}.pt \
--device cpu \
--opset 11 \
--include onnx \
--dynamic
mv ${MODEL_NAME}${RELUS}.onnx ${MODEL_NAME}${RELUS}_Nx3HxW.onnx
onnxsim ${MODEL_NAME}${RELUS}_Nx3HxW.onnx ${MODEL_NAME}${RELUS}_Nx3HxW.onnx
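The repeated onnxsim passes are presumably there because a single simplification pass does not always reach a fixed point; rewrites from one pass can expose constant-foldable patterns for the next.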
Post-Process merge script
#!/bin/bash
# pip install -U pip \
# && pip install onnxsim
# && pip install -U simple-onnx-processing-tools \
# && pip install -U onnx \
# && python3 -m pip install -U onnx_graphsurgeon --index-url https://pypi.ngc.nvidia.com \
# && pip install tensorflow==2.14.0
TYPE=t
# RELU= or RELU=-relu
RELU=
RELUS=$(echo ${RELU} | sed 's/-/_/g')
MODEL_NAME=yolov9_${TYPE}_wholebody_with_wheelchair${RELUS}
SUFFIX="0100_1x3x"
OPSET=11
BATCHES=1
CLASSES=8
RESOLUTIONS=(
# "128 160 420"
# "128 256 672"
# "192 320 1260"
# "192 416 1638"
# "192 640 2520"
# "192 800 3150"
# "256 320 1680"
# "256 416 2184"
# "256 448 2352"
# "256 640 3360"
# "256 800 4200"
# "256 960 5040"
# "288 1280 7560"
# "288 480 2835"
# "288 640 3780"
# "288 800 4725"
# "288 960 5670"
# "320 320 2100"
# "384 1280 10080"
# "384 480 3780"
# "384 640 5040"
# "384 800 6300"
# "384 960 7560"
# "416 416 3549"
# "480 1280 12600"
# "480 640 6300"
# "480 800 7875"
# "480 960 9450"
# "512 512 5376"
# "512 640 6720"
# "512 896 9408"
# "544 1280 14280"
# "544 800 8925"
# "544 960 10710"
# "640 640 8400"
# "736 1280 19320"
# "576 1024 12096"
"384 672 5292"
)
for((i=0; i<${#RESOLUTIONS[@]}; i++))
do
RESOLUTION=(`echo ${RESOLUTIONS[i]}`)
H=${RESOLUTION[0]}
W=${RESOLUTION[1]}
BOXES=${RESOLUTION[2]}
################################################### Boxes + Scores
# 02_boxes_scores_${BOXES}.onnx
python make_boxes_scores.py -o ${OPSET} -b ${BATCHES} -x ${BOXES} -c ${CLASSES}
# 03_cxcywh_y1x1y2x2_${BOXES}.onnx
python make_cxcywh_y1x1y2x2.py -o ${OPSET} -b ${BATCHES} -x ${BOXES}
# 04_boxes_x1y1x2y2_y1x1y2x2_scores_${BOXES}
snc4onnx \
--input_onnx_file_paths 02_boxes_scores_${BOXES}.onnx 03_cxcywh_y1x1y2x2_${BOXES}.onnx \
--srcop_destop boxes_cxcywh cxcywh \
--op_prefixes_after_merging 02 03 \
--output_onnx_file_path 04_boxes_x1y1x2y2_y1x1y2x2_scores_${BOXES}.onnx
################################################### NonMaxSuppression
sog4onnx \
--op_type Constant \
--opset ${OPSET} \
--op_name max_output_boxes_per_class_const \
--output_variables max_output_boxes_per_class int64 [1] \
--attributes value int64 [20] \
--output_onnx_file_path 05_Constant_max_output_boxes_per_class.onnx
sog4onnx \
--op_type Constant \
--opset ${OPSET} \
--op_name iou_threshold_const \
--output_variables iou_threshold float32 [1] \
--attributes value float32 [0.40] \
--output_onnx_file_path 06_Constant_iou_threshold.onnx
sog4onnx \
--op_type Constant \
--opset ${OPSET} \
--op_name score_threshold_const \
--output_variables score_threshold float32 [1] \
--attributes value float32 [0.25] \
--output_onnx_file_path 07_Constant_score_threshold.onnx
OP=NonMaxSuppression
LOWEROP=${OP,,}
sog4onnx \
--op_type ${OP} \
--opset ${OPSET} \
--op_name ${LOWEROP}${OPSET} \
--input_variables boxes_var float32 [${BATCHES},${BOXES},4] \
--input_variables scores_var float32 [${BATCHES},${CLASSES},${BOXES}] \
--input_variables max_output_boxes_per_class_var int64 [1] \
--input_variables iou_threshold_var float32 [1] \
--input_variables score_threshold_var float32 [1] \
--output_variables selected_indices int64 [\'N\',3] \
--attributes center_point_box int64 0 \
--output_onnx_file_path 08_${OP}${OPSET}.onnx
snc4onnx \
--input_onnx_file_paths 05_Constant_max_output_boxes_per_class.onnx 08_${OP}${OPSET}.onnx \
--srcop_destop max_output_boxes_per_class max_output_boxes_per_class_var \
--output_onnx_file_path 08_${OP}${OPSET}.onnx
snc4onnx \
--input_onnx_file_paths 06_Constant_iou_threshold.onnx 08_${OP}${OPSET}.onnx \
--srcop_destop iou_threshold iou_threshold_var \
--output_onnx_file_path 08_${OP}${OPSET}.onnx
snc4onnx \
--input_onnx_file_paths 07_Constant_score_threshold.onnx 08_${OP}${OPSET}.onnx \
--srcop_destop score_threshold score_threshold_var \
--output_onnx_file_path 08_${OP}${OPSET}.onnx
################################################### Boxes + Scores + NonMaxSuppression
snc4onnx \
--input_onnx_file_paths 04_boxes_x1y1x2y2_y1x1y2x2_scores_${BOXES}.onnx 08_${OP}${OPSET}.onnx \
--srcop_destop scores scores_var y1x1y2x2 boxes_var \
--output_onnx_file_path 09_nms_yolox_${BOXES}.onnx
################################################### Myriad workaround Mul
OP=Mul
LOWEROP=${OP,,}
OPSET=${OPSET}
sog4onnx \
--op_type ${OP} \
--opset ${OPSET} \
--op_name ${LOWEROP}${OPSET} \
--input_variables workaround_mul_a int64 [\'N\',3] \
--input_variables workaround_mul_b int64 [1] \
--output_variables workaround_mul_out int64 [\'N\',3] \
--output_onnx_file_path 10_${OP}${OPSET}_workaround.onnx
############ Myriad workaround Constant
sog4onnx \
--op_type Constant \
--opset ${OPSET} \
--op_name workaround_mul_const_op \
--output_variables workaround_mul_const int64 [1] \
--attributes value int64 [1] \
--output_onnx_file_path 11_Constant_workaround_mul.onnx
############ Myriad workaround Mul + Myriad workaround Constant
snc4onnx \
--input_onnx_file_paths 11_Constant_workaround_mul.onnx 10_${OP}${OPSET}_workaround.onnx \
--srcop_destop workaround_mul_const workaround_mul_b \
--output_onnx_file_path 11_Constant_workaround_mul.onnx
################################################### NonMaxSuppression + Myriad workaround Mul
snc4onnx \
--input_onnx_file_paths 09_nms_yolox_${BOXES}.onnx 11_Constant_workaround_mul.onnx \
--srcop_destop selected_indices workaround_mul_a \
--output_onnx_file_path 09_nms_yolox_${BOXES}.onnx
################################################### Score GatherND
python make_score_gather_nd.py -b ${BATCHES} -x ${BOXES} -c ${CLASSES}
python -m tf2onnx.convert \
--opset ${OPSET} \
--tflite saved_model_postprocess/nms_score_gather_nd.tflite \
--output 12_nms_score_gather_nd.onnx
sor4onnx \
--input_onnx_file_path 12_nms_score_gather_nd.onnx \
--old_new ":0" "" \
--search_mode "suffix_match" \
--output_onnx_file_path 12_nms_score_gather_nd.onnx
sor4onnx \
--input_onnx_file_path 12_nms_score_gather_nd.onnx \
--old_new "serving_default_input_1" "gn_scores" \
--output_onnx_file_path 12_nms_score_gather_nd.onnx \
--mode inputs
sor4onnx \
--input_onnx_file_path 12_nms_score_gather_nd.onnx \
--old_new "serving_default_input_2" "gn_selected_indices" \
--output_onnx_file_path 12_nms_score_gather_nd.onnx \
--mode inputs
sor4onnx \
--input_onnx_file_path 12_nms_score_gather_nd.onnx \
--old_new "PartitionedCall" "final_scores" \
--output_onnx_file_path 12_nms_score_gather_nd.onnx \
--mode outputs
python make_input_output_shape_update.py \
--input_onnx_file_path 12_nms_score_gather_nd.onnx \
--output_onnx_file_path 12_nms_score_gather_nd.onnx \
--input_names gn_scores \
--input_names gn_selected_indices \
--input_shapes ${BATCHES} ${CLASSES} ${BOXES} \
--input_shapes N 3 \
--output_names final_scores \
--output_shapes N 1
onnxsim 12_nms_score_gather_nd.onnx 12_nms_score_gather_nd.onnx
onnxsim 12_nms_score_gather_nd.onnx 12_nms_score_gather_nd.onnx
################################################### NonMaxSuppression + Score GatherND
snc4onnx \
--input_onnx_file_paths 09_nms_yolox_${BOXES}.onnx 12_nms_score_gather_nd.onnx \
--srcop_destop scores gn_scores workaround_mul_out gn_selected_indices \
--output_onnx_file_path 09_nms_yolox_${BOXES}_nd.onnx
onnxsim 09_nms_yolox_${BOXES}_nd.onnx 09_nms_yolox_${BOXES}_nd.onnx
onnxsim 09_nms_yolox_${BOXES}_nd.onnx 09_nms_yolox_${BOXES}_nd.onnx
################################################### Final Batch Nums
python make_final_batch_nums_final_class_nums_final_box_nums.py
################################################### Boxes GatherND
python make_box_gather_nd.py
python -m tf2onnx.convert \
--opset ${OPSET} \
--tflite saved_model_postprocess/nms_box_gather_nd.tflite \
--output 14_nms_box_gather_nd.onnx
sor4onnx \
--input_onnx_file_path 14_nms_box_gather_nd.onnx \
--old_new ":0" "" \
--search_mode "suffix_match" \
--output_onnx_file_path 14_nms_box_gather_nd.onnx
sor4onnx \
--input_onnx_file_path 14_nms_box_gather_nd.onnx \
--old_new "serving_default_input_1" "gn_boxes" \
--output_onnx_file_path 14_nms_box_gather_nd.onnx \
--mode inputs
sor4onnx \
--input_onnx_file_path 14_nms_box_gather_nd.onnx \
--old_new "serving_default_input_2" "gn_box_selected_indices" \
--output_onnx_file_path 14_nms_box_gather_nd.onnx \
--mode inputs
sor4onnx \
--input_onnx_file_path 14_nms_box_gather_nd.onnx \
--old_new "PartitionedCall" "final_boxes" \
--output_onnx_file_path 14_nms_box_gather_nd.onnx \
--mode outputs
python make_input_output_shape_update.py \
--input_onnx_file_path 14_nms_box_gather_nd.onnx \
--output_onnx_file_path 14_nms_box_gather_nd.onnx \
--input_names gn_boxes \
--input_names gn_box_selected_indices \
--input_shapes ${BATCHES} ${BOXES} 4 \
--input_shapes N 2 \
--output_names final_boxes \
--output_shapes N 4
onnxsim 14_nms_box_gather_nd.onnx 14_nms_box_gather_nd.onnx
onnxsim 14_nms_box_gather_nd.onnx 14_nms_box_gather_nd.onnx
################################################### nms_yolox_xxx_nd + nms_final_batch_nums_final_class_nums_final_box_nums
snc4onnx \
--input_onnx_file_paths 09_nms_yolox_${BOXES}_nd.onnx 13_nms_final_batch_nums_final_class_nums_final_box_nums.onnx \
--srcop_destop workaround_mul_out bc_input \
--op_prefixes_after_merging main01 sub01 \
--output_onnx_file_path 15_nms_yolox_${BOXES}_split.onnx
################################################### nms_yolox_${BOXES}_split + nms_box_gather_nd
snc4onnx \
--input_onnx_file_paths 15_nms_yolox_${BOXES}_split.onnx 14_nms_box_gather_nd.onnx \
--srcop_destop x1y1x2y2 gn_boxes final_box_nums gn_box_selected_indices \
--output_onnx_file_path 16_nms_yolox_${BOXES}_merged.onnx
onnxsim 16_nms_yolox_${BOXES}_merged.onnx 16_nms_yolox_${BOXES}_merged.onnx
onnxsim 16_nms_yolox_${BOXES}_merged.onnx 16_nms_yolox_${BOXES}_merged.onnx
################################################### nms output merge
python make_nms_outputs_merge.py
onnxsim 17_nms_batchno_classid_x1y1x2y2_cat.onnx 17_nms_batchno_classid_x1y1x2y2_cat.onnx
################################################### merge
snc4onnx \
--input_onnx_file_paths 16_nms_yolox_${BOXES}_merged.onnx 17_nms_batchno_classid_x1y1x2y2_cat.onnx \
--srcop_destop final_batch_nums cat_batch final_class_nums cat_classid final_scores cat_score final_boxes cat_x1y1x2y2 \
--output_onnx_file_path 18_nms_yolox_${BOXES}.onnx
onnxsim 18_nms_yolox_${BOXES}.onnx 18_nms_yolox_${BOXES}.onnx
################################################### yolox + Post-Process
snc4onnx \
--input_onnx_file_paths ${MODEL_NAME}_${SUFFIX}${H}x${W}.onnx 18_nms_yolox_${BOXES}.onnx \
--srcop_destop output0 boxes_scores_input \
--output_onnx_file_path ${MODEL_NAME}_post_${SUFFIX}${H}x${W}.onnx
onnxsim ${MODEL_NAME}_post_${SUFFIX}${H}x${W}.onnx ${MODEL_NAME}_post_${SUFFIX}${H}x${W}.onnx
onnxsim ${MODEL_NAME}_post_${SUFFIX}${H}x${W}.onnx ${MODEL_NAME}_post_${SUFFIX}${H}x${W}.onnx
# ################################################### cleaning
rm 0*_*.onnx
rm 1*_*.onnx
done
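A minimal sketch of running the merged model with onnxruntime; the file name follows the script's naming for the 384x672 case, and based on the concatenation above each output row should be [batchno, classid, score, x1, y1, x2, y2]:

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession(
    'yolov9_t_wholebody_with_wheelchair_post_0100_1x3x384x672.onnx',  # per the script's naming
    providers=['CPUExecutionProvider'],
)
x = np.random.rand(1, 3, 384, 672).astype(np.float32)  # preprocessing must match training
outs = sess.run(None, {'images': x})
print([o.shape for o in outs])  # expect a single (N, 7) tensor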
gelan-n-original.yaml
# YOLOv9
# parameters
nc: 8 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
#activation: nn.LeakyReLU(0.1)
#activation: nn.ReLU()
# anchors
anchors: 3
# gelan backbone
backbone:
[
# conv down
[-1, 1, Conv, [8, 3, 2]], # 0-P1/2
# conv down
[-1, 1, Conv, [16, 3, 2]], # 1-P2/4
# elan-1 block
[-1, 1, ELAN1, [16, 16, 8]], # 2
# avg-conv down
[-1, 1, AConv, [32]], # 3-P3/8
# elan-2 block
[-1, 1, RepNCSPELAN4, [32, 32, 16, 3]], # 4
# avg-conv down
[-1, 1, AConv, [48]], # 5-P4/16
# elan-2 block
[-1, 1, RepNCSPELAN4, [48, 48, 24, 3]], # 6
# avg-conv down
[-1, 1, AConv, [64]], # 7-P5/32
# elan-2 block
[-1, 1, RepNCSPELAN4, [64, 64, 32, 3]], # 8
]
# elan head
head:
[
# elan-spp block
[-1, 1, SPPELAN, [64, 32]], # 9
# up-concat merge
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
# elan-2 block
[-1, 1, RepNCSPELAN4, [48, 48, 24, 3]], # 12
# up-concat merge
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
# elan-2 block
[-1, 1, RepNCSPELAN4, [32, 32, 16, 3]], # 15
# avg-conv-down merge
[-1, 1, AConv, [24]],
[[-1, 12], 1, Concat, [1]], # cat head P4
# elan-2 block
[-1, 1, RepNCSPELAN4, [48, 48, 24, 3]], # 18 (P4/16-medium)
# avg-conv-down merge
[-1, 1, AConv, [32]],
[[-1, 9], 1, Concat, [1]], # cat head P5
# elan-2 block
[-1, 1, RepNCSPELAN4, [64, 64, 32, 3]], # 21 (P5/32-large)
# detect
[[15, 18, 21], 1, DDetect, [nc]], # DDetect(P3, P4, P5)
]
yolov9-n_original.yaml
# YOLOv9
# parameters
nc: 8 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
#activation: nn.LeakyReLU(0.1)
#activation: nn.ReLU()
# anchors
anchors: 3
# gelan backbone
backbone:
[
# conv down
[-1, 1, Conv, [8, 3, 2]], # 0-P1/2
# conv down
[-1, 1, Conv, [16, 3, 2]], # 1-P2/4
# elan-1 block
[-1, 1, ELAN1, [16, 16, 8]], # 2
# avg-conv down
[-1, 1, AConv, [32]], # 3-P3/8
# elan-2 block
[-1, 1, RepNCSPELAN4, [32, 32, 16, 3]], # 4
# avg-conv down
[-1, 1, AConv, [48]], # 5-P4/16
# elan-2 block
[-1, 1, RepNCSPELAN4, [48, 48, 24, 3]], # 6
# avg-conv down
[-1, 1, AConv, [64]], # 7-P5/32
# elan-2 block
[-1, 1, RepNCSPELAN4, [64, 64, 32, 3]], # 8
]
# elan head
head:
[
# elan-spp block
[-1, 1, SPPELAN, [64, 32]], # 9
# up-concat merge
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
# elan-2 block
[-1, 1, RepNCSPELAN4, [48, 48, 24, 3]], # 12
# up-concat merge
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
# elan-2 block
[-1, 1, RepNCSPELAN4, [32, 32, 16, 3]], # 15
# avg-conv-down merge
[-1, 1, AConv, [24]],
[[-1, 12], 1, Concat, [1]], # cat head P4
# elan-2 block
[-1, 1, RepNCSPELAN4, [48, 48, 24, 3]], # 18 (P4/16-medium)
# avg-conv-down merge
[-1, 1, AConv, [32]],
[[-1, 9], 1, Concat, [1]], # cat head P5
# elan-2 block
[-1, 1, RepNCSPELAN4, [64, 64, 32, 3]], # 21 (P5/32-large)
# elan-spp block
[8, 1, SPPELAN, [64, 32]], # 22
# up-concat merge
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
# elan-2 block
[-1, 1, RepNCSPELAN4, [48, 48, 24, 3]], # 25
# up-concat merge
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
# elan-2 block
[-1, 1, RepNCSPELAN4, [32, 32, 16, 3]], # 28
# detect
[[28, 25, 22, 15, 18, 21], 1, DualDDetect, [nc]], # Detect(P3, P4, P5)
]
- Wholebody12
python train_dual.py \
--workers 8 \
--device 0 \
--batch 8 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-n_original.yaml \
--name yolov9-n \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 300 \
--close-mosaic 45
# Retrain with ReLU starting from the Swish(SiLU)-trained weights; mosaic augmentation disabled
python train_dual.py \
--workers 8 \
--device 0 \
--batch 8 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-n_original-relu.yaml \
--weights best-n.pt \
--name yolov9-n-relu \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 100 \
--close-mosaic 100
- Wholebody13
Train images : 9,541
Validation images: 2,386
Total images : 11,927
===================================================
class_id: 0 name: body count: 64,776
class_id: 1 name: body_with_wheelchair count: 925
class_id: 2 name: body_with_crutches count: 539
class_id: 3 name: head count: 55,190
class_id: 4 name: face count: 30,153
class_id: 5 name: eye count: 26,550
class_id: 6 name: nose count: 25,005
class_id: 7 name: mouth count: 20,197
class_id: 8 name: ear count: 24,471
class_id: 9 name: hand count: 40,129
class_id:10 name: hand_left count: 20,150
class_id:11 name: hand_right count: 19,979
class_id:12 name: foot count: 34,997
---------------------------------------------------
Total count : 363,061
===================================================
#!/bin/bash
while true; do
python train_dual.py \
--workers 8 \
--device 0 \
--batch 8 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-n_original.yaml \
--weights best-n.pt \
--name yolov9-wholebody13-n \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 245 \
--close-mosaic 45 \
--resume runs/train/yolov9-wholebody13-n/weights/last.pt
if [ $? -eq 0 ]; then
echo "Command succeeded."
break
else
echo "Command failed. Retrying..."
fi
done
# Retrain with ReLU starting from the Swish(SiLU)-trained weights; mosaic augmentation disabled
python train_dual.py \
--workers 8 \
--device 0 \
--batch 8 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-n_original-relu.yaml \
--weights best-n-wholebody13.pt \
--name yolov9-wholebody13-n-relu \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 100 \
--close-mosaic 100
#=======================================
#!/bin/bash
while true; do
python train_dual.py \
--workers 8 \
--device 0 \
--batch 16 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-t_original.yaml \
--weights best-t.pt \
--name yolov9-wholebody13-t \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 245 \
--close-mosaic 45 \
--resume runs/train/yolov9-wholebody13-t/weights/last.pt
if [ $? -eq 0 ]; then
echo "Command succeeded."
break
else
echo "Command failed. Retrying..."
fi
done
# Retrain with ReLU starting from the Swish(SiLU)-trained weights; mosaic augmentation disabled
python train_dual.py \
--workers 8 \
--device 0 \
--batch 16 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-t_original-relu.yaml \
--weights best-t-wholebody13.pt \
--name yolov9-wholebody13-t-relu \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 100 \
--close-mosaic 100
#=======================================
python train_dual.py \
--workers 8 \
--device 0 \
--batch 16 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-s-original.yaml \
--weights best-s.pt \
--name yolov9-wholebody13-s \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 245 \
--close-mosaic 45 \
--resume runs/train/yolov9-wholebody13-s/weights/last.pt
python train_dual.py \
--workers 8 \
--device 0 \
--batch 16 \
--data data/original.yaml \
--img 640 \
--cfg models/detect/yolov9-e_original.yaml \
--weights best-e.pt \
--name yolov9-wholebody13-e \
--hyp hyp.scratch-high_original.yaml \
--min-items 0 \
--epochs 245 \
--close-mosaic 45 \
--resume runs/train/yolov9-wholebody13-e/weights/last.pt
- YOLOX-X (Swish) vs YOLOv9-E (Swish)
Class              Images Instances mAP50-95(YOLOX-X) mAP50-95(YOLOv9-E)
all                  2117     48988             0.554              0.660
Body                 2117     11624             0.614              0.748
BodyWithWheelchair   2117       153             0.871              0.865
Head                 2117      9936             0.585              0.723
Face                 2117      5653             0.506              0.675
Hand                 2117      7525             0.513              0.606
Hand-Left            2117      3739             0.456              0.571
Hand-Right           2117      3786             0.449              0.573
Foot                 2117      6572             0.431              0.523
- YOLOX-X (Swish) vs YOLOv9-E (ReLU)
Class              Images Instances mAP50-95(YOLOX-X) mAP50-95(YOLOv9-E)
all                  2117     48988             0.554              0.647
Body                 2117     11624             0.614              0.731
BodyWithWheelchair   2117       153             0.871              0.850
Head                 2117      9936             0.585              0.719
Face                 2117      5653             0.506              0.673
Hand                 2117      7525             0.513              0.589
Hand-Left            2117      3739             0.456              0.556
Hand-Right           2117      3786             0.449              0.552
Foot                 2117      6572             0.431              0.508
- YOLOX-X (Swish) vs YOLOv9-C (Swish)
Class              Images Instances mAP50-95(YOLOX-X) mAP50-95(YOLOv9-C)
all                  2117     48988             0.554              0.592
Body                 2117     11624             0.614              0.687
BodyWithWheelchair   2117       153             0.871              0.822
Head                 2117      9936             0.585              0.674
Face                 2117      5653             0.506              0.608
Hand                 2117      7525             0.513              0.527
Hand-Left            2117      3739             0.456              0.491
Hand-Right           2117      3786             0.449              0.489
Foot                 2117      6572             0.431              0.436
- YOLOX-X (Swish) vs YOLOv9-C (ReLU)
Class              Images Instances mAP50-95(YOLOX-X) mAP50-95(YOLOv9-C)
all                  2117     48988             0.554              0.593
Body                 2117     11624             0.614              0.679
BodyWithWheelchair   2117       153             0.871              0.825
Head                 2117      9936             0.585              0.676
Face                 2117      5653             0.506              0.617
Hand                 2117      7525             0.513              0.529
Hand-Left            2117      3739             0.456              0.491
Hand-Right           2117      3786             0.449              0.491
Foot                 2117      6572             0.431              0.437
- YOLOX-X (Swish) vs YOLOv9-M (Swish)
Class              Images Instances mAP50-95(YOLOX-X) mAP50-95(YOLOv9-M)
all                  2117     48988             0.554              0.577
Body                 2117     11624             0.614              0.668
BodyWithWheelchair   2117       153             0.871              0.814
Head                 2117      9936             0.585              0.666
Face                 2117      5653             0.506              0.601
Hand                 2117      7525             0.513              0.509
Hand-Left            2117      3739             0.456              0.475
Hand-Right           2117      3786             0.449              0.467
Foot                 2117      6572             0.431              0.417
- YOLOX-X (Swish) vs YOLOv9-M (ReLU)
Class              Images Instances mAP50-95(YOLOX-X) mAP50-95(YOLOv9-M)
all                  2117     48988             0.554              0.575
Body                 2117     11624             0.614              0.663
BodyWithWheelchair   2117       153             0.871              0.788
Head                 2117      9936             0.585              0.664
Face                 2117      5653             0.506              0.605
Hand                 2117      7525             0.513              0.513
Hand-Left            2117      3739             0.456              0.477
Hand-Right           2117      3786             0.449              0.475
Foot                 2117      6572             0.431              0.414
- YOLOX-X (Swish) vs YOLOv9-S (Swish)
Class              Images Instances mAP50-95(YOLOX-X) mAP50-95(YOLOv9-S)
all                  2117     48988             0.554              0.560
Body                 2117     11624             0.614              0.672
BodyWithWheelchair   2117       153             0.871              0.844
Head                 2117      9936             0.585              0.646
Face                 2117      5653             0.506              0.555
Hand                 2117      7525             0.513              0.486
Hand-Left            2117      3739             0.456              0.432
Hand-Right           2117      3786             0.449              0.431
Foot                 2117      6572             0.431              0.411
- YOLOX-X (Swish) vs YOLOv9-S (ReLU)
Class              Images Instances mAP50-95(YOLOX-X) mAP50-95(YOLOv9-S)
all                  2117     48988             0.554              0.556
Body                 2117     11624             0.614              0.659
BodyWithWheelchair   2117       153             0.871              0.835
Head                 2117      9936             0.585              0.640
Face                 2117      5653             0.506              0.561
Hand                 2117      7525             0.513              0.480
Hand-Left            2117      3739             0.456              0.430
Hand-Right           2117      3786             0.449              0.436
Foot                 2117      6572             0.431              0.404
- YOLOX-Tiny (Swish) vs YOLOv9-T (Swish)
Class              Images Instances mAP50-95(YOLOX-T) mAP50-95(YOLOv9-T)
all                  2117     48988             0.421              0.477
Body                 2117     11624             0.463              0.590
BodyWithWheelchair   2117       153             0.674              0.771
Head                 2117      9936             0.507              0.591
Face                 2117      5653             0.417              0.468
Hand                 2117      7525             0.372              0.401
Hand-Left            2117      3739             0.325              0.335
Hand-Right           2117      3786             0.318              0.327
Foot                 2117      6572             0.291              0.332
- YOLOX-Tiny (Swish) vs YOLOv9-T (ReLU)
Class              Images Instances mAP50-95(YOLOX-T) mAP50-95(YOLOv9-T)
all                  2117     48988             0.421              0.471
Body                 2117     11624             0.463              0.567
BodyWithWheelchair   2117       153             0.674              0.764
Head                 2117      9936             0.507              0.584
Face                 2117      5653             0.417              0.486
Hand                 2117      7525             0.372              0.390
Hand-Left            2117      3739             0.325              0.333
Hand-Right           2117      3786             0.318              0.323
Foot                 2117      6572             0.291              0.320
- YOLOX-Nano (Swish) vs YOLOv9-Nano (Swish)
Class              Images Instances mAP50-95(YOLOX-N) mAP50-95(YOLOv9-N)
all                  2117     48988             0.353              0.352
Body                 2117     11624             0.392              0.444
BodyWithWheelchair   2117       153             0.587              0.571
Head                 2117      9936             0.467              0.516
Face                 2117      5653             0.362              0.397
Hand                 2117      7525             0.306              0.266
Hand-Left            2117      3739             0.237              0.198
Hand-Right           2117      3786             0.241              0.198
Foot                 2117      6572             0.231              0.225
- YOLOX-Nano (Swish) vs YOLOv9-Nano (ReLU)
Class              Images Instances mAP50-95(YOLOX-N) mAP50-95(YOLOv9-N)
all                  2117     48988             0.353              0.351
Body                 2117     11624             0.392              0.435
BodyWithWheelchair   2117       153             0.587              0.590
Head                 2117      9936             0.467              0.505
Face                 2117      5653             0.362              0.397
Hand                 2117      7525             0.306              0.257
Hand-Left            2117      3739             0.237              0.200
Hand-Right           2117      3786             0.241              0.201
Foot                 2117      6572             0.231              0.222