Open22

HollywoodHeads / Brainwash darknet

PINTOPINTO
  1. Splits/train.txt と Splits/test.txt をファイルの相対パスへ変更するとともに拡張子 .jpg を付与する
  2. 編集済みの train.txt と test.txt の出力先は crowdhuman-{width}x{height} 配下とする
PINTOPINTO
"""
This script converts the data from the HollywoodHeads dataset
in the format that is expected for the training by the train.py script.
"""
from __future__ import division

import xml.etree.ElementTree as ET
import os
import math
from PIL import Image
import numpy as np
import cv2
from tqdm import tqdm

PHASES = ['train', 'val', 'test']

def resize_and_pad(img, size, pad_color=0):
    """Letterbox *img* into *size* (h, w): scale to fit while keeping the
    aspect ratio, then pad the remainder with *pad_color*.

    Returns (padded_image, pad_left, pad_right, pad_top, pad_bot).
    """
    src_h, src_w = img.shape[:2]
    dst_h, dst_w = size

    # Shrinking looks best with INTER_AREA; enlarging with INTER_CUBIC.
    interp = cv2.INTER_AREA if (src_h > dst_h or src_w > dst_w) else cv2.INTER_CUBIC

    aspect = src_w / src_h
    if aspect > 1:
        # Landscape: fill the full width, pad top/bottom.
        new_w = dst_w
        new_h = np.round(new_w / aspect).astype(int)
        vert = (dst_h - new_h) / 2
        pad_top = np.floor(vert).astype(int)
        pad_bot = np.ceil(vert).astype(int)
        pad_left = pad_right = 0
    elif aspect < 1:
        # Portrait: fill the full height, pad left/right.
        new_h = dst_h
        new_w = np.round(new_h * aspect).astype(int)
        horz = (dst_w - new_w) / 2
        pad_left = np.floor(horz).astype(int)
        pad_right = np.ceil(horz).astype(int)
        pad_top = pad_bot = 0
    else:
        # Square source: resize straight to the target, no padding.
        new_h, new_w = dst_h, dst_w
        pad_top = pad_bot = pad_left = pad_right = 0

    # Broadcast a scalar pad colour across the channels of a colour image.
    if len(img.shape) == 3 and not isinstance(pad_color, (list, tuple, np.ndarray)):
        pad_color = [pad_color] * 3

    resized = cv2.resize(img, (new_w, new_h), interpolation=interp)
    padded = cv2.copyMakeBorder(
        resized,
        pad_top,
        pad_bot,
        pad_left,
        pad_right,
        borderType=cv2.BORDER_CONSTANT,
        value=pad_color,
    )
    return padded, pad_left, pad_right, pad_top, pad_bot


def convert_txt(
    root_path,
    src_path,
    dst_path,
    ann_path,
    show,
    resized_images_size_h,
    resized_images_size_w,
    yolo_path,
    yolo_root_path,
    yolo_label
):
    """Convert one dataset split into an .idl file plus YOLO-format labels.

    Parameters
    ----------
    root_path : dataset root containing ``JPEGImages/``.
    src_path : split list file; one image basename (no extension) per line.
    dst_path : output .idl file path (opened for writing, i.e. overwritten).
    ann_path : directory holding the PASCAL-VOC style .xml annotations.
    show : when True, display each letterboxed image with its boxes
        ('q' aborts the loop).
    resized_images_size_h, resized_images_size_w : target letterbox size.
    yolo_path : directory receiving the resized .jpg images, the per-image
        YOLO .txt label files (opened in append mode) and the image list.
    yolo_root_path : path prefix written into the image list file.
    yolo_label : class-id string prepended to every YOLO box line.
    """

    os.makedirs(f'{yolo_path}', exist_ok=True)

    with open(dst_path, "w") as wfp:
        with open(src_path, 'r') as rfp:
            for line in tqdm(rfp.readlines()):
                # rstrip, not split("\n"): the last line of the split file
                # may lack a trailing newline, which would make the old
                # `line, _ = line.split("\n")` raise ValueError.
                line = line.rstrip("\n")
                ann_file_name = line + ".xml"
                insert_line = ""
                insert_line = insert_line + line + ".jpg"
                img_name = line + ".jpeg"
                img_path = os.path.join(f'{root_path}/JPEGImages', img_name)
                f = Image.open(img_path)
                # PIL's convert() returns a NEW image; the original code
                # discarded the result, so grayscale/RGBA frames reached
                # resize_and_pad unconverted and were silently skipped by
                # the except clause below.
                f = f.convert('RGB')
                original_image_size_w, original_image_size_h = f.size

                f = np.array(f, dtype=np.uint8)
                try:
                    img, pad_left, pad_right, pad_top, pad_bot = \
                        resize_and_pad(f, (resized_images_size_h, resized_images_size_w))
                except Exception:
                    # Skip frames OpenCV cannot resize. Never a bare except:
                    # that would also swallow KeyboardInterrupt/SystemExit.
                    continue
                # PIL delivers RGB; swap channels so cv2.imwrite (BGR) writes
                # the correct colours.
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                insert_line = "\"" + insert_line + "\""
                ann = ET.parse(os.path.join(ann_path, ann_file_name))
                bboxs = []
                for obj in ann.findall('object'):
                    bbox_ann = obj.find('bndbox')
                    if bbox_ann is None:
                        continue
                    # VOC boxes are 1-based; shift to 0-based pixel coords.
                    bboxs.append(
                        [float(bbox_ann.find(tag).text) - 1 for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
                    )
                for i in range(len(bboxs)):
                    bbox = bboxs[i]

                    # Size of the actual image content inside the letterbox.
                    image_size_final_w = resized_images_size_w - (pad_left + pad_right)
                    image_size_final_h = resized_images_size_h - (pad_top + pad_bot)

                    # Scale the box into the resized image and shift by the padding.
                    bbox[0] = int(bbox[0]*(image_size_final_w/original_image_size_w)+pad_left)
                    bbox[1] = int(bbox[1]*(image_size_final_h/original_image_size_h)+pad_top)
                    bbox[2] = int(bbox[2]*(image_size_final_w/original_image_size_w)+pad_left)
                    bbox[3] = int(bbox[3]*(image_size_final_h/original_image_size_h)+pad_top)

                    # Clamp to the letterboxed canvas.
                    bbox[0] = 0 if bbox[0] < 0 else resized_images_size_w if bbox[0] > resized_images_size_w else bbox[0]
                    bbox[1] = 0 if bbox[1] < 0 else resized_images_size_h if bbox[1] > resized_images_size_h else bbox[1]
                    bbox[2] = 0 if bbox[2] < 0 else resized_images_size_w if bbox[2] > resized_images_size_w else bbox[2]
                    bbox[3] = 0 if bbox[3] < 0 else resized_images_size_h if bbox[3] > resized_images_size_h else bbox[3]

                if show:
                    view_img = img.copy()
                    for box in bboxs:
                        cv2.rectangle(
                            view_img,
                            pt1=(box[0], box[1]),
                            pt2=(box[2], box[3]),
                            color=(0, 0, 255),
                            thickness=1,
                        )

                    cv2.imshow('image', view_img)
                    if cv2.waitKey(0)&0xFF == ord('q'):
                        break

                bboxs_string = ""
                nbboxs = len(bboxs)
                index = 1
                if nbboxs == 0:
                    # Images without any head box are skipped entirely.
                    pass

                else:
                    # Image: save the letterboxed frame next to its labels.
                    cv2.imwrite(
                        os.path.join(f'{yolo_path}', line + ".jpg"),
                        img
                    )
                    # IDL: "name.jpg": (x1, y1, x2, y2), ... (xn, yn, xn, yn);
                    for bbox in bboxs:
                        bbox_string = ""
                        if index <= nbboxs - 1:
                            bbox_string = bbox_string + ', '.join(str(math.floor(e)) for e in bbox)
                            bbox_string = '(' + bbox_string + '), '
                            bboxs_string = bboxs_string + bbox_string
                            index += 1
                        else:
                            # Last box of the image ends with "); ".
                            bbox_string = bbox_string + ', '.join(str(math.floor(e)) for e in bbox)
                            bbox_string = '(' + bbox_string + '); '
                            bboxs_string = bboxs_string + bbox_string
                            index += 1
                    insert_line = insert_line + ': '
                    insert_line = insert_line + bboxs_string + '\n'
                    wfp.write(insert_line)

                    # YOLO: re-parse the IDL line into normalized
                    # "<label> cx cy w h" rows, one per box.
                    insert_line = insert_line.replace(":", ";")
                    img_dir = insert_line.split(";")[0]
                    img_boxs = insert_line.split(";")[1]
                    img_dir = img_dir.replace('"', "")
                    img_name = img_dir
                    txt_name = img_name.split(".")[0]
                    img_extension = img_name.split(".")[1]
                    img_boxs = img_boxs.replace(",", "")
                    img_boxs = img_boxs.replace("(", "")
                    img_boxs = img_boxs.split(")")
                    if(img_extension == 'jpg'):
                        for n in range(len(img_boxs)-1):
                            box = img_boxs[n]
                            box = box.split(" ")
                            with open(f'{yolo_path}/{txt_name}.txt', 'a') as f:
                                f.write(' '.join(
                                    [
                                        yolo_label,
                                        str((float(box[1]) + float(box[3]))/(2*resized_images_size_w)),
                                        str((float(box[2]) + float(box[4]))/(2*resized_images_size_h)),
                                        str((float(box[3]) - float(box[1]))/resized_images_size_w),
                                        str((float(box[4]) - float(box[2]))/resized_images_size_h)
                                    ]
                                )+'\n')
                        with open(f'{yolo_path}/{os.path.basename(src_path)}', 'a') as f:
                            f.write(f'{yolo_root_path}/{img_name}\n')

    if show:
        cv2.destroyAllWindows()


def convert_hollywood(
    root_path,
    show,
    resized_images_size_h,
    resized_images_size_w,
    yolo_path,
    yolo_root_path,
    yolo_label
):
    """Convert the HollywoodHeads splits listed in PHASES.

    Dispatches each phase to convert_txt() with the matching Splits/*.txt
    list file and hollywood_*.idl output path.
    """
    splits_folder = os.path.join(root_path, 'Splits')
    ann_folder = os.path.join(root_path, 'Annotations')
    for phase in PHASES:
        if phase == 'train':
            print("Phase train ongoing...")
            data_list_path = os.path.join(splits_folder, 'train.txt')
            data_path = os.path.join(root_path, 'hollywood_train.idl')
        elif phase == 'val':
            # The val phase is intentionally disabled. Without this guard,
            # 'val' (still present in PHASES) fell through to the test
            # branch and converted the test split TWICE — duplicating every
            # YOLO label line, since the per-image .txt files and the image
            # list are opened in append mode by convert_txt().
            continue
        else:
            print("Phase test ongoing...")
            data_list_path = os.path.join(splits_folder, 'test.txt')
            data_path = os.path.join(root_path, 'hollywood_test.idl')

        convert_txt(
            root_path,
            data_list_path,
            data_path,
            ann_folder,
            show,
            resized_images_size_h,
            resized_images_size_w,
            yolo_path,
            yolo_root_path,
            yolo_label
        )


if __name__ == "__main__":
    # Dataset root and target letterbox resolution.
    root_path = 'HollywoodHeads'
    show = False
    resized_images_size_h = 480
    resized_images_size_w = 640

    # Output locations mirror the crowdhuman-{W}x{H} naming used downstream.
    yolo_path = f'{root_path}/crowdhuman-{resized_images_size_w}x{resized_images_size_h}'
    yolo_root_path = f'data/crowdhuman-{resized_images_size_w}x{resized_images_size_h}'
    # Class id written into every YOLO label line.
    yolo_label = '0' # 0:head, 1:person

    convert_hollywood(
        root_path,
        show,
        resized_images_size_h,
        resized_images_size_w,
        yolo_path,
        yolo_root_path,
        yolo_label,
    )
PINTOPINTO
find HollywoodHeads/crowdhuman-640x480 -name "*.jpg" -print0 | xargs -0 -I {} cp {} ${HOME}/git/yolov4_crowdhuman/data/crowdhuman-640x480

cat HollywoodHeads/crowdhuman-640x480/train.txt >>  ${HOME}/git/yolov4_crowdhuman/data/crowdhuman-640x480/train.txt
cat HollywoodHeads/crowdhuman-640x480/test.txt >> ${HOME}/git/yolov4_crowdhuman/data/crowdhuman-640x480/test.txt

mv HollywoodHeads/crowdhuman-640x480/train.txt HollywoodHeads/crowdhuman-640x480/train.txt_
mv HollywoodHeads/crowdhuman-640x480/test.txt HollywoodHeads/crowdhuman-640x480/test.txt_

find HollywoodHeads/crowdhuman-640x480 -name "*.txt" -print0 | xargs -0 -I {} cp {} ${HOME}/git/yolov4_crowdhuman/data/crowdhuman-640x480
PINTOPINTO
tar -zcvf crowdhuman-640x480.tar.gz ${HOME}/git/yolov4_crowdhuman/data/crowdhuman-640x480
PINTOPINTO
cd ~/git
git clone https://github.com/linghu8812/tensorrt_inference
cd tensorrt_inference
cp Yolov4/export_onnx.py Yolov4/export_onnx_org.py
sed -i 's/self.classes = 80/self.classes = 1/g' Yolov4/export_onnx.py

cd Yolov4

cp ~/work/yolov4_crowdhuman_hollywood/cfg/yolov4-crowdhuman-640x480.cfg cfg
mkdir -p weights
cp ~/work/yolov4_crowdhuman_hollywood/backup/yolov4-crowdhuman-640x480_best.weights weights

sed -i 's/batch=64/batch=1/g' cfg/yolov4-crowdhuman-640x480.cfg
sed -i 's/subdivisions=18/subdivisions=1/g' cfg/yolov4-crowdhuman-640x480.cfg

python3 export_onnx.py \
--cfg_file cfg/yolov4-crowdhuman-640x480.cfg \
--weights_file weights/yolov4-crowdhuman-640x480_best.weights \
--output_file yolov4_headdetection_480x640.onnx
PINTOPINTO
  • onnx-simplifier
python3 -m onnxsim \
yolov4_headdetection_480x640.onnx \
yolov4_headdetection_480x640.onnx
  • onnx_optimization_process.py
import onnx
import onnxoptimizer
import argparse

def get_args():
    """Parse the required --input/--output model paths from the CLI."""
    parser = argparse.ArgumentParser()
    for flag, description in (("--input", "input model"), ("--output", "output model")):
        parser.add_argument(flag, required=True, help=description)
    return parser.parse_args()

def remove_initializer_from_input():
    """Strip graph inputs that are really initializers, run a small
    onnxoptimizer pass list, and save the result.

    Reads the --input/--output paths from the command line via get_args().
    """
    args = get_args()
    model = onnx.load(args.input)
    if model.ir_version < 4:
        # Pre-IR-4 ONNX models are REQUIRED to list initializers among the
        # graph inputs, so stripping them would produce an invalid model.
        print(
            'Model with ir_version below 4 requires to include initializer in graph input'
        )
        return
    inputs = model.graph.input
    # Map name -> input proto (renamed from `input`, which shadowed the builtin).
    name_to_input = {graph_input.name: graph_input for graph_input in inputs}
    for initializer in model.graph.initializer:
        if initializer.name in name_to_input:
            inputs.remove(name_to_input[initializer.name])
    passes = [
        "extract_constant_to_initializer",
        "eliminate_unused_initializer"
    ]
    optimized_model = onnxoptimizer.optimize(model, passes)
    onnx.save(optimized_model, args.output)

if __name__ == '__main__':
    # Entry point: strip initializer inputs from the ONNX model given on the CLI.
    remove_initializer_from_input()
  • optimization
python3 onnx_optimization_process.py \
--input yolov4_headdetection_480x640.onnx \
--output yolov4_headdetection_480x640.onnx
PINTOPINTO
cp config.yaml config_org.yaml
cp coco.names headdetection.names

sed -i 's/..\/yolov4.onnx/..\/yolov4_headdetection_480x640.onnx/g' config.yaml
sed -i 's/..\/yolov4.trt/..\/yolov4_headdetection_640x480.trt/g' config.yaml

echo head > headdetection.names
sed -i 's/..\/coco.names/..\/headdetection.names/g' config.yaml

sed -i 's/IMAGE_WIDTH:   608/IMAGE_WIDTH:   640/g' config.yaml
sed -i 's/IMAGE_HEIGHT:  608/IMAGE_HEIGHT:  480/g' config.yaml

sed -i 's/\[\[12, 16\], \[19, 36\], \[40, 28\], \[36, 75\], \[76, 55\], \[72, 146\], \[142, 110\], \[192, 243\], \[459, 401\]\]/\[\[8, 9\], \[14, 18\], \[21, 29\], \[30, 42\], \[42, 57\], \[58, 79\], \[79, 113\], \[115, 167\], \[159, 303\]\]/g' config.yaml

cd ..
PINTOPINTO
xhost +local: && \
docker run --gpus all -it --rm \
-v `pwd`:/home/user/workdir \
-v /tmp/.X11-unix/:/tmp/.X11-unix:rw \
--device /dev/video0:/dev/video0:mwr \
--net=host \
-e XDG_RUNTIME_DIR=$XDG_RUNTIME_DIR \
-e DISPLAY=$DISPLAY \
--privileged \
ghcr.io/pinto0309/openvino2tensorflow:latest

sed -i 's/override/noexcept override/g' includes/common/logging.h

mkdir -p Yolov4/build && cd Yolov4/build
cmake ..
make -j
PINTOPINTO
python3 demo_darknet2onnx.py \
yolov4-crowdhuman-640x480.cfg \
yolov4-crowdhuman-640x480_best.weights \
people.jpg \
1
PINTOPINTO
python3 demo_darknet2onnx.py \
yolov4-tiny-3l-crowdhuman-640x480.cfg \
yolov4-tiny-3l-crowdhuman-640x480_best.weights \
people.jpg \
1
PINTOPINTO
python3 -m onnxsim \
yolov4_1_3_480_640_static.onnx \
yolov4_tiny_3l_headdetection_480x640.onnx