# Script that batch-generates YOLO-format annotation data for head detection.
# File: demo_yolov7_onnx.py
#!/usr/bin/env python
import os
import copy
import glob
import cv2
import numpy as np
import onnxruntime
from argparse import ArgumentParser
from typing import Tuple, Optional, List
from tqdm import tqdm
class YOLOv7ONNX(object):
    """ONNX Runtime wrapper around a YOLOv7 head-detection model.

    An instance is callable: it takes an image, runs pre-processing,
    inference and post-processing, and returns the detected boxes in the
    pixel coordinates of the image that was passed in, together with their
    scores.
    """

    def __init__(
        self,
        model_path: Optional[str] = 'yolov7_tiny_head_0.768_post_480x640.onnx',
        class_score_th: Optional[float] = 0.30,
        providers: Optional[List] = None,
    ):
        """YOLOv7ONNX

        Parameters
        ----------
        model_path: Optional[str]
            ONNX file path for YOLOv7

        class_score_th: Optional[float]
            Score threshold. Default: 0.30

        providers: Optional[List]
            Name of onnx execution providers.
            When None (the default) the following list is used:
            [
                (
                    'TensorrtExecutionProvider', {
                        'trt_engine_cache_enable': True,
                        'trt_engine_cache_path': '.',
                        'trt_fp16_enable': True,
                    }
                ),
                'CUDAExecutionProvider',
                'CPUExecutionProvider',
            ]
        """
        # Build the default here instead of in the signature so that a
        # single mutable list is not shared between every instance.
        if providers is None:
            providers = [
                (
                    'TensorrtExecutionProvider', {
                        'trt_engine_cache_enable': True,
                        'trt_engine_cache_path': '.',
                        'trt_fp16_enable': True,
                    }
                ),
                'CUDAExecutionProvider',
                'CPUExecutionProvider',
            ]

        # Threshold
        self.class_score_th = class_score_th

        # Model loading
        session_option = onnxruntime.SessionOptions()
        session_option.log_severity_level = 3
        self.onnx_session = onnxruntime.InferenceSession(
            model_path,
            sess_options=session_option,
            providers=providers,
        )
        # Providers reported back by the session after it was created.
        self.providers = self.onnx_session.get_providers()

        model_inputs = self.onnx_session.get_inputs()
        self.input_shapes = [model_input.shape for model_input in model_inputs]
        self.input_names = [model_input.name for model_input in model_inputs]
        self.output_names = [
            model_output.name
            for model_output in self.onnx_session.get_outputs()
        ]

    def __call__(
        self,
        image: np.ndarray,
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Run detection on one image.

        Parameters
        ----------
        image: np.ndarray
            Entire image. It is not modified.

        Returns
        -------
        face_boxes: np.ndarray
            Predicted boxes: [N, 4] as [x_min, y_min, x_max, y_max],
            scaled to the size of `image`.

        face_scores: np.ndarray
            Predicted box scores: [N, 1]
        """
        # PreProcess (returns a new array, so no defensive copy of the
        # input image is required)
        resized_image = self.__preprocess(
            image,
        )

        # Inference: add the batch axis and feed the same tensor to every
        # model input.
        inference_image = np.asarray([resized_image], dtype=np.float32)
        scores, boxes = self.onnx_session.run(
            self.output_names,
            {input_name: inference_image for input_name in self.input_names},
        )

        # PostProcess
        face_boxes, face_scores = self.__postprocess(
            image=image,
            scores=scores,
            boxes=boxes,
        )
        return face_boxes, face_scores

    def __preprocess(
        self,
        image: np.ndarray,
        swap: Optional[Tuple[int, int, int]] = (2, 0, 1),
    ) -> np.ndarray:
        """__preprocess

        Parameters
        ----------
        image: np.ndarray
            Entire image

        swap: tuple
            Axis order passed to `transpose`.
            HWC to CHW: (2,0,1)
            CHW to HWC: (1,2,0)
            HWC to HWC: (0,1,2)
            CHW to CHW: (0,1,2)

        Returns
        -------
        resized_image: np.ndarray
            Resized and normalized image as contiguous float32.
        """
        # Resize to the (width, height) taken from the first model input;
        # indices 2 and 3 of that shape are used as height and width.
        resized_image = cv2.resize(
            image,
            (
                int(self.input_shapes[0][3]),
                int(self.input_shapes[0][2]),
            )
        )
        # Normalization + BGR->RGB (reverse the channel axis)
        resized_image = np.divide(resized_image, 255.0)
        resized_image = resized_image[..., ::-1]
        resized_image = resized_image.transpose(swap)
        resized_image = np.ascontiguousarray(
            resized_image,
            dtype=np.float32,
        )
        return resized_image

    def __postprocess(
        self,
        image: np.ndarray,
        scores: np.ndarray,
        boxes: np.ndarray,
    ) -> Tuple[np.ndarray, np.ndarray]:
        """__postprocess

        Head Detector output layout:
            N -> Number of boxes detected
            batchno -> always 0: BatchNo.0
            classid -> always 0: "Head"
            scores: float32[N,1]
            batchno_classid_y1x1y2x2: int64[N,6]

        Parameters
        ----------
        image: np.ndarray
            Entire image. Only its height and width are read.

        scores: np.ndarray
            float32[N, 1]

        boxes: np.ndarray
            int64[N, 6]

        Returns
        -------
        faceboxes: np.ndarray
            Predicted boxes: int64[M, 4] as [x_min, y_min, x_max, y_max]
            in pixels of `image`. Shape is (0, 4) when nothing passes the
            score threshold.

        facescores: np.ndarray
            Predicted box confs: [M, 1]
        """
        image_height = image.shape[0]
        image_width = image.shape[1]
        input_height = int(self.input_shapes[0][2])
        input_width = int(self.input_shapes[0][3])

        # Keep only detections strictly above the score threshold.
        keep_idxs = scores[:, 0] > self.class_score_th
        scores_keep = scores[keep_idxs, :]
        boxes_keep = boxes[keep_idxs, :]

        # Clamp every coordinate to the model input extent, then rescale
        # from model-input pixels to pixels of the original image.
        x_min = np.clip(boxes_keep[:, 3], 0, input_width) * image_width / input_width
        y_min = np.clip(boxes_keep[:, 2], 0, input_height) * image_height / input_height
        x_max = np.clip(boxes_keep[:, 5], 0, input_width) * image_width / input_width
        y_max = np.clip(boxes_keep[:, 4], 0, input_height) * image_height / input_height

        # astype truncates toward zero, like int() on each scalar would.
        faceboxes = np.stack(
            [x_min, y_min, x_max, y_max],
            axis=1,
        ).astype(np.int64)

        return faceboxes, scores_keep
def main():
    """Write YOLO-format label files for every image in a folder.

    Command-line options:
        -m / --model         ONNX model path passed to YOLOv7ONNX.
        -i / --image_folder  Folder whose files are processed (default 'images').

    For each readable image the detector is run and two files are written
    into the 'output' directory: a copy of the untouched image under its
    original file name, and '<stem>.txt' holding one
    'class_id cx cy w h' line per detection, normalized by the image size.
    A preview window shows the detections; pressing ESC stops the run.
    """
    parser = ArgumentParser()
    parser.add_argument(
        '-m',
        '--model',
        type=str,
        default='yolov7_tiny_head_0.768_post_480x640.onnx',
    )
    parser.add_argument(
        '-i',
        '--image_folder',
        type=str,
        default='images',
    )
    args = parser.parse_args()

    model = YOLOv7ONNX(
        model_path=args.model,
    )

    output_dir = 'output'
    os.makedirs(output_dir, exist_ok=True)

    class_id = 0
    # Factors applied to the normalized box width / height before writing.
    # NOTE(review): presumably empirical padding around the detected box;
    # the enlarged box is not clamped to the image - confirm this is wanted.
    box_width_scale = 1.18
    box_height_scale = 1.14
    # Each overlay is drawn twice: a thick white pass under a thin green one.
    outline_styles = (
        ((255, 255, 255), 2),
        ((0, 255, 0), 1),
    )

    # Honour --image_folder (previously the folder was hard-coded).
    img_path_list = sorted(glob.glob(os.path.join(args.image_folder, '*')))
    for img_path in tqdm(img_path_list, dynamic_ncols=True):
        image = cv2.imread(img_path)
        if image is None:
            # Not a readable image (e.g. a stray text file); skip it.
            continue

        # Overlays go on a copy so the saved image stays clean.
        debug_image = image.copy()
        face_boxes, face_scores = model(debug_image)

        image_height = int(image.shape[0])
        image_width = int(image.shape[1])

        basename = os.path.basename(img_path)
        # Derive the label name from the stem so that every extension
        # (.png, .jpeg, .JPG, ...) maps to a distinct .txt file instead of
        # colliding with the image written below.
        txt_file = os.path.join(
            output_dir,
            os.path.splitext(basename)[0] + '.txt',
        )

        # 'w' so that re-running the script replaces the labels rather than
        # appending duplicates to the previous run's file.
        with open(txt_file, 'w') as out_file:
            for face_box, face_score in zip(face_boxes, face_scores):
                x_min, y_min, x_max, y_max = (int(v) for v in face_box)
                score_text = f'{face_score[0]:.2f}'
                # Put the label above the box, or at y=10 near the top edge.
                text_org = (x_min, y_min - 10 if y_min - 10 > 0 else 10)

                for color, thickness in outline_styles:
                    cv2.rectangle(
                        debug_image,
                        (x_min, y_min),
                        (x_max, y_max),
                        color,
                        thickness,
                    )
                for color, thickness in outline_styles:
                    cv2.putText(
                        debug_image,
                        score_text,
                        text_org,
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.7,
                        color,
                        thickness,
                        cv2.LINE_AA,
                    )

                # Normalized center and (scaled) size of the box.
                cx: float = (x_min + x_max) / 2.0 / image_width
                cy: float = (y_min + y_max) / 2.0 / image_height
                w: float = abs(x_max - x_min) / image_width * box_width_scale
                h: float = abs(y_max - y_min) / image_height * box_height_scale
                out_file.write(f"{class_id} {cx} {cy} {w} {h}\n")

        # Save the original image (without overlays) next to its labels.
        cv2.imwrite(os.path.join(output_dir, basename), image)

        cv2.imshow("test", debug_image)
        key = cv2.waitKey(1)
        if key == 27:  # ESC
            break

    cv2.destroyAllWindows()
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()