Lite-HRNet ONNX export + trial and error with variable batch size settings
git clone https://github.com/HRNet/Lite-HRNet.git && cd Lite-HRNet
docker run --gpus all --rm -it \
--shm-size=10g \
-v `pwd`:/mmaction2/data \
--name mmaction2_exec_env \
pinto0309/mmaction2_exec_env:latest
cd data
sudo wget https://github.com/codylcs/lite_hrnet_onnx/raw/main/litehrnet.py -O /opt/conda/lib/python3.7/site-packages/mmpose/models/backbones/litehrnet.py
wget https://github.com/codylcs/lite_hrnet_onnx/raw/main/pytorchtoonnx.py
sudo sed -i 's/-1/groups*channels_per_group/g' /opt/conda/lib/python3.7/site-packages/mmpose/models/backbones/utils/channel_shuffle.py
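The wget above swaps in an ONNX-export-friendly litehrnet.py, and the sed rewrites the -1 inside mmpose's channel_shuffle so that the exported Reshape gets an explicit channel size instead of an inferred one (this matters later, when only the batch axis is allowed to be -1). A sketch of what the patched function ends up as, assuming the standard mmpose channel_shuffle implementation:

import torch

def channel_shuffle(x, groups):
    # Channel shuffle used by Lite-HRNet. After the sed patch the second
    # view() uses groups * channels_per_group instead of -1, so the ONNX
    # Reshape node no longer carries an inferred (-1) channel dimension.
    batch_size, num_channels, height, width = x.size()
    channels_per_group = num_channels // groups

    x = x.view(batch_size, groups, channels_per_group, height, width)
    x = torch.transpose(x, 1, 2).contiguous()
    # before the patch: x = x.view(batch_size, -1, height, width)
    x = x.view(batch_size, groups * channels_per_group, height, width)
    return x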
export PYTHONPATH="$(dirname $0)/..":$PYTHONPATH
DATASET=coco
H=256
W=192
MODEL=litehrnet_18_${DATASET}_${H}x${W}
sed -i 's/TopDownSimpleHead/TopdownHeatmapSimpleHead/g' configs/top_down/lite_hrnet/${DATASET}/${MODEL}.py
python3 pytorchtoonnx.py \
--config configs/top_down/lite_hrnet/${DATASET}/${MODEL}.py \
--checkpoint checkpoints/${DATASET}/${MODEL}.pth \
--output-file ${MODEL}.onnx \
--shape 1 3 ${H} ${W}
python3 -m onnxsim ${MODEL}.onnx ${MODEL}.onnx
DATASET=coco
H=384
W=288
MODEL=litehrnet_18_${DATASET}_${H}x${W}
sed -i 's/TopDownSimpleHead/TopdownHeatmapSimpleHead/g' configs/top_down/lite_hrnet/${DATASET}/${MODEL}.py
python3 pytorchtoonnx.py \
--config configs/top_down/lite_hrnet/${DATASET}/${MODEL}.py \
--checkpoint checkpoints/${DATASET}/${MODEL}.pth \
--output-file ${MODEL}.onnx \
--shape 1 3 ${H} ${W}
python3 -m onnxsim ${MODEL}.onnx ${MODEL}.onnx
DATASET=coco
H=256
W=192
MODEL=litehrnet_30_${DATASET}_${H}x${W}
sed -i 's/TopDownSimpleHead/TopdownHeatmapSimpleHead/g' configs/top_down/lite_hrnet/${DATASET}/${MODEL}.py
python3 pytorchtoonnx.py \
--config configs/top_down/lite_hrnet/${DATASET}/${MODEL}.py \
--checkpoint checkpoints/${DATASET}/${MODEL}.pth \
--output-file ${MODEL}.onnx \
--shape 1 3 ${H} ${W}
python3 -m onnxsim ${MODEL}.onnx ${MODEL}.onnx
DATASET=coco
H=384
W=288
MODEL=litehrnet_30_${DATASET}_${H}x${W}
sed -i 's/TopDownSimpleHead/TopdownHeatmapSimpleHead/g' configs/top_down/lite_hrnet/${DATASET}/${MODEL}.py
python3 pytorchtoonnx.py \
--config configs/top_down/lite_hrnet/${DATASET}/${MODEL}.py \
--checkpoint checkpoints/${DATASET}/${MODEL}.pth \
--output-file ${MODEL}.onnx \
--shape 1 3 ${H} ${W}
python3 -m onnxsim ${MODEL}.onnx ${MODEL}.onnx
DATASET=mpii
H=256
W=256
MODEL=litehrnet_18_${DATASET}_${H}x${W}
sed -i 's/TopDownSimpleHead/TopdownHeatmapSimpleHead/g' configs/top_down/lite_hrnet/${DATASET}/${MODEL}.py
python3 pytorchtoonnx.py \
--config configs/top_down/lite_hrnet/${DATASET}/${MODEL}.py \
--checkpoint checkpoints/${DATASET}/${MODEL}.pth \
--output-file ${MODEL}.onnx \
--shape 1 3 ${H} ${W}
python3 -m onnxsim ${MODEL}.onnx ${MODEL}.onnx
DATASET=mpii
H=256
W=256
MODEL=litehrnet_30_${DATASET}_${H}x${W}
sed -i 's/TopDownSimpleHead/TopdownHeatmapSimpleHead/g' configs/top_down/lite_hrnet/${DATASET}/${MODEL}.py
python3 pytorchtoonnx.py \
--config configs/top_down/lite_hrnet/${DATASET}/${MODEL}.py \
--checkpoint checkpoints/${DATASET}/${MODEL}.pth \
--output-file ${MODEL}.onnx \
--shape 1 3 ${H} ${W}
python3 -m onnxsim ${MODEL}.onnx ${MODEL}.onnx
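A quick sanity check (my own addition, not part of the original steps) to confirm one of the static exports and to see the graph input/output tensor names; the same kind of names ('input.1', '5922') appear later as the keys of dynamic_axes:

import onnx

model = onnx.load('litehrnet_18_coco_256x192.onnx')
onnx.checker.check_model(model)

def dims(tensor):
    # dim_param holds a symbolic dimension name, dim_value a fixed size
    return [d.dim_param or d.dim_value for d in tensor.type.tensor_type.shape.dim]

for t in model.graph.input:
    print('input :', t.name, dims(t))
for t in model.graph.output:
    print('output:', t.name, dims(t))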
https://github.com/HRNet/Lite-HRNet/issues/67
https://github.com/codylcs/lite_hrnet_onnx
File "/opt/conda/lib/python3.7/site-packages/mmcv/utils/registry.py", line 246, in _register_module
raise KeyError(f'{name} is already registered '
KeyError: 'LiteHRNet is already registered in models'
File "/opt/conda/lib/python3.7/site-packages/mmcv/utils/registry.py", line 55, in build_from_cfg
raise type(e)(f'{obj_cls.__name__}: {e}')
KeyError: "TopDown: 'TopDownSimpleHead is not in the models registry'"
pytorchtoonnx.py (torch.onnx.export call modified for the Lite-HRNet-18 models; the dynamic_axes keys are the graph input and output tensor names):
torch.onnx.export(
model,
one_img,
output_file,
export_params=True,
keep_initializers_as_inputs=True,
verbose=show,
opset_version=opset_version,
dynamic_axes={
'input.1': {0: '-1'},
'5922': {0: '-1'},
}
)
DATASET=coco
H=256
W=192
MODEL=litehrnet_18_${DATASET}
python3 pytorchtoonnx.py \
--config configs/top_down/lite_hrnet/${DATASET}/${MODEL}_${H}x${W}.py \
--checkpoint checkpoints/${DATASET}/${MODEL}_${H}x${W}.pth \
--output-file ${MODEL}_Nx${H}x${W}.onnx \
--shape 1 3 ${H} ${W}
DATASET=coco
H=384
W=288
MODEL=litehrnet_18_${DATASET}
python3 pytorchtoonnx.py \
--config configs/top_down/lite_hrnet/${DATASET}/${MODEL}_${H}x${W}.py \
--checkpoint checkpoints/${DATASET}/${MODEL}_${H}x${W}.pth \
--output-file ${MODEL}_Nx${H}x${W}.onnx \
--shape 1 3 ${H} ${W}
DATASET=mpii
H=256
W=256
MODEL=litehrnet_18_${DATASET}
python3 pytorchtoonnx.py \
--config configs/top_down/lite_hrnet/${DATASET}/${MODEL}_${H}x${W}.py \
--checkpoint checkpoints/${DATASET}/${MODEL}_${H}x${W}.pth \
--output-file ${MODEL}_Nx${H}x${W}.onnx \
--shape 1 3 ${H} ${W}
pytorchtoonnx.py (same modification for the Lite-HRNet-30 models, whose output tensor name is different):
torch.onnx.export(
model,
one_img,
output_file,
export_params=True,
keep_initializers_as_inputs=True,
verbose=show,
opset_version=opset_version,
dynamic_axes={
'input.1': {0: '-1'},
'10242': {0: '-1'},
}
)
DATASET=coco
H=256
W=192
MODEL=litehrnet_30_${DATASET}
python3 pytorchtoonnx.py \
--config configs/top_down/lite_hrnet/${DATASET}/${MODEL}_${H}x${W}.py \
--checkpoint checkpoints/${DATASET}/${MODEL}_${H}x${W}.pth \
--output-file ${MODEL}_Nx${H}x${W}.onnx \
--shape 1 3 ${H} ${W}
DATASET=coco
H=384
W=288
MODEL=litehrnet_30_${DATASET}
python3 pytorchtoonnx.py \
--config configs/top_down/lite_hrnet/${DATASET}/${MODEL}_${H}x${W}.py \
--checkpoint checkpoints/${DATASET}/${MODEL}_${H}x${W}.pth \
--output-file ${MODEL}_Nx${H}x${W}.onnx \
--shape 1 3 ${H} ${W}
DATASET=mpii
H=256
W=256
MODEL=litehrnet_30_${DATASET}
python3 pytorchtoonnx.py \
--config configs/top_down/lite_hrnet/${DATASET}/${MODEL}_${H}x${W}.py \
--checkpoint checkpoints/${DATASET}/${MODEL}_${H}x${W}.pth \
--output-file ${MODEL}_Nx${H}x${W}.onnx \
--shape 1 3 ${H} ${W}
batchsize_clear.py
import onnx
import struct
from argparse import ArgumentParser


def rebatch(infile, outfile, batch_size):
    model = onnx.load(infile)
    graph = model.graph

    # Change batch size in input, output and value_info
    for tensor in list(graph.input) + list(graph.value_info) + list(graph.output):
        tensor.type.tensor_type.shape.dim[0].dim_param = batch_size

    # Set dynamic batch size (-1) in the shape initializers of Reshape nodes
    for node in graph.node:
        if node.op_type != 'Reshape':
            continue
        for init in graph.initializer:
            # node.input[1] is expected to be the shape input of the Reshape
            if init.name != node.input[1]:
                continue
            # Shape is stored as a list of ints
            if len(init.int64_data) > 0:
                # This overwrites bias nodes' reshape shape but should be fine
                init.int64_data[0] = -1
            # Shape is stored as bytes
            elif len(init.raw_data) > 0:
                shape = bytearray(init.raw_data)
                struct.pack_into('q', shape, 0, -1)
                init.raw_data = bytes(shape)

    onnx.save(model, outfile)


if __name__ == '__main__':
    parser = ArgumentParser('Replace batch size with \'N\'')
    parser.add_argument('infile')
    parser.add_argument('outfile')
    args = parser.parse_args()
    rebatch(args.infile, args.outfile, '-1')
python3 batchsize_clear.py litehrnet_18_coco_Nx256x192.onnx litehrnet_18_coco_Nx256x192.onnx
python3 batchsize_clear.py litehrnet_18_coco_Nx384x288.onnx litehrnet_18_coco_Nx384x288.onnx
python3 batchsize_clear.py litehrnet_18_mpii_Nx256x256.onnx litehrnet_18_mpii_Nx256x256.onnx
python3 batchsize_clear.py litehrnet_30_coco_Nx256x192.onnx litehrnet_30_coco_Nx256x192.onnx
python3 batchsize_clear.py litehrnet_30_coco_Nx384x288.onnx litehrnet_30_coco_Nx384x288.onnx
python3 batchsize_clear.py litehrnet_30_mpii_Nx256x256.onnx litehrnet_30_mpii_Nx256x256.onnx
- If the batch size is not made variable at the point of exporting from PyTorch to ONNX, the Conv2D batch size gets baked in as a fixed size of "1".
- Even with the batch size set to "-1" at PyTorch-to-ONNX export time, the batch size of Reshape still ends up fixed at "1" unless it is additionally overwritten to "-1" with the batchsize_clear.py script.
- If any dimension of a Reshape other than the batch dimension is already "-1", onnxruntime raises a runtime error at inference time, so all non-batch dimensions of Reshape have to be rewritten to fixed sizes at the initial PyTorch-to-ONNX export (see the check sketched right after this list).
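As a concrete illustration of the last point, a minimal sketch (my own, hypothetical helper) that lists every Reshape shape initializer and flags any non-batch dimension still left at -1:

import onnx
from onnx import numpy_helper

model = onnx.load('litehrnet_18_coco_Nx256x192.onnx')
initializers = {init.name: init for init in model.graph.initializer}

for node in model.graph.node:
    if node.op_type != 'Reshape' or node.input[1] not in initializers:
        continue
    shape = numpy_helper.to_array(initializers[node.input[1]]).tolist()
    # shape[0] is the batch dimension (-1 after batchsize_clear.py);
    # every other dimension should be a fixed positive size.
    status = 'NG' if -1 in shape[1:] else 'OK'
    print(status, node.output[0], shape)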
batchsize_N_inference_test.py
import onnxruntime
import numpy as np
onnx_session = onnxruntime.InferenceSession(
'litehrnet_18_coco_Nx256x192.onnx',
providers=['CPUExecutionProvider']
)
input_name = onnx_session.get_inputs()[0].name
output_name = onnx_session.get_outputs()[0].name
print('input.shape:', onnx_session.get_inputs()[0].shape)
# ['-1', 3, 256, 192]
results = onnx_session.run(
None,
{input_name: np.ones(([5, 3, 256, 192]), dtype=np.float32)},
)
print('results.shape:', results[0].shape)
# (5, 17, 64, 48)
Filed a pull request to mmpose
- Environment
- Ubuntu 20.04.3 LTS
- Intel(R) Core(TM) i9-10900K CPU @ 3.70GHz
- NVIDIA GeForce RTX 3070
- N batch: litehrnet_18_coco_Nx256x192.onnx.zip
- 1 batch: litehrnet_18_coco_256x192.onnx.zip
- Final ONNX optimization
git clone https://github.com/microsoft/onnxruntime.git \
&& cd onnxruntime \
&& git checkout 5cbacec854f151549f13b4be0a19c65c01a5728d
python tools/python/remove_initializer_from_input.py \
--input ../litehrnet_18_coco_Nx256x192.onnx \
--output ../litehrnet_18_coco_Nx256x192.onnx
python tools/python/remove_initializer_from_input.py \
--input ../litehrnet_18_coco_256x192.onnx \
--output ../litehrnet_18_coco_256x192.onnx
cd ..
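For reference, remove_initializer_from_input.py drops graph inputs that are actually weights (left over from exporting with keep_initializers_as_inputs=True). A rough sketch of the idea, not the actual onnxruntime tool:

import onnx

def remove_initializer_from_input(infile, outfile):
    model = onnx.load(infile)
    graph = model.graph
    initializer_names = {init.name for init in graph.initializer}
    # Keep only the true runtime inputs; weights stay in graph.initializer.
    runtime_inputs = [inp for inp in graph.input if inp.name not in initializer_names]
    del graph.input[:]
    graph.input.extend(runtime_inputs)
    onnx.save(model, outfile)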
- Inference performance test
import onnxruntime as ort
import numpy as np
import time
MODEL_FILE1 = 'litehrnet_18_coco_Nx256x192.onnx'
MODEL_FILE2 = 'litehrnet_18_coco_256x192.onnx'
#============================================================================
print(f'@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ TensorRT test')
session_option = ort.SessionOptions()
session_option.log_severity_level = 4
# Batch inference x1
onnx_session = ort.InferenceSession(
MODEL_FILE1,
sess_options=session_option,
providers=[
(
'TensorrtExecutionProvider', {
'trt_engine_cache_enable': True,
'trt_fp16_enable': True,
}
),
],
)
input_name = onnx_session.get_inputs()[0].name
output_name = onnx_session.get_outputs()[0].name
print('@@@@@@@@@@ Batch inference x1')
# Warm up
results = onnx_session.run(
None,
{input_name: np.ones(([10, 3, 256, 192]), dtype=np.float32)},
)
# Inference
print(f'input.shape: {onnx_session.get_inputs()[0].shape}')
start = time.time()
results = onnx_session.run(
None,
{input_name: np.ones(([10, 3, 256, 192]), dtype=np.float32)},
)
print(f'results.shape: {results[0].shape}')
print(f'elapsed time: {(time.time()-start)*1000} ms')
print()
# Single inference x10
onnx_session = ort.InferenceSession(
MODEL_FILE2,
sess_options=session_option,
providers=[
(
'TensorrtExecutionProvider', {
'trt_engine_cache_enable': True,
'trt_fp16_enable': True,
}
),
],
)
input_name = onnx_session.get_inputs()[0].name
output_name = onnx_session.get_outputs()[0].name
print('@@@@@@@@@@ Single inference x10')
# Warm up
results = onnx_session.run(
None,
{input_name: np.ones(([1, 3, 256, 192]), dtype=np.float32)},
)
# Inference
print(f'input.shape: {onnx_session.get_inputs()[0].shape}')
start = time.time()
for i in range(10):
results = onnx_session.run(
None,
{input_name: np.ones(([1, 3, 256, 192]), dtype=np.float32)},
)
print(f'results.shape: {results[0].shape}')
print(f'elapsed time: {(time.time()-start)*1000} ms')
print()
#============================================================================
print(f'@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ CUDA test')
session_option1 = ort.SessionOptions()
session_option1.log_severity_level = 4
session_option1.optimized_model_filepath = f"{MODEL_FILE1}_cudaopt.onnx"
session_option1.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED
session_option2 = ort.SessionOptions()
session_option2.log_severity_level = 4
session_option2.optimized_model_filepath = f"{MODEL_FILE2}_cudaopt.onnx"
session_option2.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED
# Batch inference x1
onnx_session = ort.InferenceSession(
MODEL_FILE1,
sess_options=session_option1,
providers=[
'CUDAExecutionProvider',
],
)
input_name = onnx_session.get_inputs()[0].name
output_name = onnx_session.get_outputs()[0].name
print('@@@@@@@@@@ Batch inference x1')
# Warm up
results = onnx_session.run(
None,
{input_name: np.ones(([10, 3, 256, 192]), dtype=np.float32)},
)
# Inference
print(f'input.shape: {onnx_session.get_inputs()[0].shape}')
start = time.time()
results = onnx_session.run(
None,
{input_name: np.ones(([10, 3, 256, 192]), dtype=np.float32)},
)
print(f'results.shape: {results[0].shape}')
print(f'elapsed time: {(time.time()-start)*1000} ms')
print()
# Single inference x10
onnx_session = ort.InferenceSession(
MODEL_FILE2,
sess_options=session_option2,
providers=[
'CUDAExecutionProvider',
],
)
input_name = onnx_session.get_inputs()[0].name
output_name = onnx_session.get_outputs()[0].name
print('@@@@@@@@@@ Single inference x10')
# Warm up
results = onnx_session.run(
None,
{input_name: np.ones(([1, 3, 256, 192]), dtype=np.float32)},
)
# Inference
print(f'input.shape: {onnx_session.get_inputs()[0].shape}')
start = time.time()
for i in range(10):
results = onnx_session.run(
None,
{input_name: np.ones(([1, 3, 256, 192]), dtype=np.float32)},
)
print(f'results.shape: {results[0].shape}')
print(f'elapsed time: {(time.time()-start)*1000} ms')
print()
#============================================================================
print(f'@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ ONNX "CPU" test')
session_option = ort.SessionOptions()
session_option.log_severity_level = 4
# Batch inference x1
onnx_session = ort.InferenceSession(
MODEL_FILE1,
sess_options=session_option,
providers=[
'CPUExecutionProvider',
],
)
input_name = onnx_session.get_inputs()[0].name
output_name = onnx_session.get_outputs()[0].name
print('@@@@@@@@@@ Batch inference x1')
# Warm up
results = onnx_session.run(
None,
{input_name: np.ones(([10, 3, 256, 192]), dtype=np.float32)},
)
# Inference
print(f'input.shape: {onnx_session.get_inputs()[0].shape}')
start = time.time()
results = onnx_session.run(
None,
{input_name: np.ones(([10, 3, 256, 192]), dtype=np.float32)},
)
print(f'results.shape: {results[0].shape}')
print(f'elapsed time: {(time.time()-start)*1000} ms')
print()
# Single inference x10
onnx_session = ort.InferenceSession(
MODEL_FILE2,
sess_options=session_option,
providers=[
'CPUExecutionProvider',
],
)
input_name = onnx_session.get_inputs()[0].name
output_name = onnx_session.get_outputs()[0].name
print('@@@@@@@@@@ Single inference x10')
# Warm up
results = onnx_session.run(
None,
{input_name: np.ones(([1, 3, 256, 192]), dtype=np.float32)},
)
# Inference
print(f'input.shape: {onnx_session.get_inputs()[0].shape}')
start = time.time()
for i in range(10):
results = onnx_session.run(
None,
{input_name: np.ones(([1, 3, 256, 192]), dtype=np.float32)},
)
print(f'results.shape: {results[0].shape}')
print(f'elapsed time: {(time.time()-start)*1000} ms')
print()
#============================================================================
print(f'@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ OpenVINO "CPU" test')
MODEL = 'litehrnet_18_coco'
BATCH=10
H = 256
W = 192
MODEL_PATH1 = f'{MODEL}_{BATCH}x{H}x{W}/openvino/FP16/{MODEL}_{BATCH}x{H}x{W}.xml'
MODEL_PATH2 = f'{MODEL}_{H}x{W}/openvino/FP16/{MODEL}_{H}x{W}.xml'
"""
MODEL=litehrnet_18_coco
H=256
W=192
$INTEL_OPENVINO_DIR/deployment_tools/model_optimizer/mo.py \
--input_model ${MODEL}_${H}x${W}.onnx \
--data_type FP32 \
--output_dir ${MODEL}_${H}x${W}/openvino/FP32 \
--model_name ${MODEL}_${H}x${W}
$INTEL_OPENVINO_DIR/deployment_tools/model_optimizer/mo.py \
--input_model ${MODEL}_${H}x${W}.onnx \
--data_type FP16 \
--output_dir ${MODEL}_${H}x${W}/openvino/FP16 \
--model_name ${MODEL}_${H}x${W}
BATCH=10
$INTEL_OPENVINO_DIR/deployment_tools/model_optimizer/mo.py \
--input_model ${MODEL}_Nx${H}x${W}.onnx \
--data_type FP32 \
--batch 10 \
--output_dir ${MODEL}_${BATCH}x${H}x${W}/openvino/FP32 \
--model_name ${MODEL}_${BATCH}x${H}x${W}
$INTEL_OPENVINO_DIR/deployment_tools/model_optimizer/mo.py \
--input_model ${MODEL}_Nx${H}x${W}.onnx \
--data_type FP16 \
--batch 10 \
--output_dir ${MODEL}_${BATCH}x${H}x${W}/openvino/FP16 \
--model_name ${MODEL}_${BATCH}x${H}x${W}
"""
from openvino.inference_engine import IECore
ie = IECore()
# Batch inference x1
net = ie.read_network(model=MODEL_PATH1)
input_blob = next(iter(net.input_info))
out_blob = next(iter(net.outputs))
input_shape = net.input_info[input_blob].input_data.shape
input_height = input_shape[2]
input_width = input_shape[3]
exec_net = ie.load_network(network=net, device_name='CPU',)
print('@@@@@@@@@@ Batch inference x1')
# Warm up
results = exec_net.infer(
{input_blob: np.ones([BATCH, 3, 256, 192])}
)['5966']
# Inference
start = time.time()
print(f'input.shape: {net.input_info[input_blob].input_data.shape}')
results = exec_net.infer(
{input_blob: np.ones([BATCH, 3, 256, 192])}
)['5966']
print(f'results.shape: {results.shape}')
print(f'elapsed time: {(time.time()-start)*1000} ms')
print()
# Single inference x10
net = ie.read_network(model=MODEL_PATH2)
input_blob = next(iter(net.input_info))
out_blob = next(iter(net.outputs))
input_shape = net.input_info[input_blob].input_data.shape
input_height = input_shape[2]
input_width = input_shape[3]
exec_net = ie.load_network(network=net, device_name='CPU',)
print('@@@@@@@@@@ Single inference x10')
# Warm up
results = exec_net.infer(
{input_blob: np.ones([1, 3, 256, 192])}
)['5966']
# Inference
start = time.time()
print(f'input.shape: {net.input_info[input_blob].input_data.shape}')
for i in range(10):
results = exec_net.infer(
{input_blob: np.ones([1, 3, 256, 192])}
)['5966']
print(f'results.shape: {results.shape}')
print(f'elapsed time: {(time.time()-start)*1000} ms')
print()
#============================================================================
print(f'@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ OpenVINO Execution Provider "CPU" test')
session_option = ort.SessionOptions()
session_option.log_severity_level = 4
session_option.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL
ort.capi._pybind_state.set_openvino_device('CPU_FP32')
# Batch inference x1
onnx_session = ort.InferenceSession(
'litehrnet_18_coco_Nx256x192_opt.onnx',
sess_options=session_option,
providers=[
'OpenVINOExecutionProvider',
],
)
input_name = onnx_session.get_inputs()[0].name
output_name = onnx_session.get_outputs()[0].name
print('@@@@@@@@@@ Batch inference x1')
# Warm up
results = onnx_session.run(
None,
{input_name: np.ones(([10, 3, 256, 192]), dtype=np.float32)},
)
# Inference
print(f'input.shape: {onnx_session.get_inputs()[0].shape}')
start = time.time()
results = onnx_session.run(
None,
{input_name: np.ones(([10, 3, 256, 192]), dtype=np.float32)},
)
print(f'results.shape: {results[0].shape}')
print(f'elapsed time: {(time.time()-start)*1000} ms')
print()
# Single inference x10
onnx_session = ort.InferenceSession(
'litehrnet_18_coco_256x192_opt.onnx',
sess_options=session_option,
providers=[
'OpenVINOExecutionProvider',
],
)
input_name = onnx_session.get_inputs()[0].name
output_name = onnx_session.get_outputs()[0].name
print('@@@@@@@@@@ Single inference x10')
# Warm up
results = onnx_session.run(
None,
{input_name: np.ones(([1, 3, 256, 192]), dtype=np.float32)},
)
# Inference
print(f'input.shape: {onnx_session.get_inputs()[0].shape}')
start = time.time()
for i in range(10):
results = onnx_session.run(
None,
{input_name: np.ones(([1, 3, 256, 192]), dtype=np.float32)},
)
print(f'results.shape: {results[0].shape}')
print(f'elapsed time: {(time.time()-start)*1000} ms')
print()
- Results
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ TensorRT test
@@@@@@@@@@ Batch inference x1
input.shape: ['-1', 3, 256, 192]
results.shape: (10, 17, 64, 48)
elapsed time: 9.258270263671875 ms
@@@@@@@@@@ Single inference x10
input.shape: [1, 3, 256, 192]
results.shape: (1, 17, 64, 48)
elapsed time: 34.79766845703125 ms
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ CUDA test
@@@@@@@@@@ Batch inference x1
input.shape: ['-1', 3, 256, 192]
results.shape: (10, 17, 64, 48)
elapsed time: 25.88963508605957 ms
@@@@@@@@@@ Single inference x10
input.shape: [1, 3, 256, 192]
results.shape: (1, 17, 64, 48)
elapsed time: 79.52594757080078 ms
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ CPU test
@@@@@@@@@@ Batch inference x1
input.shape: ['-1', 3, 256, 192]
results.shape: (10, 17, 64, 48)
elapsed time: 107.6958179473877 ms
@@@@@@@@@@ Single inference x10
input.shape: [1, 3, 256, 192]
results.shape: (1, 17, 64, 48)
elapsed time: 116.13655090332031 ms
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ OpenVINO "CPU" test
@@@@@@@@@@ Batch inference x1
input.shape: [10, 3, 256, 192]
results.shape: (10, 17, 64, 48)
elapsed time: 47.75524139404297 ms
@@@@@@@@@@ Single inference x10
input.shape: [1, 3, 256, 192]
results.shape: (1, 17, 64, 48)
elapsed time: 82.13305473327637 ms
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ OpenVINO Execution Provider "CPU" test
@@@@@@@@@@ Batch inference x1
input.shape: ['-1', 3, 256, 192]
results.shape: (10, 17, 64, 48)
elapsed time: 49.6370792388916 ms
@@@@@@@@@@ Single inference x10
input.shape: [1, 3, 256, 192]
results.shape: (1, 17, 64, 48)
elapsed time: 80.70492744445801 ms