Attempting to export the Detectron2-based BodyHands model to ONNX
git clone https://github.com/cvlab-stonybrook/BodyHands.git
cd BodyHands
Dockerfile
FROM nvidia/cuda:11.1.1-cudnn8-devel-ubuntu20.04
ENV DEBIAN_FRONTEND noninteractive
RUN apt-get update && apt-get install -y \
ca-certificates \
python3-dev \
git \
wget \
sudo \
ninja-build \
python-is-python3 \
python3-pip \
libgl1-mesa-dev \
libglib2.0-0 \
libsm6 \
libxrender1 \
libxext-dev \
nano \
&& sed -i 's/# set linenumbers/set linenumbers/g' /etc/nanorc \
&& apt clean \
&& rm -rf /var/lib/apt/lists/*
# create a non-root user
ENV USERNAME=user
RUN echo "root:root" | chpasswd \
&& adduser --disabled-password --gecos "" "${USERNAME}" \
&& echo "${USERNAME}:${USERNAME}" | chpasswd \
&& echo "%${USERNAME} ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers.d/${USERNAME} \
&& chmod 0440 /etc/sudoers.d/${USERNAME} \
&& mkdir -p /home/${USERNAME}
USER ${USERNAME}
# RUN echo HOME: ${HOME}
# RUN echo PWD: `pwd`
ENV HOME=/home/${USERNAME}
WORKDIR ${HOME}
ENV PATH="${HOME}/.local/bin:${PATH}"
# install dependencies
# See https://pytorch.org/ for other options if you use a different version of CUDA
RUN pip install --user pip -U
RUN pip install --user tensorboard cmake onnx # cmake from apt-get is too old
RUN pip install --user torch==1.10 torchvision==0.11.1 -f https://download.pytorch.org/whl/cu111/torch_stable.html
RUN pip install opencv-python==4.1.2.30 scipy scikit-image
RUN pip install --user 'git+https://github.com/facebookresearch/fvcore'
# install detectron2
RUN git clone https://github.com/facebookresearch/detectron2 detectron2_repo
# set FORCE_CUDA because during `docker build` cuda is not accessible
ENV FORCE_CUDA="1"
# This will by default build detectron2 for all common cuda architectures and take a lot more time,
# because inside `docker build`, there is no way to tell which architecture will be used.
ARG TORCH_CUDA_ARCH_LIST="Ampere"
ENV TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}"
RUN pip install --user -e detectron2_repo
# Set a fixed model cache directory.
ENV FVCORE_CACHE="/tmp"
WORKDIR ${HOME}/detectron2_repo
RUN pip install --user torch==1.13.1 torchvision==0.14.1 --extra-index-url https://download.pytorch.org/whl/cu116
RUN echo "export QT_X11_NO_MITSHM=1" >> ${HOME}/.bashrc \
&& echo "sudo chmod 777 /dev/video*" >> ${HOME}/.bashrc
docker build -t pinto0309/bodyhands:latest .
xhost +local: && \
docker run -it --rm --gpus all \
-v $PWD:/home/user/detectron2_repo/BodyHands \
-v /tmp/.X11-unix/:/tmp/.X11-unix:rw \
--device /dev/video0:/dev/video0:mwr \
--net=host \
-e XDG_RUNTIME_DIR=$XDG_RUNTIME_DIR \
-e DISPLAY=$DISPLAY \
--privileged \
pinto0309/bodyhands:latest
cd BodyHands
python demo_cam.py
demo_cam.py
import argparse
import os
import cv2
import torch
from detectron2.data import MetadataCatalog
from detectron2.modeling import build_model
from detectron2.config import get_cfg
import detectron2.data.transforms as T
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.data import MetadataCatalog
from detectron2.modeling import build_model
from bodyhands import *
from datasets import *
from bodyhands import add_bodyhands_config
from bodyhands import CustomVisualizer
import copy
class CustomPredictor:
def __init__(self, cfg):
self.cfg = cfg.clone()
self.model = build_model(self.cfg)
self.model.eval()
self.metadata = MetadataCatalog.get(cfg.DATASETS.TEST[0])
checkpointer = DetectionCheckpointer(self.model)
checkpointer.load(cfg.MODEL.WEIGHTS)
self.input_format = cfg.INPUT.FORMAT
assert self.input_format in ["RGB", "BGR"], self.input_format
def __call__(self, original_image):
with torch.no_grad():
if self.input_format == "RGB":
original_image = original_image[:, :, ::-1]
height, width = original_image.shape[:2]
image = torch.as_tensor(original_image.astype("float32").transpose(2, 0, 1))
inputs = {"image": image, "height": height, "width": width}
predictions = self.model([inputs], height, width)[0]
return predictions
def prepareModel(cfg_file, weights, thresh):
cfg = get_cfg()
add_bodyhands_config(cfg)
cfg.merge_from_file(cfg_file)
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = thresh
cfg.MODEL.WEIGHTS = os.path.abspath(weights)
predictor = CustomPredictor(cfg)
return predictor
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Arguments for evaluation')
parser.add_argument(
'--thresh',
required=False,
metavar='threshold for hand detections',
help='hand detection score threshold',
default=0.7,
)
args = parser.parse_args()
out_path = os.path.abspath('./demoOutput/')
if not os.path.exists(out_path):
os.mkdir(out_path)
roi_score_thresh = float(args.thresh)
model = prepareModel('./configs/BodyHands.yaml', './models/model.pth', roi_score_thresh)
cap_device = 0
cap = cv2.VideoCapture(cap_device)
cap_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
cap_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
cap_fps = cap.get(cv2.CAP_PROP_FPS)
fourcc = cv2.VideoWriter_fourcc('m','p','4','v')
video_writer = cv2.VideoWriter(
filename='output.mp4',
fourcc=fourcc,
fps=15,
frameSize=(cap_width, cap_height),
)
while True:
ret, im = cap.read()
if not ret:
break
debug_image = copy.deepcopy(im)
debug_image = cv2.resize(debug_image, (cap_width, cap_height))
outputs = model(debug_image)
v = CustomVisualizer(debug_image[:, :, ::-1], MetadataCatalog.get("HandBodyContactHands_sub"), scale=1.0)
#######################################################################################
outputs = outputs["instances"].to("cpu")
classes = outputs.pred_classes
body_ids = outputs.pred_body_ids
boxes = outputs.pred_boxes.tensor
masks = outputs.pred_masks
hand_indices = classes == 0
body_indices = classes == 1
hand_boxes = boxes[hand_indices]
hand_masks = masks[hand_indices]
hand_body_ids = body_ids[hand_indices]
body_boxes = boxes[body_indices]
body_body_ids = body_ids[body_indices]
num_hands, num_bodies = hand_boxes.shape[0], body_boxes.shape[0]
body_masks = []
for body_no in range(num_bodies):
box = body_boxes[body_no].view(-1).cpu().numpy()
xmin, ymin, xmax, ymax = box
body_poly = [[(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin)]]
body_masks.append(body_poly)
########################################################################################
v = v.modified_draw_instance_predictions(hand_boxes, hand_masks, hand_body_ids, body_boxes, body_masks, body_body_ids)
out = v.get_image()[:, :, ::-1]
print(out.shape)
video_writer.write(out)
cv2.imshow(f'BodyHands', out)
key = cv2.waitKey(1)
if key == 27: # ESC
break
if video_writer:
video_writer.release()
if cap:
cap.release()
cv2.destroyAllWindows()
.devcontainer/devcontainer.json
{
"name": "Python 3",
"image": "pinto0309/bodyhands:latest",
// Configure tool-specific properties.
"customizations": {
// Configure properties specific to VS Code.
"vscode": {
// Set *default* container specific settings.json values on container create.
"settings": {
"python.defaultInterpreterPath": "/usr/local/bin/python",
"python.linting.enabled": true,
"python.linting.pylintEnabled": true,
"python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8",
"python.formatting.blackPath": "/usr/local/py-utils/bin/black",
"python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf",
"python.linting.banditPath": "/usr/local/py-utils/bin/bandit",
"python.linting.flake8Path": "/usr/local/py-utils/bin/flake8",
"python.linting.mypyPath": "/usr/local/py-utils/bin/mypy",
"python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle",
"python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle",
"python.linting.pylintPath": "/usr/local/py-utils/bin/pylint"
},
// Add the IDs of extensions you want installed when the container is created.
"extensions": [
"ms-python.python",
"ms-python.vscode-pylance"
]
}
},
"remoteUser": "appuser",
"features": {
"github-cli": "latest"
},
"runArgs": [
"--gpus", "all",
"--shm-size", "64gb",
"--device", "/dev/video0:/dev/video0:mwr",
"-v", "${localWorkspaceFolder}:/home/appuser/detectron2_repo/BodyHands",
"--privileged"
]
}
Export logic (the demo code, slightly modified)
demo_cam.py
import argparse
import os
import cv2
import torch
from detectron2.data import MetadataCatalog
from detectron2.modeling import build_model
from detectron2.config import get_cfg
import detectron2.data.transforms as T
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.data import MetadataCatalog
from detectron2.modeling import build_model
from bodyhands import *
from datasets import *
from bodyhands import add_bodyhands_config
from bodyhands import CustomVisualizer
import copy
class CustomPredictor:
def __init__(self, cfg):
self.cfg = cfg.clone()
self.model = build_model(self.cfg)
self.model.eval()
self.metadata = MetadataCatalog.get(cfg.DATASETS.TEST[0])
checkpointer = DetectionCheckpointer(self.model)
checkpointer.load(cfg.MODEL.WEIGHTS)
print(f'@@@@@@@@@@@@@@@@@@@@@@@@@@@@ cfg.MODEL.WEIGHTS:{cfg.MODEL.WEIGHTS}')
self.input_format = cfg.INPUT.FORMAT
print(f'@@@@@@@@@@@@@@@@@@@@@@@@@@@@ cfg.INPUT.FORMAT:{cfg.INPUT.FORMAT}')
# print(f'@@@@@@@@@@@@@@@@@@@@@@@@@@@@ checkpointer.model:{checkpointer.model}')
# print(f'@@@@@@@@@@@@@@@@@@@@@@@@@@@@ self.model:{self.model}')
# print(f'@@@@@@@@@@@@@@@@@@@@@@@@@@@@ self.model.__call__:{self.model.__call__}')
def inference_func(model, image):
inputs = [{"image": image, "height": image.shape[1], "width": image.shape[2]}]
# inputs = [{"image": image, "height": H, "width": W}]
# return model.inference(inputs, height=480, width=640, do_postprocess=True)[0]["instances"]
# return model.inference(inputs, height=480, width=640, do_postprocess=False)[0]["instances"]
return model.inference(inputs, height=480, width=640, do_postprocess=False)
import onnx # isort:skip
import io
from detectron2.export.flatten import TracingAdapter
H=480
W=640
# inputs = {"image": x, "height": H, "width": W}
# inputs = {"image": x}
from torchvision.io import read_image
inputs = read_image(path='teaser.jpeg')
import torchvision.transforms.functional as F
inputs = F.resize(img=inputs, size=(H, W))
f = io.BytesIO()
adapter_model = TracingAdapter(self.model, inputs, inference_func)
adapter_model.eval()
with torch.no_grad():
try:
torch.onnx.enable_log()
except AttributeError:
# Older ONNX versions do not have this API
pass
torch.onnx.export(
adapter_model,
adapter_model.flattened_inputs,
f,
training=torch.onnx.TrainingMode.EVAL,
opset_version=11,
verbose=True,
)
onnx_model = onnx.load_from_string(f.getvalue())
assert onnx_model is not None
onnx.save(onnx_model, f'bodyhands_{H}x{W}.onnx')
import sys
sys.exit(0)
assert self.input_format in ["RGB", "BGR"], self.input_format
def __call__(self, original_image):
with torch.no_grad():
if self.input_format == "RGB":
original_image = original_image[:, :, ::-1]
height, width = original_image.shape[:2]
image = torch.as_tensor(original_image.astype("float32").transpose(2, 0, 1))
inputs = {"image": image, "height": height, "width": width}
predictions = self.model([inputs], height, width)[0]
return predictions
def prepareModel(cfg_file, weights, thresh):
cfg = get_cfg()
add_bodyhands_config(cfg)
cfg.merge_from_file(cfg_file)
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = thresh
cfg.MODEL.WEIGHTS = os.path.abspath(weights)
predictor = CustomPredictor(cfg)
return predictor
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Arguments for evaluation')
parser.add_argument(
'--thresh',
required=False,
metavar='threshold for hand detections',
help='hand detection score threshold',
default=0.7,
)
args = parser.parse_args()
out_path = os.path.abspath('./demoOutput/')
if not os.path.exists(out_path):
os.mkdir(out_path)
roi_score_thresh = float(args.thresh)
model = prepareModel('./configs/BodyHands.yaml', './models/model.pth', roi_score_thresh)
cap_device = 0
cap = cv2.VideoCapture(cap_device)
cap_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
cap_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
cap_fps = cap.get(cv2.CAP_PROP_FPS)
fourcc = cv2.VideoWriter_fourcc('m','p','4','v')
video_writer = cv2.VideoWriter(
filename='output.mp4',
fourcc=fourcc,
fps=15,
frameSize=(cap_width, cap_height),
)
while True:
ret, im = cap.read()
if not ret:
break
debug_image = copy.deepcopy(im)
debug_image = cv2.resize(debug_image, (cap_width, cap_height))
outputs = model(debug_image)
v = CustomVisualizer(debug_image[:, :, ::-1], MetadataCatalog.get("HandBodyContactHands_sub"), scale=1.0)
#######################################################################################
outputs = outputs["instances"].to("cpu")
classes = outputs.pred_classes
body_ids = outputs.pred_body_ids
boxes = outputs.pred_boxes.tensor
masks = outputs.pred_masks
hand_indices = classes == 0
body_indices = classes == 1
hand_boxes = boxes[hand_indices]
hand_masks = masks[hand_indices]
hand_body_ids = body_ids[hand_indices]
body_boxes = boxes[body_indices]
body_body_ids = body_ids[body_indices]
num_hands, num_bodies = hand_boxes.shape[0], body_boxes.shape[0]
body_masks = []
for body_no in range(num_bodies):
box = body_boxes[body_no].view(-1).cpu().numpy()
xmin, ymin, xmax, ymax = box
body_poly = [[(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin)]]
body_masks.append(body_poly)
########################################################################################
v = v.modified_draw_instance_predictions(hand_boxes, hand_masks, hand_body_ids, body_boxes, body_masks, body_body_ids)
out = v.get_image()[:, :, ::-1]
print(out.shape)
video_writer.write(out)
cv2.imshow(f'BodyHands', out)
key = cv2.waitKey(1)
if key == 27: # ESC
break
if video_writer:
video_writer.release()
if cap:
cap.release()
cv2.destroyAllWindows()
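If the export gets far enough to write bodyhands_480x640.onnx, a quick sanity check of the graph with onnx/onnxruntime looks roughly like this (a sketch only: it assumes onnxruntime is installed in the container, that the graph input is the single CHW image tensor flattened by TracingAdapter, and the output names depend on whatever the adapter generated):
import numpy as np
import onnx
import onnxruntime as ort

# Structural validation of the exported graph
model = onnx.load("bodyhands_480x640.onnx")
onnx.checker.check_model(model)

# Run one dummy 480x640 CHW image through onnxruntime and print the output shapes
sess = ort.InferenceSession("bodyhands_480x640.onnx", providers=["CPUExecutionProvider"])
input_name = sess.get_inputs()[0].name
dummy = (np.random.rand(3, 480, 640) * 255).astype(np.float32)
outputs = sess.run(None, {input_name: dummy})
for meta, out in zip(sess.get_outputs(), outputs):
    print(meta.name, out.shape)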
This part needs to be rewritten (the scipy linear_sum_assignment call below operates on numpy arrays and cannot be traced into the ONNX graph):
row_ind, col_ind = linear_sum_assignment(-scores_numpy)
col_ind = torch.from_numpy(col_ind)
row_ind = torch.from_numpy(row_ind)
print(f'@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ col_ind: {type(col_ind)}')
print(f'@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ num_bodies: {type(num_bodies)}')
col_ind = (col_ind % (num_bodies+1)) + 1
# row_ind, col_ind = torch.from_numpy(row_ind).to(device), torch.from_numpy(col_ind).to(device)
row_ind, col_ind = row_ind.to(device), col_ind.to(device)
overlap_estimation.py
import torch
from torch import nn
from torch.nn import functional as F
from detectron2.layers import Linear, ShapeSpec, Conv2d, get_norm, cat
from detectron2.utils.registry import Registry
import numpy as np
import fvcore.nn.weight_init as weight_init
from detectron2.modeling.box_regression import Box2BoxTransform
from scipy.optimize import linear_sum_assignment
ROI_OVERLAP_ESTIMATION_HEAD_REGISTRY = Registry("ROI_OVERLAP_ESTIMATION_HEAD")
ROI_OVERLAP_ESTIMATION_HEAD_REGISTRY.__doc__ = """Registry for Overlap Estimation Module."""
def OverlapEstimationInference(cfg, handbody_components, pred_instances, device):
num_hands = handbody_components["num_hands"]
num_bodies = handbody_components["num_bodies"]
hand_indices = handbody_components["hand_indices"]
body_indices = handbody_components["body_indices"]
gt_overlap = (handbody_components["gt_ioa"] > 0).float()
if num_hands == 0:
pred_instances[0].pred_body_ids = torch.Tensor([i for i in range(1, num_bodies+1)]).to(device)
return pred_instances
if num_bodies == 0:
pred_instances[0].pred_body_ids = torch.Tensor([num_bodies+1] * num_hands).to(device)
return pred_instances
pred_body_ids = torch.Tensor([-1.0] * (num_hands+num_bodies)).to(device)
pred_hand_boxes = handbody_components["hand_boxes"]
pred_body_boxes = handbody_components["body_boxes"]
pred_mu = handbody_components["pred_mu"]
box2box_transform = Box2BoxTransform(weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS)
mu_hand = box2box_transform.get_deltas(
pred_hand_boxes, pred_mu
)
mu_body = [] # A list of length num_hands
scores_positional_density = []
for hand_no in range(num_hands):
hand_boxes_hand_no = pred_hand_boxes[hand_no:hand_no+1]
new_pred_body_boxes = torch.cat([pred_body_boxes, hand_boxes_hand_no], dim=0)
hand_boxes_hand_no = hand_boxes_hand_no.repeat(num_bodies+1, 1)
mu_body_hand_no = box2box_transform.get_deltas(
hand_boxes_hand_no, new_pred_body_boxes
) # (num_bodies+1, 4)
mu_hand_hand_no = mu_hand[hand_no:hand_no+1].repeat(num_bodies+1, 1)
# (Num_bodies+1, 4)
conf_hand_no = torch.exp(
-2.0 * 1e-1 * torch.sum(torch.abs(mu_hand_hand_no - mu_body_hand_no), dim=1)
)
scores_positional_density.append(conf_hand_no.reshape(1, num_bodies+1))
mu_body.append(mu_body_hand_no)
scores_positional_density = torch.cat(scores_positional_density, dim=0)
pred_overlap = handbody_components["pred_overlap"]
pred_overlap = F.sigmoid(pred_overlap)
overlap_mask = (pred_overlap > 0.1).float()
scores = pred_overlap * scores_positional_density * overlap_mask
scores = torch.cat([scores, scores], dim=1)
scores_numpy = scores.detach().to("cpu").numpy()
row_ind, col_ind = linear_sum_assignment(-scores_numpy)
col_ind = torch.from_numpy(col_ind)
row_ind = torch.from_numpy(row_ind)
print(f'@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ col_ind: {type(col_ind)}')
print(f'@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ num_bodies: {type(num_bodies)}')
col_ind = (col_ind % (num_bodies+1)) + 1
# row_ind, col_ind = torch.from_numpy(row_ind).to(device), torch.from_numpy(col_ind).to(device)
row_ind, col_ind = row_ind.to(device), col_ind.to(device)
pred_body_ids_for_bodies = torch.arange(1, num_bodies+1).to(device)
pred_body_ids_for_hands = torch.FloatTensor([num_bodies+1] * num_hands).to(device)
pred_body_ids_for_hands[row_ind] = col_ind.float()
pred_body_ids[hand_indices] = pred_body_ids_for_hands
pred_body_ids[body_indices] = pred_body_ids_for_bodies.float()
pred_instances[0].pred_body_ids = pred_body_ids
return pred_instances
def OverlapEstimationLoss(pred_overlap, ioa_gt, cfg):
weight = cfg.MODEL.ROI_OVERLAP_ESTIMATION_HEAD.LOSS_WEIGHT
overlap_gt = (ioa_gt > 0).float()
loss = weight * F.binary_cross_entropy_with_logits(pred_overlap, overlap_gt, reduction="mean")
return loss
@ROI_OVERLAP_ESTIMATION_HEAD_REGISTRY.register()
class OverlapEstimationHead(nn.Module):
def __init__(self, cfg, input_shape: ShapeSpec):
super(OverlapEstimationHead, self).__init__()
conv_params = cfg.MODEL.ROI_OVERLAP_ESTIMATION_HEAD.CONV_DIMS
conv_norm = cfg.MODEL.ROI_OVERLAP_ESTIMATION_HEAD.CONV_NORM
fc_dims = cfg.MODEL.ROI_OVERLAP_ESTIMATION_HEAD.FC_DIM
num_fc = len(fc_dims)
self.cfg = cfg
self.device = cfg.MODEL.DEVICE
self._output_size = (2*input_shape.channels, input_shape.height, input_shape.width)
self.conv_norm_relus = []
for k, conv_param in enumerate(conv_params):
conv = Conv2d(
self._output_size[0],
conv_param[0],
kernel_size=conv_param[1],
padding=conv_param[2],
bias=not conv_norm,
norm=get_norm(conv_norm, conv_param[0]),
activation=F.relu,
)
self.add_module("overlap_estimation_conv{}".format(k+1), conv)
self.conv_norm_relus.append(conv)
self._output_size = (conv_param[0], self._output_size[1], self._output_size[2])
for layer in self.conv_norm_relus:
weight_init.c2_msra_fill(layer)
self.fcs = []
for k in range(num_fc):
fc = Linear(np.prod(self._output_size), fc_dims[k])
self.add_module("overlap_estimation_fc{}".format(k+1), fc)
self.fcs.append(fc)
self._output_size = fc_dims[k]
for layer in self.fcs:
weight_init.c2_xavier_fill(layer)
def forward(self, pred_mu, pred_mu_features, handbody_components, instances):
if self.training:
hand_proposal_features = handbody_components["hand_proposal_features"]
body_proposal_features = handbody_components["body_proposal_features"]
hand_proposal_boxes = handbody_components["hand_proposal_boxes"]
body_proposal_boxes = handbody_components["body_proposal_boxes"]
proposal_body_ids_hands = handbody_components["proposal_body_ids_hands"]
proposal_body_ids_bodies = handbody_components["proposal_body_ids_bodies"]
ioa_proposal_boxes = handbody_components["ioa_proposal_boxes"]
num_hands = hand_proposal_boxes.shape[0]
num_bodies = body_proposal_features.shape[0]
if num_hands ==0 or num_bodies == 0:
return {"loss overlap estimation": torch.sum(body_proposal_boxes) * 0,}
pred_overlap = []
for i in range(num_hands):
h_f = hand_proposal_features[i: i+1]
new_body_proposal_features = torch.cat([body_proposal_features, h_f], dim=0)
h_f = hand_proposal_features[i:i+1].repeat(num_bodies+1, 1, 1, 1)
hb_f = torch.cat([h_f, new_body_proposal_features], dim=1)
for num in range(len(self.conv_norm_relus)):
hb_f = self.conv_norm_relus[num](hb_f)
hb_f = torch.flatten(hb_f, start_dim=1)
for num in range(len(self.fcs)-1):
hb_f = F.relu(self.fcs[num](hb_f))
if len(self.fcs) == 1:
num = -1
hb_f = self.fcs[num+1](hb_f)
hb_f = hb_f.squeeze(1).unsqueeze(0)
pred_overlap.append(hb_f)
pred_overlap = torch.cat(pred_overlap, dim=0)
torch_ones = torch.ones(num_hands, 1).to(ioa_proposal_boxes.device)
ioa_proposal_boxes = torch.cat([ioa_proposal_boxes, torch_ones], dim=1)
return {"loss ioa prediction": OverlapEstimationLoss(pred_overlap, ioa_proposal_boxes, self.cfg),}
else:
pred_overlap = []
hand_boxes = handbody_components["hand_boxes"]
body_boxes = handbody_components["body_boxes"]
hand_features = handbody_components["hand_features"]
body_features = handbody_components["body_features"]
num_hands = hand_boxes.shape[0]
num_bodies = body_boxes.shape[0]
for i in range(num_hands):
h_f = hand_features[i: i+1]
new_body_features = torch.cat([body_features, h_f], dim=0)
h_f = hand_features[i:i+1].repeat(num_bodies+1, 1, 1, 1)
hb_f = torch.cat([h_f, new_body_features], dim=1)
for num in range(len(self.conv_norm_relus)):
hb_f = self.conv_norm_relus[num](hb_f)
hb_f = torch.flatten(hb_f, start_dim=1)
for num in range(len(self.fcs)-1):
hb_f = F.relu(self.fcs[num](hb_f))
if len(self.fcs) == 1:
num = -1
hb_f = self.fcs[num+1](hb_f)
hb_f = hb_f.squeeze(1).unsqueeze(0)
pred_overlap.append(hb_f)
if pred_overlap:
pred_overlap = torch.cat(pred_overlap, dim=0)
handbody_components["num_hands"] = num_hands
handbody_components["num_bodies"] = num_bodies
handbody_components["pred_overlap"] = pred_overlap
handbody_components["pred_mu"] = pred_mu
return OverlapEstimationInference(self.cfg, handbody_components, instances, self.device)
def build_overlap_estimation_head(cfg, input_shape):
name = cfg.MODEL.ROI_OVERLAP_ESTIMATION_HEAD.NAME
return ROI_OVERLAP_ESTIMATION_HEAD_REGISTRY.get(name)(cfg, input_shape)
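For reference, one way to get rid of the scipy dependency in OverlapEstimationInference is a pure-torch greedy matching. This is only a sketch, not the author's fix: greedy matching approximates the optimal Hungarian assignment, and a Python loop like this still bakes the iteration count into the trace, so it only helps for a fixed maximum number of detections.
import torch

def greedy_assignment(scores: torch.Tensor):
    """Greedy stand-in for scipy.optimize.linear_sum_assignment(-scores).

    scores: (num_rows, num_cols) score matrix, higher is better.
    Returns (row_ind, col_ind) as 1-D LongTensors built from torch ops only.
    """
    num_rows, num_cols = scores.shape
    work = scores.clone()
    rows, cols = [], []
    for _ in range(min(num_rows, num_cols)):
        flat = torch.argmax(work)                              # best remaining (row, col) pair
        r = torch.div(flat, num_cols, rounding_mode="floor")
        c = flat % num_cols
        rows.append(r)
        cols.append(c)
        work[r, :] = float("-inf")                             # this hand row is matched
        work[:, c] = float("-inf")                             # this body column is taken
    if not rows:
        empty = torch.zeros(0, dtype=torch.long, device=scores.device)
        return empty, empty
    return torch.stack(rows), torch.stack(cols)

# Hypothetical drop-in for the block above (using the torch scores tensor before .numpy()):
# row_ind, col_ind = greedy_assignment(scores)
# col_ind = (col_ind % (num_bodies + 1)) + 1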
This part needs to be rewritten as well (return the raw _fields dict instead of the list of Instances):
# return results
return results[0]._fields
rcnn.py
from detectron2.modeling import GeneralizedRCNN
from ..postprocessing import detector_postprocess
from detectron2.modeling.meta_arch.build import META_ARCH_REGISTRY
__all__ = ["ModifiedPostProcessingRCNN"]
@META_ARCH_REGISTRY.register()
class ModifiedPostProcessingRCNN(GeneralizedRCNN):
def __init__(self, cfg):
super().__init__(cfg)
def forward(self, batched_inputs, height, width):
if not self.training:
return self.inference(batched_inputs, height, width)
images = self.preprocess_image(batched_inputs)
if "instances" in batched_inputs[0]:
gt_instances = [x["instances"].to(self.device) for x in batched_inputs]
elif "targets" in batched_inputs[0]:
log_first_n(
logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10
)
gt_instances = [x["targets"].to(self.device) for x in batched_inputs]
else:
gt_instances = None
features = self.backbone(images.tensor)
if self.proposal_generator:
proposals, proposal_losses = self.proposal_generator(images, features, gt_instances)
else:
assert "proposals" in batched_inputs[0]
proposals = [x["proposals"].to(self.device) for x in batched_inputs]
proposal_losses = {}
_, detector_losses = self.roi_heads(images, height, width, features, proposals, gt_instances)
if self.vis_period > 0:
storage = get_event_storage()
if storage.iter % self.vis_period == 0:
self.visualize_training(batched_inputs, proposals)
losses = {}
losses.update(detector_losses)
losses.update(proposal_losses)
return losses
def inference(self, batched_inputs, height, width, detected_instances=None, do_postprocess=True):
assert not self.training
images = self.preprocess_image(batched_inputs)
features = self.backbone(images.tensor)
if detected_instances is None:
if self.proposal_generator:
proposals, _ = self.proposal_generator(images, features, None)
else:
assert "proposals" in batched_inputs[0]
proposals = [x["proposals"].to(self.device) for x in batched_inputs]
results, _ = self.roi_heads(images, height, width, features, proposals, None)
else:
detected_instances = [x.to(self.device) for x in detected_instances]
results = self.roi_heads.forward_with_given_boxes(height, width, features, detected_instances)
if do_postprocess:
return self._postprocess(results, batched_inputs, images.image_sizes)
else:
# return results
return results[0]._fields
"""
instances[0]._fields
'pred_boxes':
Boxes(tensor([[ 579.6041, 302.4180, 701.0220, 373.7803],
[ 600.7958, 48.7563, 1083.4999, 696.3106],
[ 142.3038, 337.8587, 481.7955, 707.3322],
[ 390.4144, 340.0520, 464.8652, 424.0021]], device='cuda:0'))
'scores':
tensor([0.9806, 0.9795, 0.9651, 0.9143], device='cuda:0')
'pred_classes':
tensor([0, 1, 1, 0], device='cuda:0')
'pred_masks':
tensor([[[[0.0701, 0.0951, 0.1169, ..., 0.0881, 0.0853, 0.0665],
[0.1258, 0.1961, 0.2977, ..., 0.2620, 0.2137, 0.1386],
[0.1894, 0.3168, 0.4793, ..., 0.4686, 0.3339, 0.1788],
...,
[0.3384, 0.4685, 0.6113, ..., 0.5994, 0.5014, 0.2311],
[0.2737, 0.3955, 0.5539, ..., 0.4649, 0.3806, 0.1805],
[0.1767, 0.2438, 0.3607, ..., 0.2455, 0.2246, 0.1151]]],
[[[0.6020, 0.7644, 0.8599, ..., 0.9930, 0.9871, 0.8904],
[0.7081, 0.8718, 0.9489, ..., 0.9989, 0.9977, 0.9610],
[0.8359, 0.9588, 0.9896, ..., 0.9993, 0.9983, 0.9580],
...,
[0.7232, 0.9056, 0.9609, ..., 0.9906, 0.9904, 0.8883],
[0.6591, 0.8414, 0.8881, ..., 0.9833, 0.9800, 0.8654],
[0.6129, 0.7868, 0.8474, ..., 0.9486, 0.9588, 0.8238]]],
[[[0.2969, 0.3252, 0.6798, ..., 0.8717, 0.6705, 0.3716],
[0.3674, 0.4133, 0.7467, ..., 0.9549, 0.8151, 0.4765],
[0.4921, 0.5654, 0.8138, ..., 0.9647, 0.8356, 0.441...
'pred_body_ids':
tensor([1., 1., 2., 2.], device='cuda:0')
"""
def _postprocess(self, instances, batched_inputs, image_sizes):
processed_results = []
for results_per_image, input_per_image, image_size in zip(
instances, batched_inputs, image_sizes
):
height = input_per_image.get("height", image_size[0])
width = input_per_image.get("width", image_size[1])
r = detector_postprocess(results_per_image, height, width)
processed_results.append({"instances": r})
return processed_results
- The input resolution for this model has to be divisible by 32.
- onnxruntime aborts when the input to boxes.max() below collapses to a scalar (i.e. when there are zero detections), so change it to torch.max(..., dim=0, keepdim=True):
File "/home/appuser/.local/lib/python3.8/site-packages/torchvision/ops/boxes.py", line 89
# only on the class idx, and is large enough so that boxes
# from different classes do not overlap
if boxes.numel() == 0:
~~~~~~~~~~~~~~~~~~~~~~
return torch.empty((0,), dtype=torch.int64, device=boxes.device)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
# max_coordinate = boxes.max()
max_coordinate, _ = torch.max(boxes, dim=0, keepdim=True)
offsets = idxs.to(boxes) * (max_coordinate + torch.tensor(1).to(boxes))
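The difference is only in the reduced shape, as this tiny eager-mode check shows; whether the downstream offsets arithmetic still does what batched_nms intends with a (1, 4) max_coordinate instead of a scalar is worth double-checking separately.
import torch

boxes = torch.rand(3, 4)
print(boxes.max().shape)                                  # torch.Size([])  -> 0-dim scalar
print(torch.max(boxes, dim=0, keepdim=True)[0].shape)     # torch.Size([1, 4])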
linear_sum_assignment: the scipy call below still has to be replaced before tracing can succeed.
row_ind, col_ind = linear_sum_assignment(-scores_numpy)
Retrying, this time via caffe2_tracing. Because Caffe2 is required, a PyTorch version no newer than 1.10.1 has to be installed.
Dockerfile.onnxexport
FROM nvidia/cuda:11.1.1-cudnn8-devel-ubuntu20.04
ENV DEBIAN_FRONTEND noninteractive
RUN apt-get update && apt-get install -y \
ca-certificates \
python3-dev \
git \
wget \
sudo \
ninja-build \
python-is-python3 \
python3-pip \
libgl1-mesa-dev \
libglib2.0-0 \
libsm6 \
libxrender1 \
libxext-dev \
nano \
gstreamer1.0-plugins-base \
gstreamer1.0-plugins-good \
gstreamer1.0-plugins-bad \
gstreamer1.0-plugins-ugly \
gstreamer1.0-libav \
gstreamer1.0-doc \
gstreamer1.0-tools \
gstreamer1.0-x \
gstreamer1.0-alsa \
gstreamer1.0-gl \
gstreamer1.0-gtk3 \
&& sed -i 's/# set linenumbers/set linenumbers/g' /etc/nanorc \
&& apt clean \
&& rm -rf /var/lib/apt/lists/*
# create a non-root user
ENV USERNAME=user
RUN echo "root:root" | chpasswd \
&& adduser --disabled-password --gecos "" "${USERNAME}" \
&& echo "${USERNAME}:${USERNAME}" | chpasswd \
&& echo "%${USERNAME} ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers.d/${USERNAME} \
&& chmod 0440 /etc/sudoers.d/${USERNAME} \
&& mkdir -p /home/${USERNAME}
USER ${USERNAME}
# RUN echo HOME: ${HOME}
# RUN echo PWD: `pwd`
ENV HOME=/home/${USERNAME}
WORKDIR ${HOME}
ENV PATH="${HOME}/.local/bin:${PATH}"
# install dependencies
# See https://pytorch.org/ for other options if you use a different version of CUDA
RUN pip install --user pip -U
RUN pip install --user tensorboard cmake onnx # cmake from apt-get is too old
RUN pip install --user torch==1.10 torchvision==0.11.1 -f https://download.pytorch.org/whl/cu111/torch_stable.html
RUN pip install opencv-contrib-python==4.1.2.30 scipy scikit-image
RUN pip install onnx==1.13.1 onnxsim==0.4.17 onnxruntime-gpu==1.13.1 future==0.18.3
RUN pip install --user 'git+https://github.com/facebookresearch/fvcore'
# install detectron2
RUN git clone https://github.com/facebookresearch/detectron2 detectron2_repo
# set FORCE_CUDA because during `docker build` cuda is not accessible
ENV FORCE_CUDA="1"
# This will by default build detectron2 for all common cuda architectures and take a lot more time,
# because inside `docker build`, there is no way to tell which architecture will be used.
ARG TORCH_CUDA_ARCH_LIST="Ampere"
ENV TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}"
RUN pip install --user -e detectron2_repo
# Set a fixed model cache directory.
ENV FVCORE_CACHE="/tmp"
WORKDIR ${HOME}/detectron2_repo
RUN echo "export QT_X11_NO_MITSHM=1" >> ${HOME}/.bashrc \
&& echo "sudo chmod 777 /dev/video*" >> ${HOME}/.bashrc
docker build -t pinto0309/bodyhands_onnxexport:latest -f Dockerfile.onnxexport .
xhost +local: && \
docker run -it --rm --gpus all \
-v $PWD:/home/user/detectron2_repo/BodyHands \
-v /tmp/.X11-unix/:/tmp/.X11-unix:rw \
--device /dev/video0:/dev/video0:mwr \
--net=host \
-e XDG_RUNTIME_DIR=$XDG_RUNTIME_DIR \
-e DISPLAY=$DISPLAY \
--privileged \
pinto0309/bodyhands_onnxexport:latest /bin/bash
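Inside the container it is worth confirming that the installed torch build actually bundles Caffe2 before attempting the caffe2_tracing export. A small check (a sketch under the assumption that newer PyTorch wheels drop the caffe2 Python package, which is why torch<=1.10.x is used here):
import torch
print(torch.__version__)
# caffe2_tracing needs the caffe2 Python package that older PyTorch wheels still ship;
# this import fails on builds compiled without Caffe2.
from caffe2.python import core  # noqa: F401
print("Caffe2 is available")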
#########################################################
The following per-file edits are needed before running the export (the leading numbers are line numbers within each file):
tools/deploy/export_model.py
27 from BodyHands.bodyhands.config.config import add_bodyhands_config
34 add_bodyhands_config(cfg)
/home/user/detectron2_repo/BodyHands/bodyhands/data/dataset_mapper.py
7 from BodyHands.bodyhands.data import detection_utils as utils
/home/user/detectron2_repo/BodyHands/bodyhands/modeling/roi_heads/extract_handbody_components.py
3 from BodyHands.bodyhands.utils.extend_utils_boxes import pairwise_ioa
python tools/deploy/export_model.py \
--config-file BodyHands/configs/BodyHands.yaml \
--output ./ \
--format onnx \
--sample-image BodyHands/480x640.png \
--export-method caffe2_tracing MODEL.DEVICE cuda MODEL.WEIGHTS BodyHands/models/model.pth
No luck. Caffe2Tracer only knows the meta-architectures registered in META_ARCH_CAFFE2_EXPORT_TYPE_MAP, so the custom ModifiedPostProcessingRCNN raises a KeyError:
Traceback (most recent call last):
File "tools/deploy/export_model.py", line 221, in <module>
exported_model = export_caffe2_tracing(cfg, torch_model, sample_inputs)
File "tools/deploy/export_model.py", line 44, in export_caffe2_tracing
tracer = Caffe2Tracer(cfg, torch_model, inputs)
File "/home/user/detectron2_repo/detectron2/export/api.py", line 60, in __init__
C2MetaArch = META_ARCH_CAFFE2_EXPORT_TYPE_MAP[cfg.MODEL.META_ARCHITECTURE]
KeyError: 'ModifiedPostProcessingRCNN'
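The KeyError happens because Caffe2Tracer looks the meta-architecture up in META_ARCH_CAFFE2_EXPORT_TYPE_MAP, which only contains detectron2's stock architectures. A hypothetical, untested workaround would be to map the custom class onto the stock GeneralizedRCNN converter before the tracer is built, although the modified forward()/roi_heads signatures in ModifiedPostProcessingRCNN would most likely still break further down the export path:
# Hypothetical sketch, e.g. near the top of tools/deploy/export_model.py.
# Assumes this detectron2 revision still ships detectron2.export.caffe2_modeling.
from detectron2.export.caffe2_modeling import (
    META_ARCH_CAFFE2_EXPORT_TYPE_MAP,
    Caffe2GeneralizedRCNN,
)

# Treat the BodyHands meta-architecture like a plain GeneralizedRCNN for Caffe2 export.
META_ARCH_CAFFE2_EXPORT_TYPE_MAP["ModifiedPostProcessingRCNN"] = Caffe2GeneralizedRCNN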