Running MMPose in C++
Installing MMPose
- Clone the repository.
```bash
cd ~/Documents
git clone https://github.com/open-mmlab/mmpose.git
cd mmpose
```
- Install the dependencies.
```bash
pip install -r requirements.txt
```
- Install MMPose.
```bash
pip install -e .
```
- Download the model.
```bash
mkdir checkpoints
cd checkpoints
mim download mmpose --config rtmpose-l_8xb256-420e_coco-256x192 --dest .
```
- Save any image that contains a person as ./demo/resources/demo.png.
Installing MMDeploy
- Clone the repository.
```bash
cd ~/Documents
git clone https://github.com/open-mmlab/mmdeploy.git
cd mmdeploy
```
- Install the dependencies.
```bash
pip install -r requirements.txt
```
- Install MMDeploy.
```bash
pip install -e .
```
Installing MMDetection
- Clone the repository.
```bash
cd ~/Documents
git clone https://github.com/open-mmlab/mmdetection.git
cd mmdetection
```
- Install MMDetection.
```bash
python3 -m pip install -e .
```
- Download the model.
```bash
mkdir checkpoints
cd checkpoints
mim download mmdet --config rtmdet_l_8xb32-300e_coco --dest .
```
Converting to ONNX
Here, RTMPose-l and RTMDet are converted to ONNX format.
Several additional packages are required for the conversion; please ask an AI assistant or similar if you hit missing dependencies (I have forgotten exactly which ones were needed).
- Convert RTMPose to ONNX format.
```bash
cd ~/Documents/mmdeploy
python tools/deploy.py \
    configs/mmpose/pose-detection_simcc_onnxruntime_dynamic.py \
    ~/Documents/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_coco-256x192.py \
    ~/Documents/mmpose/checkpoints/rtmpose-l_simcc-coco_pt-aic-coco_420e-256x192-1352a4d2_20230127.pth \
    ~/Documents/mmpose/demo/resources/demo.png \
    --work-dir mmdeploy_models/rtmpose_coco_onnx
```
- Convert RTMDet to ONNX format.
```bash
cd ~/Documents/mmdeploy
python tools/deploy.py \
    configs/mmdet/detection/detection_onnxruntime_dynamic.py \
    ~/Documents/mmdetection/configs/rtmdet/rtmdet_l_8xb32-300e_coco.py \
    ~/Documents/mmdetection/checkpoints/rtmdet_l_8xb32-300e_coco_20220719_112030-5a0be7c4.pth \
    ~/Documents/mmdetection/demo/demo.jpg \
    --work-dir mmdeploy_models/rtmdet_coco_onnx
```
- Check the converted ONNX models. If the following two files exist, the conversion succeeded.
```bash
ls ~/Documents/mmdeploy/mmdeploy_models/rtmpose_coco_onnx/end2end.onnx
ls ~/Documents/mmdeploy/mmdeploy_models/rtmdet_coco_onnx/end2end.onnx
```
Installing onnxruntime for C++
```bash
cd ~/Documents
wget https://github.com/microsoft/onnxruntime/releases/download/v1.22.0/onnxruntime-linux-x64-gpu-1.22.0.tgz
tar -zxvf onnxruntime-linux-x64-gpu-1.22.0.tgz
```
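Before moving on, it can be worth checking that the extracted package compiles and links. The following is a minimal sketch that only prints the ONNX Runtime version string; the file name ort_version.cpp and the g++ command in the comment are just one possible way to build it with the paths used above.

```cpp
// ort_version.cpp: smoke test for the C++ onnxruntime package.
// Build example (adjust the path to wherever you extracted the archive):
//   g++ ort_version.cpp \
//       -I$HOME/Documents/onnxruntime-linux-x64-gpu-1.22.0/include \
//       -L$HOME/Documents/onnxruntime-linux-x64-gpu-1.22.0/lib -lonnxruntime \
//       -o ort_version
// At runtime, libonnxruntime.so must be findable, e.g. via
//   export LD_LIBRARY_PATH=$HOME/Documents/onnxruntime-linux-x64-gpu-1.22.0/lib:$LD_LIBRARY_PATH
#include <iostream>
#include <onnxruntime_cxx_api.h>

int main() {
    // If this prints a version, the headers and the shared library are set up correctly.
    std::cout << "onnxruntime version: "
              << OrtGetApiBase()->GetVersionString() << std::endl;
    return 0;
}
```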
Test code
Place the RTMPose model at ~/Documents/mmdeploy/mmdeploy_models/rtmpose_coco_onnx/end2end.onnx and the RTMDet model at ~/Documents/mmdeploy/mmdeploy_models/rtmdet_coco_onnx/end2end.onnx.
Place the input image at ~/Documents/mmpose/demo/resources/demo.png.
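Before running the full pipeline, you can confirm that the converted models actually expose the tensor names the test code assumes ("input" plus "dets"/"labels" for RTMDet, and "input" plus "simcc_x"/"simcc_y" for RTMPose). The following is a minimal sketch for that; the file name inspect_onnx.cpp is just an example, and it is built the same way as the version check above.

```cpp
// inspect_onnx.cpp: print the input/output tensor names of an ONNX model.
#include <iostream>
#include <onnxruntime_cxx_api.h>

int main(int argc, char** argv) {
    if (argc < 2) {
        std::cerr << "usage: " << argv[0] << " <model.onnx>" << std::endl;
        return 1;
    }
    Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "InspectOnnx");
    Ort::SessionOptions options;
    Ort::Session session(env, argv[1], options);
    Ort::AllocatorWithDefaultOptions allocator;

    // List the input tensor names.
    for (size_t i = 0; i < session.GetInputCount(); ++i) {
        auto name = session.GetInputNameAllocated(i, allocator);
        std::cout << "input  " << i << ": " << name.get() << std::endl;
    }
    // List the output tensor names.
    for (size_t i = 0; i < session.GetOutputCount(); ++i) {
        auto name = session.GetOutputNameAllocated(i, allocator);
        std::cout << "output " << i << ": " << name.get() << std::endl;
    }
    return 0;
}
```

For example, running it on the RTMDet model (`./inspect_onnx ~/Documents/mmdeploy/mmdeploy_models/rtmdet_coco_onnx/end2end.onnx`) should list `input`, `dets`, and `labels` if the export matches what the test code below expects.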
- Test code that loads the ONNX models in C++ and runs inference:
```cpp:main.cpp
#include <iostream>
#include <vector>
#include <string>
#include <numeric>
#include <algorithm>
#include <cstdlib>
#include <chrono>
#include <opencv2/opencv.hpp>
#include <onnxruntime_cxx_api.h>
#include <omp.h>

// ========== Data structures ==========
struct KeyPoint {
    float x;
    float y;
    float score;
};

struct BBox {
    cv::Rect rect;
    float score;
};

// ========== Result of pose estimation for one person ==========
struct PoseResult {
    BBox bbox;
    std::vector<KeyPoint> keypoints;
};

// ========== Stage 1: person detector ==========
std::vector<BBox> run_detector(Ort::Session& session, const cv::Mat& image) {
    // --- 1. Preprocessing ---
    const float INPUT_WIDTH = 640.0;
    const float INPUT_HEIGHT = 640.0;
    float scale = std::min(INPUT_WIDTH / static_cast<float>(image.cols),
                           INPUT_HEIGHT / static_cast<float>(image.rows));
    int new_w = static_cast<int>(image.cols * scale);
    int new_h = static_cast<int>(image.rows * scale);

    cv::Mat resized_image;
    cv::resize(image, resized_image, cv::Size(new_w, new_h));
    cv::Mat letterbox_image = cv::Mat::zeros(cv::Size(INPUT_WIDTH, INPUT_HEIGHT), CV_8UC3);
    resized_image.copyTo(letterbox_image(cv::Rect(0, 0, new_w, new_h)));

    cv::Mat blob = cv::dnn::blobFromImage(letterbox_image, 1.0f / 255.0f,
                                          cv::Size(INPUT_WIDTH, INPUT_HEIGHT),
                                          cv::Scalar(0, 0, 0), true, false, CV_32F);

    // --- 2. Inference ---
    auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
    std::vector<int64_t> input_shape = {1, 3, (int64_t)INPUT_HEIGHT, (int64_t)INPUT_WIDTH};
    Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
        memory_info, blob.ptr<float>(), blob.total(), input_shape.data(), input_shape.size());

    std::vector<const char*> input_names = {"input"};
    std::vector<const char*> output_names = {"dets", "labels"};
    auto output_tensors = session.Run(Ort::RunOptions{nullptr},
                                      input_names.data(), &input_tensor, 1,
                                      output_names.data(), 2);

    // --- 3. Postprocessing ---
    const float* dets_data = output_tensors[0].GetTensorData<float>();
    const int64_t* labels_data = output_tensors[1].GetTensorData<int64_t>();
    auto dets_shape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape();
    const int num_dets = static_cast<int>(dets_shape[1]);

    std::vector<BBox> final_bboxes;
    for (int i = 0; i < num_dets; ++i) {
        if (labels_data[i] == 0) {  // class 0 = person
            float score = dets_data[i * 5 + 4];
            if (score > 0.3) {
                // Map the box from letterbox coordinates back to the original image.
                int x1 = static_cast<int>(dets_data[i * 5 + 0] / scale);
                int y1 = static_cast<int>(dets_data[i * 5 + 1] / scale);
                int x2 = static_cast<int>(dets_data[i * 5 + 2] / scale);
                int y2 = static_cast<int>(dets_data[i * 5 + 3] / scale);
                final_bboxes.push_back({cv::Rect(x1, y1, x2 - x1, y2 - y1), score});
            }
        }
    }
    return final_bboxes;
}

// ========== Stage 2: pose estimator ==========
// Decode the SimCC outputs of one person into keypoints.
std::vector<KeyPoint> decode_simcc(const Ort::Value& simcc_x_tensor,
                                   const Ort::Value& simcc_y_tensor,
                                   const cv::Size& model_input_size) {
    const float* simcc_x_data = simcc_x_tensor.GetTensorData<float>();
    const float* simcc_y_data = simcc_y_tensor.GetTensorData<float>();
    auto x_shape = simcc_x_tensor.GetTensorTypeAndShapeInfo().GetShape();
    auto y_shape = simcc_y_tensor.GetTensorTypeAndShapeInfo().GetShape();
    const int num_keypoints = static_cast<int>(x_shape[1]);
    const int x_dim = static_cast<int>(x_shape[2]);
    const int y_dim = static_cast<int>(y_shape[2]);
    const float simcc_split_ratio_x = static_cast<float>(x_dim) / model_input_size.width;
    const float simcc_split_ratio_y = static_cast<float>(y_dim) / model_input_size.height;

    std::vector<KeyPoint> keypoints;
    keypoints.reserve(num_keypoints);
    for (int k = 0; k < num_keypoints; ++k) {
        // Argmax along the x axis, converted back to input-pixel coordinates.
        const float* x_k_data = simcc_x_data + k * x_dim;
        int max_idx_x = std::distance(x_k_data, std::max_element(x_k_data, x_k_data + x_dim));
        float coord_x = static_cast<float>(max_idx_x) / simcc_split_ratio_x;
        // Same along the y axis.
        const float* y_k_data = simcc_y_data + k * y_dim;
        int max_idx_y = std::distance(y_k_data, std::max_element(y_k_data, y_k_data + y_dim));
        float coord_y = static_cast<float>(max_idx_y) / simcc_split_ratio_y;

        float final_score = std::max(x_k_data[max_idx_x], y_k_data[max_idx_y]);
        keypoints.push_back({coord_x, coord_y, final_score});
    }
    return keypoints;
}

// ========== Main (computation and drawing are separated) ==========
int main() {
    // "~" is not expanded by the C++ file APIs, so build the paths from $HOME.
    // These are the locations described above (converted models and demo image).
    const std::string home = std::getenv("HOME");
    const std::string detector_model_path = home + "/Documents/mmdeploy/mmdeploy_models/rtmdet_coco_onnx/end2end.onnx";
    const std::string pose_model_path = home + "/Documents/mmdeploy/mmdeploy_models/rtmpose_coco_onnx/end2end.onnx";
    const std::string image_path = home + "/Documents/mmpose/demo/resources/demo.png";

    Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "PipelineDemo");

    // --- Session initialization ---
    Ort::SessionOptions detector_session_options;
    detector_session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
    Ort::Session detector_session(env, detector_model_path.c_str(), detector_session_options);

    Ort::SessionOptions pose_session_options;
    pose_session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
    Ort::Session pose_session(env, pose_model_path.c_str(), pose_session_options);

    cv::Mat original_image = cv::imread(image_path);
    if (original_image.empty()) {
        std::cerr << "Could not read the image: " << image_path << std::endl;
        return -1;
    }

    using Clock = std::chrono::high_resolution_clock;
    auto start_time = Clock::now();

    // --- Stage 1: person detection ---
    std::vector<BBox> bboxes = run_detector(detector_session, original_image);
    std::cout << "Detected " << bboxes.size() << " persons." << std::endl;
    if (bboxes.empty()) return 0;

    auto detector_end_time = Clock::now();
    std::chrono::duration<double, std::milli> detector_elapsed_time = detector_end_time - start_time;
    std::cout << "Detector processing time: " << detector_elapsed_time.count() << " ms" << std::endl;

    // --- 1. Parallel computation loop ---
    // Vector that receives the per-person results.
    std::vector<PoseResult> final_results(bboxes.size());

    #pragma omp parallel for
    for (int i = 0; i < static_cast<int>(bboxes.size()); ++i) {
        const auto& bbox_info = bboxes[i];

        // --- Preprocessing: pad the box, crop, resize, and letterbox to 192x256 ---
        cv::Rect padded_bbox = bbox_info.rect;
        padded_bbox.x -= 10;
        padded_bbox.y -= 10;
        padded_bbox.width += 20;
        padded_bbox.height += 20;
        padded_bbox &= cv::Rect(0, 0, original_image.cols, original_image.rows);
        if (padded_bbox.width == 0 || padded_bbox.height == 0) continue;

        cv::Mat cropped_image = original_image(padded_bbox);
        const cv::Size pose_input_size(192, 256);
        double scale = std::min(static_cast<double>(pose_input_size.width) / cropped_image.cols,
                                static_cast<double>(pose_input_size.height) / cropped_image.rows);
        int new_w = static_cast<int>(cropped_image.cols * scale);
        int new_h = static_cast<int>(cropped_image.rows * scale);
        cv::Mat resized_crop;
        cv::resize(cropped_image, resized_crop, cv::Size(new_w, new_h));
        cv::Mat padded_crop = cv::Mat::zeros(pose_input_size, CV_8UC3);
        resized_crop.copyTo(padded_crop(cv::Rect(0, 0, new_w, new_h)));
        cv::Mat pose_blob = cv::dnn::blobFromImage(padded_crop, 1.0f / 255.0f, pose_input_size,
                                                   cv::Scalar(0, 0, 0), true, false, CV_32F);

        // --- Inference ---
        auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
        std::vector<int64_t> pose_input_shape = {1, 3, pose_input_size.height, pose_input_size.width};
        std::vector<const char*> pose_input_names = {"input"};
        std::vector<const char*> pose_output_names = {"simcc_x", "simcc_y"};
        Ort::Value pose_input_tensor = Ort::Value::CreateTensor<float>(
            memory_info, pose_blob.ptr<float>(), pose_blob.total(),
            pose_input_shape.data(), pose_input_shape.size());
        auto pose_output_tensors = pose_session.Run(Ort::RunOptions{nullptr},
                                                    pose_input_names.data(), &pose_input_tensor, 1,
                                                    pose_output_names.data(), 2);

        // --- Postprocessing ---
        std::vector<KeyPoint> keypoints_in_padded =
            decode_simcc(pose_output_tensors[0], pose_output_tensors[1], pose_input_size);

        std::vector<KeyPoint> final_keypoints;
        final_keypoints.reserve(keypoints_in_padded.size());
        for (const auto& kp : keypoints_in_padded) {
            // Map from the padded crop back into original-image coordinates.
            float x_in_crop = kp.x / scale;
            float y_in_crop = kp.y / scale;
            final_keypoints.push_back({x_in_crop + padded_bbox.x, y_in_crop + padded_bbox.y, kp.score});
        }

        // --- Store the result ---
        final_results[i] = {bbox_info, final_keypoints};
    }

    auto end_time = Clock::now();
    std::chrono::duration<double, std::milli> elapsed_time = end_time - start_time;
    std::cout << "Total processing time: " << elapsed_time.count() << " ms" << std::endl;

    // --- 2. Serial drawing loop ---
    const std::vector<std::pair<int, int>> skeleton = {
        {15, 13}, {13, 11}, {16, 14}, {14, 12}, {11, 12}, {5, 11}, {6, 12},
        {5, 6}, {5, 7}, {6, 8}, {7, 9}, {8, 10}, {1, 2}, {0, 1}, {0, 2},
        {1, 3}, {2, 4}, {0, 5}, {0, 6}};
    const cv::Scalar bone_color(0, 255, 0);
    const cv::Scalar joint_color(0, 0, 255);
    const float KEYPOINT_CONFIDENCE_THRESHOLD = 0.45;

    for (const auto& result : final_results) {
        if (result.bbox.rect.empty() || result.keypoints.empty()) continue;

        // Draw the bounding box.
        cv::rectangle(original_image, result.bbox.rect, cv::Scalar(255, 0, 0), 2);

        // Draw the joints (points).
        for (const auto& kp : result.keypoints) {
            if (kp.score > KEYPOINT_CONFIDENCE_THRESHOLD) {
                cv::circle(original_image,
                           cv::Point(static_cast<int>(kp.x), static_cast<int>(kp.y)),
                           3, joint_color, -1);
            }
        }

        // Draw the skeleton (lines).
        for (const auto& bone : skeleton) {
            const KeyPoint& kp1 = result.keypoints[bone.first];
            const KeyPoint& kp2 = result.keypoints[bone.second];
            if (kp1.score > KEYPOINT_CONFIDENCE_THRESHOLD && kp2.score > KEYPOINT_CONFIDENCE_THRESHOLD) {
                cv::line(original_image,
                         cv::Point(static_cast<int>(kp1.x), static_cast<int>(kp1.y)),
                         cv::Point(static_cast<int>(kp2.x), static_cast<int>(kp2.y)),
                         bone_color, 2);
            }
        }
    }

    cv::imshow("Final Pipeline Result", original_image);
    cv::waitKey(0);
    cv::imwrite("pipeline_result_separate.jpg", original_image);
    return 0;
}
```
- The CMakeLists.txt configuration is as follows.
```cmake:CMakeLists.txt
# Minimum CMake version and project name
cmake_minimum_required(VERSION 3.13)
project(RTMPoseDemo CXX)

# Use C++17
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Find the required packages (OpenCV and OpenMP)
find_package(OpenCV REQUIRED)
find_package(OpenMP REQUIRED)

# Build the executable
add_executable(rtmpose_demo src/main.cpp)

# Tell the executable where the required headers are
# ("~" is not expanded by CMake, so use $ENV{HOME} instead)
target_include_directories(rtmpose_demo PRIVATE
    ${OpenCV_INCLUDE_DIRS}
    "$ENV{HOME}/Documents/onnxruntime-linux-x64-gpu-1.22.0/include"
)

# Link the required libraries
target_link_libraries(rtmpose_demo PRIVATE
    ${OpenCV_LIBS}
    OpenMP::OpenMP_CXX
    "$ENV{HOME}/Documents/onnxruntime-linux-x64-gpu-1.22.0/lib/libonnxruntime.so"
)
```
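Note that the downloaded onnxruntime package is the GPU build, but the session options in main.cpp never register the CUDA execution provider, so inference runs on the CPU. The following is a minimal sketch of how the session setup could be changed to try the GPU; it assumes a CUDA/cuDNN installation compatible with the onnxruntime 1.22.0 GPU build, and the model path is just the RTMPose model converted earlier.

```cpp
// cuda_session.cpp: sketch of creating an ONNX Runtime session with the CUDA
// execution provider enabled (assumption: GPU build of onnxruntime plus a
// matching CUDA/cuDNN installation).
#include <cstdlib>
#include <iostream>
#include <string>
#include <onnxruntime_cxx_api.h>

int main() {
    const std::string home = std::getenv("HOME");
    const std::string model_path =
        home + "/Documents/mmdeploy/mmdeploy_models/rtmpose_coco_onnx/end2end.onnx";

    Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "CudaSessionDemo");
    try {
        Ort::SessionOptions session_options;
        session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);

        // Register the CUDA execution provider (device 0) before creating the session.
        // Without this call, even the GPU package runs everything on the CPU.
        OrtCUDAProviderOptions cuda_options{};
        cuda_options.device_id = 0;
        session_options.AppendExecutionProvider_CUDA(cuda_options);

        Ort::Session session(env, model_path.c_str(), session_options);
        std::cout << "Created a session with the CUDA execution provider." << std::endl;
    } catch (const Ort::Exception& e) {
        // Typically means the CUDA provider library or CUDA/cuDNN could not be loaded.
        std::cerr << "Failed to create the session: " << e.what() << std::endl;
        return 1;
    }
    return 0;
}
```

If this fails on your machine, the CPU-only setup in main.cpp still works as-is.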