/**
 * YOLO3D C++ Implementation with TensorRT for Production
 *
 * High-performance C++ version of YOLO3D for real-time pedestrian and
 * cyclist detection from LiDAR point clouds.
 *
 * Key optimizations:
 *  - TensorRT for GPU inference acceleration
 *  - CUDA kernels for BEV conversion
 *  - Multi-threading for point cloud preprocessing
 *  - Memory pooling to reduce allocations
 */
#ifndef YOLO3D_DETECTOR_HPP
#define YOLO3D_DETECTOR_HPP

// Standard library
#include <algorithm>
#include <chrono>
#include <cmath>
#include <condition_variable>
#include <fstream>
#include <functional>
#include <future>
#include <iostream>
#include <memory>
#include <mutex>
#include <queue>
#include <sstream>
#include <string>
#include <thread>
#include <vector>

// CUDA and TensorRT includes
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <NvInfer.h>
#include <NvOnnxParser.h>

// OpenCV for visualization
#include <opencv2/opencv.hpp>

// PCL for point cloud handling (optional)
// #include <pcl/point_types.h>
// #include <pcl/point_cloud.h>

namespace yolo3d {

// Forward declarations
class Logger;
class BEVConverter;
class TensorRTEngine;
class YOLO3DDetector;

/**
 * 3D detection result in LiDAR/world coordinates.
 */
struct Detection3D {
    float x, y, z;     // Center position [m]
    float l, w, h;     // Dimensions: length, width, height [m]
    float theta;       // Rotation angle (yaw) [rad]
    int class_id;      // 0: Car, 1: Pedestrian, 2: Cyclist
    float confidence;  // Detection confidence in [0, 1]

    // Additional tracking info
    int track_id = -1;
    cv::Point2f velocity = {0, 0};
};

/**
 * Single LiDAR return: position plus intensity.
 * Layout is a packed x/y/z/intensity float quadruple, matching the flat
 * float array consumed by the CUDA kernels, so a PointXYZI array can be
 * copied to the device as-is.
 */
struct PointXYZI {
    float x, y, z;
    float intensity;
};

/**
 * Configuration parameters for BEV rasterization and detection.
 */
struct Config {
    // BEV parameters (detection range in meters, grid resolution in m/px)
    float x_min = -40.0f, x_max = 40.0f;
    float y_min = -40.0f, y_max = 40.0f;
    float z_min = -3.0f, z_max = 1.0f;
    float resolution = 0.1f;
    int bev_width = 608;
    int bev_height = 608;

    // Detection parameters
    float conf_threshold = 0.5f;
    float nms_threshold = 0.4f;

    // Performance parameters
    bool use_fp16 = true;
    bool use_int8 = false;
    int max_batch_size = 1;
    int num_threads = 4;

    // Class names, indexed by Detection3D::class_id
    std::vector<std::string> class_names = {"Car", "Pedestrian", "Cyclist"};
};

/**
 * TensorRT logger: forwards warnings and errors to stdout,
 * suppresses INFO/VERBOSE messages.
 */
class Logger : public nvinfer1::ILogger {
public:
    void log(Severity severity, const char* msg) noexcept override {
        if (severity <= Severity::kWARNING) {
            std::cout << "[TensorRT] " << msg << std::endl;
        }
    }
};

/**
 * CUDA kernels for BEV conversion
 */
namespace cuda {

/**
 * Atomic max for float values.
 * CUDA's atomicMax has no float overload, so emulate it with a CAS loop
 * on the bit pattern. The comparison is done on the decoded float value,
 * so this is correct for negative floats as well.
 */
__device__ inline float atomicMaxFloat(float* addr, float value) {
    int* addr_as_int = reinterpret_cast<int*>(addr);
    int old = *addr_as_int;
    int assumed;
    do {
        assumed = old;
        if (__int_as_float(assumed) >= value) {
            break;  // stored max already >= value; nothing to do
        }
        old = atomicCAS(addr_as_int, assumed, __float_as_int(value));
    } while (assumed != old);
    return __int_as_float(old);
}

/**
 * Rasterize a point cloud into three BEV maps (height / intensity / density).
 *
 * Launch: 1D grid, one thread per point (bounds-checked).
 * `points` is a packed array of [x, y, z, intensity] quadruples.
 * Preconditions: height_map pre-filled with z_min (the max-reduction
 * identity — NOT zero, since z may be negative); intensity_map and
 * density_map zero-filled.
 * NOTE: the original code had two parameters named `bev_height` (the
 * output map and the grid height) — the maps are renamed *_map here.
 */
__global__ void pointCloudToBEV(
    const float* __restrict__ points, int num_points,
    float* height_map, float* intensity_map, float* density_map,
    int bev_width, int bev_height,
    float x_min, float x_max,
    float y_min, float y_max,
    float z_min, float z_max,
    float resolution
) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= num_points) return;

    // Get point coordinates
    float x = points[idx * 4 + 0];
    float y = points[idx * 4 + 1];
    float z = points[idx * 4 + 2];
    float intensity = points[idx * 4 + 3];

    // Discard points outside the configured detection range
    if (x < x_min || x >= x_max ||
        y < y_min || y >= y_max ||
        z < z_min || z >= z_max) {
        return;
    }

    // Convert to BEV pixel coordinates
    int px = static_cast<int>((x - x_min) / resolution);
    int py = static_cast<int>((y - y_min) / resolution);

    // Clip to ensure within bounds
    px = min(max(px, 0), bev_width - 1);
    py = min(max(py, 0), bev_height - 1);

    int pixel_idx = py * bev_width + px;

    // Height map: keep the highest z per cell (float atomic max emulation)
    atomicMaxFloat(&height_map[pixel_idx], z);
    // Intensity map: keep the strongest return per cell
    atomicMaxFloat(&intensity_map[pixel_idx], intensity);
    // Density map: count points per cell
    atomicAdd(&density_map[pixel_idx], 1.0f);
}

/**
 * Normalize the three BEV maps in place to [0, 1].
 *
 * Launch: 1D grid over `size` pixels.
 * Height is mapped linearly from [z_min, z_max]; intensity assumes a raw
 * 0-255 sensor range; density uses a log scale saturating at 63 points.
 * All channels are clamped to guard against out-of-range input.
 */
__global__ void normalizeBEV(
    float* bev_height, float* bev_intensity, float* bev_density,
    int size, float z_min, float z_max
) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= size) return;

    // Normalize height (empty cells initialized to z_min map to 0)
    float h = (bev_height[idx] - z_min) / (z_max - z_min);
    bev_height[idx] = fminf(fmaxf(h, 0.0f), 1.0f);

    // Normalize intensity (assuming 0-255 range)
    bev_intensity[idx] = fminf(fmaxf(bev_intensity[idx] / 255.0f, 0.0f), 1.0f);

    // Normalize density (log scale, saturates at 63 points per cell)
    float d = logf(bev_density[idx] + 1.0f) / logf(64.0f);
    bev_density[idx] = fminf(d, 1.0f);
}

} // namespace cuda

/**
 * Converts raw point clouds into normalized BEV tensors on the GPU.
 * Device buffers are allocated once in the constructor and reused for
 * every frame to avoid per-frame cudaMalloc overhead.
 */
class BEVConverter {
private:
    Config config_;

    // Device memory (allocated once, reused per frame)
    float* d_points_ = nullptr;
    float* d_bev_height_ = nullptr;
    float* d_bev_intensity_ = nullptr;
    float* d_bev_density_ = nullptr;
float* d_bev_output_ = nullptr;
    // Per-cell z_min fill pattern, used to reset the height map each frame.
    // cudaMemset only does byte fills, which cannot express a float z_min.
    float* d_height_init_ = nullptr;
    size_t max_points_ = 150000;  // points beyond this are silently dropped

public:
    BEVConverter(const Config& config) : config_(config) {
        allocateMemory();
    }

    ~BEVConverter() {
        freeMemory();
    }

    /** Allocate all device buffers once, sized from the configured BEV grid. */
    void allocateMemory() {
        size_t bev_size = static_cast<size_t>(config_.bev_width) * config_.bev_height;
        cudaMalloc(&d_points_, max_points_ * 4 * sizeof(float));
        cudaMalloc(&d_bev_height_, bev_size * sizeof(float));
        cudaMalloc(&d_bev_intensity_, bev_size * sizeof(float));
        cudaMalloc(&d_bev_density_, bev_size * sizeof(float));
        cudaMalloc(&d_bev_output_, bev_size * 3 * sizeof(float));
        cudaMalloc(&d_height_init_, bev_size * sizeof(float));

        // Pre-fill the height-reset pattern with z_min so the per-frame
        // reset is a fast device-to-device copy. z_min (not 0) is the
        // correct identity for the max-reduction since z may be negative.
        std::vector<float> host_init(bev_size, config_.z_min);
        cudaMemcpy(d_height_init_, host_init.data(), bev_size * sizeof(float),
                   cudaMemcpyHostToDevice);
    }

    void freeMemory() {
        // cudaFree(nullptr) is a no-op, so unconditional frees are safe.
        cudaFree(d_points_);
        cudaFree(d_bev_height_);
        cudaFree(d_bev_intensity_);
        cudaFree(d_bev_density_);
        cudaFree(d_bev_output_);
        cudaFree(d_height_init_);
        d_points_ = d_bev_height_ = d_bev_intensity_ = nullptr;
        d_bev_density_ = d_bev_output_ = d_height_init_ = nullptr;
    }

    /**
     * Convert a point cloud to a 3-channel BEV image (height/intensity/
     * density). Points beyond max_points_ are dropped. Returns an HWC
     * CV_32FC3 image for visualization; use getBEVTensor() for the CHW
     * inference input. Not thread-safe (shared device buffers).
     */
    cv::Mat convertToBEV(const std::vector<PointXYZI>& points) {
        int num_points = static_cast<int>(std::min(points.size(), max_points_));
        size_t bev_size = static_cast<size_t>(config_.bev_width) * config_.bev_height;

        // Copy points to device (PointXYZI is a packed x/y/z/intensity quad)
        if (num_points > 0) {
            cudaMemcpy(d_points_, points.data(),
                       static_cast<size_t>(num_points) * sizeof(PointXYZI),
                       cudaMemcpyHostToDevice);
        }

        // Reset BEV maps: height to z_min (max-reduction identity),
        // intensity and density to zero.
        cudaMemcpy(d_bev_height_, d_height_init_, bev_size * sizeof(float),
                   cudaMemcpyDeviceToDevice);
        cudaMemset(d_bev_intensity_, 0, bev_size * sizeof(float));
        cudaMemset(d_bev_density_, 0, bev_size * sizeof(float));

        // Rasterize points into the BEV grid
        int block_size = 256;
        if (num_points > 0) {
            int grid_size = (num_points + block_size - 1) / block_size;
            cuda::pointCloudToBEV<<<grid_size, block_size>>>(
                d_points_, num_points,
                d_bev_height_, d_bev_intensity_, d_bev_density_,
                config_.bev_width, config_.bev_height,
                config_.x_min, config_.x_max,
                config_.y_min, config_.y_max,
                config_.z_min, config_.z_max,
                config_.resolution
            );
        }

        // Normalize BEV maps to [0, 1]
        int norm_grid = (static_cast<int>(bev_size) + block_size - 1) / block_size;
        cuda::normalizeBEV<<<norm_grid, block_size>>>(
            d_bev_height_, d_bev_intensity_, d_bev_density_,
            static_cast<int>(bev_size), config_.z_min, config_.z_max
        );

        // Surface launch-configuration errors immediately; execution errors
        // surface at the synchronous memcpy below.
        cudaError_t err = cudaGetLastError();
        if (err != cudaSuccess) {
            std::cerr << "BEV kernel launch failed: "
                      << cudaGetErrorString(err) << std::endl;
        }

        // Combine channels (HWC format for visualization, CHW for inference)
cv::Mat bev_map(config_.bev_height, config_.bev_width, CV_32FC3); // Copy from device to host std::vector height(bev_size), intensity(bev_size), density(bev_size); cudaMemcpy(height.data(), d_bev_height_, bev_size * sizeof(float), cudaMemcpyDeviceToHost); cudaMemcpy(intensity.data(), d_bev_intensity_, bev_size * sizeof(float), cudaMemcpyDeviceToHost); cudaMemcpy(density.data(), d_bev_density_, bev_size * sizeof(float), cudaMemcpyDeviceToHost); // Fill OpenCV Mat for (int y = 0; y < config_.bev_height; ++y) { for (int x = 0; x < config_.bev_width; ++x) { int idx = y * config_.bev_width + x; bev_map.at(y, x) = cv::Vec3f(height[idx], intensity[idx], density[idx]); } } return bev_map; } /** * Get BEV tensor for TensorRT (CHW format) */ void getBEVTensor(float* output) { size_t bev_size = config_.bev_width * config_.bev_height; // Copy in CHW format cudaMemcpy(output, d_bev_height_, bev_size * sizeof(float), cudaMemcpyDeviceToDevice); cudaMemcpy(output + bev_size, d_bev_intensity_, bev_size * sizeof(float), cudaMemcpyDeviceToDevice); cudaMemcpy(output + 2 * bev_size, d_bev_density_, bev_size * sizeof(float), cudaMemcpyDeviceToDevice); } }; /** * TensorRT Engine wrapper */ class TensorRTEngine { private: Logger logger_; std::unique_ptr runtime_; std::unique_ptr engine_; std::unique_ptr context_; // Buffers void* buffers_[2]; // Input and output size_t input_size_; size_t output_size_; cudaStream_t stream_; public: TensorRTEngine() { cudaStreamCreate(&stream_); } ~TensorRTEngine() { if (buffers_[0]) cudaFree(buffers_[0]); if (buffers_[1]) cudaFree(buffers_[1]); cudaStreamDestroy(stream_); } /** * Load TensorRT engine from file */ bool loadEngine(const std::string& engine_path) { std::ifstream file(engine_path, std::ios::binary); if (!file.good()) { std::cerr << "Failed to open engine file: " << engine_path << std::endl; return false; } file.seekg(0, file.end); size_t size = file.tellg(); file.seekg(0, file.beg); std::vector engine_data(size); file.read(engine_data.data(), 
size); file.close(); runtime_.reset(nvinfer1::createInferRuntime(logger_)); engine_.reset(runtime_->deserializeCudaEngine(engine_data.data(), size)); context_.reset(engine_->createExecutionContext()); // Allocate buffers int input_idx = engine_->getBindingIndex("input"); int output_idx = engine_->getBindingIndex("output"); auto input_dims = engine_->getBindingDimensions(input_idx); auto output_dims = engine_->getBindingDimensions(output_idx); input_size_ = getSizeByDim(input_dims) * sizeof(float); output_size_ = getSizeByDim(output_dims) * sizeof(float); cudaMalloc(&buffers_[input_idx], input_size_); cudaMalloc(&buffers_[output_idx], output_size_); return true; } /** * Build engine from ONNX */ bool buildEngine(const std::string& onnx_path, const Config& config) { auto builder = std::unique_ptr(nvinfer1::createInferBuilder(logger_)); auto network = std::unique_ptr( builder->createNetworkV2(1U << static_cast(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH)) ); auto parser = std::unique_ptr( nvonnxparser::createParser(*network, logger_) ); // Parse ONNX if (!parser->parseFromFile(onnx_path.c_str(), static_cast(nvinfer1::ILogger::Severity::kWARNING))) { std::cerr << "Failed to parse ONNX file" << std::endl; return false; } // Build configuration auto build_config = std::unique_ptr(builder->createBuilderConfig()); build_config->setMaxWorkspaceSize(1 << 30); // 1GB if (config.use_fp16 && builder->platformHasFastFp16()) { build_config->setFlag(nvinfer1::BuilderFlag::kFP16); } if (config.use_int8 && builder->platformHasFastInt8()) { build_config->setFlag(nvinfer1::BuilderFlag::kINT8); // Note: INT8 calibration would need to be added here } // Build engine engine_.reset(builder->buildEngineWithConfig(*network, *build_config)); if (!engine_) { std::cerr << "Failed to build engine" << std::endl; return false; } // Save engine auto serialized = std::unique_ptr(engine_->serialize()); std::ofstream file("yolo3d.engine", std::ios::binary); 
file.write(reinterpret_cast(serialized->data()), serialized->size()); file.close(); return true; } /** * Run inference */ std::vector infer(float* input_data) { // Copy input to device cudaMemcpyAsync(buffers_[0], input_data, input_size_, cudaMemcpyHostToDevice, stream_); // Run inference context_->enqueueV2(buffers_, stream_, nullptr); // Copy output to host std::vector output(output_size_ / sizeof(float)); cudaMemcpyAsync(output.data(), buffers_[1], output_size_, cudaMemcpyDeviceToHost, stream_); cudaStreamSynchronize(stream_); // Parse detections return parseDetections(output); } private: size_t getSizeByDim(const nvinfer1::Dims& dims) { size_t size = 1; for (int i = 0; i < dims.nbDims; ++i) { size *= dims.d[i]; } return size; } /** * Parse network output to detections */ std::vector parseDetections(const std::vector& output) { std::vector detections; // Assuming output format: [num_detections, 10] // where each detection is [x, y, z, l, w, h, theta, class_id, confidence, reserved] int num_detections = output.size() / 10; for (int i = 0; i < num_detections; ++i) { int offset = i * 10; Detection3D det; det.x = output[offset + 0]; det.y = output[offset + 1]; det.z = output[offset + 2]; det.l = output[offset + 3]; det.w = output[offset + 4]; det.h = output[offset + 5]; det.theta = output[offset + 6]; det.class_id = static_cast(output[offset + 7]); det.confidence = output[offset + 8]; detections.push_back(det); } return detections; } }; /** * Non-Maximum Suppression for 3D boxes */ class NMS3D { public: static std::vector apply(std::vector& detections, float threshold) { // Sort by confidence std::sort(detections.begin(), detections.end(), [](const Detection3D& a, const Detection3D& b) { return a.confidence > b.confidence; }); std::vector result; std::vector suppressed(detections.size(), false); for (size_t i = 0; i < detections.size(); ++i) { if (suppressed[i]) continue; result.push_back(detections[i]); // Suppress overlapping detections for (size_t j = i + 1; j < 
detections.size(); ++j) { if (suppressed[j]) continue; if (detections[i].class_id == detections[j].class_id) { float iou = calculateBEVIoU(detections[i], detections[j]); if (iou > threshold) { suppressed[j] = true; } } } } return result; } private: static float calculateBEVIoU(const Detection3D& a, const Detection3D& b) { // Simplified 2D IoU calculation (ignoring rotation for now) float x1_min = a.x - a.l / 2; float x1_max = a.x + a.l / 2; float y1_min = a.y - a.w / 2; float y1_max = a.y + a.w / 2; float x2_min = b.x - b.l / 2; float x2_max = b.x + b.l / 2; float y2_min = b.y - b.w / 2; float y2_max = b.y + b.w / 2; float xi_min = std::max(x1_min, x2_min); float xi_max = std::min(x1_max, x2_max); float yi_min = std::max(y1_min, y2_min); float yi_max = std::min(y1_max, y2_max); if (xi_max < xi_min || yi_max < yi_min) { return 0.0f; } float intersection = (xi_max - xi_min) * (yi_max - yi_min); float union_area = (x1_max - x1_min) * (y1_max - y1_min) + (x2_max - x2_min) * (y2_max - y2_min) - intersection; return intersection / union_area; } }; /** * Main YOLO3D Detector class */ class YOLO3DDetector { private: Config config_; std::unique_ptr bev_converter_; std::unique_ptr engine_; // Performance monitoring std::chrono::high_resolution_clock::time_point last_time_; float fps_ = 0.0f; // Thread pool for parallel processing std::vector workers_; std::queue> tasks_; std::mutex queue_mutex_; std::condition_variable cv_; bool stop_ = false; public: YOLO3DDetector(const Config& config) : config_(config) { bev_converter_ = std::make_unique(config); engine_ = std::make_unique(); // Initialize thread pool for (int i = 0; i < config.num_threads; ++i) { workers_.emplace_back([this] { workerThread(); }); } } ~YOLO3DDetector() { // Stop thread pool { std::unique_lock lock(queue_mutex_); stop_ = true; } cv_.notify_all(); for (auto& worker : workers_) { worker.join(); } } /** * Initialize detector with model */ bool init(const std::string& model_path) { if 
(model_path.find(".engine") != std::string::npos) { return engine_->loadEngine(model_path); } else if (model_path.find(".onnx") != std::string::npos) { return engine_->buildEngine(model_path, config_); } return false; } /** * Detect pedestrians and cyclists */ std::vector detect(const std::vector& point_cloud) { auto start = std::chrono::high_resolution_clock::now(); // Convert to BEV cv::Mat bev_map = bev_converter_->convertToBEV(point_cloud); // Prepare input tensor std::vector input_tensor(3 * config_.bev_width * config_.bev_height); // Convert HWC to CHW for (int c = 0; c < 3; ++c) { for (int y = 0; y < config_.bev_height; ++y) { for (int x = 0; x < config_.bev_width; ++x) { int chw_idx = c * config_.bev_height * config_.bev_width + y * config_.bev_width + x; input_tensor[chw_idx] = bev_map.at(y, x)[c]; } } } // Run inference std::vector detections = engine_->infer(input_tensor.data()); // Filter by confidence detections.erase( std::remove_if(detections.begin(), detections.end(), [this](const Detection3D& d) { return d.confidence < config_.conf_threshold; }), detections.end() ); // Apply NMS detections = NMS3D::apply(detections, config_.nms_threshold); // Filter for pedestrians and cyclists only detections.erase( std::remove_if(detections.begin(), detections.end(), [](const Detection3D& d) { return d.class_id == 0; }), // Remove cars detections.end() ); // Update FPS auto end = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(end - start); fps_ = 1000.0f / duration.count(); return detections; } /** * Batch detection for multiple point clouds */ std::vector> detectBatch( const std::vector>& point_clouds) { std::vector> batch_results(point_clouds.size()); std::vector> futures; // Process in parallel for (size_t i = 0; i < point_clouds.size(); ++i) { auto task = [this, &point_clouds, &batch_results, i]() { batch_results[i] = detect(point_clouds[i]); }; std::packaged_task packaged_task(task); 
futures.push_back(packaged_task.get_future()); { std::unique_lock lock(queue_mutex_); tasks_.push(std::move(packaged_task)); } cv_.notify_one(); } // Wait for all tasks to complete for (auto& future : futures) { future.wait(); } return batch_results; } float getFPS() const { return fps_; } private: void workerThread() { while (true) { std::function task; { std::unique_lock lock(queue_mutex_); cv_.wait(lock, [this] { return stop_ || !tasks_.empty(); }); if (stop_ && tasks_.empty()) { return; } task = std::move(tasks_.front()); tasks_.pop(); } task(); } } }; /** * Visualization utilities */ class Visualizer { public: static void drawBEVDetections(cv::Mat& image, const std::vector& detections, const Config& config) { // Convert to color image if grayscale if (image.channels() == 1) { cv::cvtColor(image, image, cv::COLOR_GRAY2BGR); } // Scale to display size cv::Mat display; cv::resize(image, display, cv::Size(800, 800)); // Draw detections for (const auto& det : detections) { cv::Scalar color; std::string label; if (det.class_id == 1) { // Pedestrian color = cv::Scalar(0, 0, 255); // Red label = "Pedestrian"; } else if (det.class_id == 2) { // Cyclist color = cv::Scalar(255, 0, 0); // Blue label = "Cyclist"; } else { continue; } // Convert world coordinates to pixel coordinates int px = static_cast((det.x - config.x_min) / (config.x_max - config.x_min) * 800); int py = static_cast((det.y - config.y_min) / (config.y_max - config.y_min) * 800); // Draw rotated rectangle cv::RotatedRect rect( cv::Point2f(px, py), cv::Size2f(det.l / (config.x_max - config.x_min) * 800, det.w / (config.y_max - config.y_min) * 800), -det.theta * 180 / M_PI ); cv::Point2f vertices[4]; rect.points(vertices); for (int i = 0; i < 4; ++i) { cv::line(display, vertices[i], vertices[(i + 1) % 4], color, 2); } // Draw label cv::putText(display, label + " " + std::to_string(int(det.confidence * 100)) + "%", cv::Point(px - 30, py - 10), cv::FONT_HERSHEY_SIMPLEX, 0.5, color, 2); } cv::imshow("YOLO3D 
Detections", display); cv::waitKey(1); } /** * Save detection results to file */ static void saveDetections(const std::string& filename, const std::vector& detections, const Config& config) { std::ofstream file(filename); file << "class_id,class_name,x,y,z,l,w,h,theta,confidence\n"; for (const auto& det : detections) { file << det.class_id << "," << config.class_names[det.class_id] << "," << det.x << "," << det.y << "," << det.z << "," << det.l << "," << det.w << "," << det.h << "," << det.theta << "," << det.confidence << "\n"; } file.close(); } }; /** * Point cloud loader utilities */ class PointCloudLoader { public: /** * Load KITTI binary format */ static std::vector loadKITTIBin(const std::string& filename) { std::vector points; std::ifstream file(filename, std::ios::binary); if (!file.is_open()) { std::cerr << "Failed to open file: " << filename << std::endl; return points; } file.seekg(0, std::ios::end); size_t file_size = file.tellg(); file.seekg(0, std::ios::beg); size_t num_points = file_size / (4 * sizeof(float)); points.reserve(num_points); float data[4]; while (file.read(reinterpret_cast(data), 4 * sizeof(float))) { PointXYZI point; point.x = data[0]; point.y = data[1]; point.z = data[2]; point.intensity = data[3]; points.push_back(point); } file.close(); return points; } /** * Load from CSV format */ static std::vector loadCSV(const std::string& filename) { std::vector points; std::ifstream file(filename); if (!file.is_open()) { std::cerr << "Failed to open file: " << filename << std::endl; return points; } std::string line; // Skip header if exists std::getline(file, line); while (std::getline(file, line)) { std::stringstream ss(line); std::string token; PointXYZI point; std::getline(ss, token, ','); point.x = std::stof(token); std::getline(ss, token, ','); point.y = std::stof(token); std::getline(ss, token, ','); point.z = std::stof(token); std::getline(ss, token, ','); point.intensity = std::stof(token); points.push_back(point); } file.close(); 
return points; } }; /** * ROS Integration (optional) */ #ifdef USE_ROS #include #include #include #include class YOLO3DROSNode { private: ros::NodeHandle nh_; ros::Subscriber pc_sub_; ros::Publisher marker_pub_; std::unique_ptr detector_; Config config_; public: YOLO3DROSNode() : nh_("~") { // Load parameters nh_.param("x_min", config_.x_min, -40.0f); nh_.param("x_max", config_.x_max, 40.0f); nh_.param("y_min", config_.y_min, -40.0f); nh_.param("y_max", config_.y_max, 40.0f); nh_.param("z_min", config_.z_min, -3.0f); nh_.param("z_max", config_.z_max, 1.0f); nh_.param("conf_threshold", config_.conf_threshold, 0.5f); nh_.param("nms_threshold", config_.nms_threshold, 0.4f); nh_.param("use_fp16", config_.use_fp16, true); std::string model_path; nh_.param("model_path", model_path, "yolo3d.engine"); // Initialize detector detector_ = std::make_unique(config_); if (!detector_->init(model_path)) { ROS_ERROR("Failed to initialize YOLO3D detector"); ros::shutdown(); } // Setup subscribers and publishers pc_sub_ = nh_.subscribe("/velodyne_points", 1, &YOLO3DROSNode::pointCloudCallback, this); marker_pub_ = nh_.advertise("/yolo3d/detections", 1); ROS_INFO("YOLO3D detector initialized"); } void pointCloudCallback(const sensor_msgs::PointCloud2::ConstPtr& msg) { // Convert ROS PointCloud2 to vector pcl::PointCloud pcl_cloud; pcl::fromROSMsg(*msg, pcl_cloud); std::vector points; points.reserve(pcl_cloud.size()); for (const auto& pt : pcl_cloud) { PointXYZI point; point.x = pt.x; point.y = pt.y; point.z = pt.z; point.intensity = pt.intensity; points.push_back(point); } // Run detection auto detections = detector_->detect(points); // Publish markers publishMarkers(detections, msg->header); ROS_INFO_THROTTLE(1.0, "Processing at %.1f FPS, detected %zu objects", detector_->getFPS(), detections.size()); } void publishMarkers(const std::vector& detections, const std_msgs::Header& header) { visualization_msgs::MarkerArray marker_array; // Delete all previous markers 
visualization_msgs::Marker delete_marker; delete_marker.header = header; delete_marker.action = visualization_msgs::Marker::DELETEALL; marker_array.markers.push_back(delete_marker); int id = 0; for (const auto& det : detections) { // Bounding box marker visualization_msgs::Marker box_marker; box_marker.header = header; box_marker.id = id++; box_marker.type = visualization_msgs::Marker::CUBE; box_marker.action = visualization_msgs::Marker::ADD; box_marker.pose.position.x = det.x; box_marker.pose.position.y = det.y; box_marker.pose.position.z = det.z; // Convert theta to quaternion box_marker.pose.orientation.w = cos(det.theta / 2); box_marker.pose.orientation.z = sin(det.theta / 2); box_marker.scale.x = det.l; box_marker.scale.y = det.w; box_marker.scale.z = det.h; // Set color based on class if (det.class_id == 1) { // Pedestrian box_marker.color.r = 1.0; box_marker.color.g = 0.0; box_marker.color.b = 0.0; } else { // Cyclist box_marker.color.r = 0.0; box_marker.color.g = 0.0; box_marker.color.b = 1.0; } box_marker.color.a = 0.5; box_marker.lifetime = ros::Duration(0.1); marker_array.markers.push_back(box_marker); // Text marker visualization_msgs::Marker text_marker = box_marker; text_marker.id = id++; text_marker.type = visualization_msgs::Marker::TEXT_VIEW_FACING; text_marker.scale.z = 0.5; text_marker.color.a = 1.0; text_marker.text = config_.class_names[det.class_id] + " (" + std::to_string(int(det.confidence * 100)) + "%)"; marker_array.markers.push_back(text_marker); } marker_pub_.publish(marker_array); } }; #endif } // namespace yolo3d #endif // YOLO3D_DETECTOR_HPP // ============================================================================== // Main application example // ============================================================================== int main(int argc, char** argv) { using namespace yolo3d; #ifdef USE_ROS ros::init(argc, argv, "yolo3d_detector"); YOLO3DROSNode node; ros::spin(); #else // Standalone application if (argc < 3) { std::cerr 
<< "Usage: " << argv[0] << " [output_path]" << std::endl; return 1; } std::string model_path = argv[1]; std::string pc_path = argv[2]; std::string output_path = argc > 3 ? argv[3] : "detections.csv"; // Configure detector Config config; config.conf_threshold = 0.5f; config.nms_threshold = 0.4f; config.use_fp16 = true; // Initialize detector YOLO3DDetector detector(config); if (!detector.init(model_path)) { std::cerr << "Failed to initialize detector with model: " << model_path << std::endl; return 1; } // Load point cloud std::vector points; if (pc_path.find(".bin") != std::string::npos) { points = PointCloudLoader::loadKITTIBin(pc_path); } else if (pc_path.find(".csv") != std::string::npos) { points = PointCloudLoader::loadCSV(pc_path); } else { std::cerr << "Unsupported file format: " << pc_path << std::endl; return 1; } std::cout << "Loaded " << points.size() << " points" << std::endl; // Run detection auto start = std::chrono::high_resolution_clock::now(); auto detections = detector.detect(points); auto end = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(end - start); std::cout << "Detection took " << duration.count() << " ms" << std::endl; std::cout << "FPS: " << detector.getFPS() << std::endl; std::cout << "Detected " << detections.size() << " pedestrians/cyclists" << std::endl; // Print results for (size_t i = 0; i < detections.size(); ++i) { const auto& det = detections[i]; std::cout << "\n" << i + 1 << ". 
" << config.class_names[det.class_id] << ":" << std::endl; std::cout << " Position: (" << det.x << ", " << det.y << ", " << det.z << ") m" << std::endl; std::cout << " Dimensions: " << det.l << " x " << det.w << " x " << det.h << " m" << std::endl; std::cout << " Rotation: " << det.theta * 180 / M_PI << " degrees" << std::endl; std::cout << " Confidence: " << det.confidence * 100 << "%" << std::endl; } // Save results Visualizer::saveDetections(output_path, detections, config); std::cout << "\nResults saved to: " << output_path << std::endl; // Visualize if display is available cv::Mat bev_display(config.bev_height, config.bev_width, CV_8UC3, cv::Scalar(0, 0, 0)); Visualizer::drawBEVDetections(bev_display, detections, config); cv::waitKey(0); #endif return 0; }