Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 24 additions & 10 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ else()
endif()

option(IPC_TOOLKIT_WITH_CUDA "Enable CUDA CCD" OFF)
option(IPC_TOOLKIT_WITH_SIMD "Enable SIMD" ON)
option(IPC_TOOLKIT_WITH_RATIONAL_INTERSECTION "Use rational edge-triangle intersection check" OFF)
option(IPC_TOOLKIT_WITH_ROBIN_MAP "Use Tessil's robin-map rather than std maps" ON)
option(IPC_TOOLKIT_WITH_ABSEIL "Use Abseil's hash functions" ON)
Expand All @@ -83,10 +84,8 @@ option(IPC_TOOLKIT_WITH_INEXACT_CCD "Use the original inexact CCD meth
option(IPC_TOOLKIT_WITH_PROFILER "Enable performance profiler" OFF)

# Advanced options
option(IPC_TOOLKIT_WITH_SIMD "Enable SIMD" OFF)
option(IPC_TOOLKIT_WITH_CODE_COVERAGE "Enable coverage reporting" OFF)

mark_as_advanced(IPC_TOOLKIT_WITH_SIMD) # This does not work reliably
mark_as_advanced(IPC_TOOLKIT_WITH_CODE_COVERAGE) # This is used in GitHub Actions

# Set default minimum C++ standard
Expand All @@ -112,9 +111,10 @@ include(ipc_toolkit_use_colors)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

################################################################################
# CUDA
# Verify Options
################################################################################

# CUDA support
if(IPC_TOOLKIT_WITH_CUDA)
# If CMAKE_CUDA_ARCHITECTURES was not specified, set it to native.
if(DEFINED CMAKE_CUDA_ARCHITECTURES)
Expand All @@ -129,6 +129,19 @@ if(IPC_TOOLKIT_WITH_CUDA)
enable_language(CUDA)
endif()

# SIMD support
if(IPC_TOOLKIT_WITH_SIMD)
    # Probe for SIMD support. This block treats a true-valued SIMD_CXX_FLAGS
    # after find_package(SIMD) as "SIMD is available".
    message(STATUS "Testing SIMD capabilities...")
    find_package(SIMD)
    if(NOT SIMD_CXX_FLAGS)
        # Nothing usable was reported: warn, then switch the cached option off
        # so the remainder of the configure step sees SIMD as disabled.
        message(WARNING "SIMD support requested but not found. Continuing without SIMD.")
        set(IPC_TOOLKIT_WITH_SIMD OFF CACHE BOOL "Enable SIMD" FORCE)
    else()
        message(STATUS "SIMD support found: ${SIMD_CXX_FLAGS}")
    endif()
endif()

################################################################################
# IPC Toolkit Library
################################################################################
Expand Down Expand Up @@ -247,14 +260,15 @@ target_link_libraries(ipc_toolkit PRIVATE ipc::toolkit::warnings)

## SIMD support
if(IPC_TOOLKIT_WITH_SIMD)
# Figure out SIMD support
message(STATUS "Testing SIMD capabilities...")
find_package(SIMD)
# Add SIMD flags to compiler flags
message(STATUS "Using SIMD flags: ${SIMD_FLAGS}")
target_compile_options(ipc_toolkit PRIVATE ${SIMD_FLAGS})
else()
message(STATUS "SIMD support disabled")
target_compile_options(ipc_toolkit PRIVATE ${SIMD_CXX_FLAGS})

# Link against cross-platform xsimd library
include(xsimd)
target_link_libraries(ipc_toolkit PRIVATE xsimd::xsimd)

# Disable vectorization in Eigen since I've found it to have alignment issues.
target_compile_definitions(Eigen3_Eigen INTERFACE EIGEN_DONT_VECTORIZE=1)
endif()

# For MSVC, do not use the min and max macros.
Expand Down
13 changes: 1 addition & 12 deletions CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,16 +51,6 @@
"IPC_TOOLKIT_WITH_CUDA": "ON"
}
},
{
"name": "simd",
"inherits": "release",
"displayName": "SIMD Enabled",
"description": "Build with SIMD optimizations",
"binaryDir": "${sourceDir}/build/simd",
"cacheVariables": {
"IPC_TOOLKIT_WITH_SIMD": "ON"
}
},
{
"name": "test",
"inherits": "debug",
Expand All @@ -82,7 +72,6 @@
"cacheVariables": {
"IPC_TOOLKIT_BUILD_PYTHON": "ON",
"IPC_TOOLKIT_BUILD_TESTS": "OFF",
"IPC_TOOLKIT_WITH_SIMD": "OFF",
"IPC_TOOLKIT_WITH_CUDA": "OFF"
}
},
Expand Down Expand Up @@ -166,4 +155,4 @@
}
}
]
}
}
2 changes: 1 addition & 1 deletion IPCToolkitOptions.cmake.sample
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,12 @@
# option(IPC_TOOLKIT_BUILD_TESTS "Build unit-tests" ON)
# option(IPC_TOOLKIT_BUILD_PYTHON "Build Python bindings" OFF)
# option(IPC_TOOLKIT_WITH_CUDA "Enable CUDA CCD" OFF)
# option(IPC_TOOLKIT_WITH_SIMD "Enable SIMD" ON)
# option(IPC_TOOLKIT_WITH_RATIONAL_INTERSECTION "Use rational edge-triangle intersection check" OFF)
# option(IPC_TOOLKIT_WITH_ROBIN_MAP "Use Tessil's robin-map rather than std maps" ON)
# option(IPC_TOOLKIT_WITH_ABSEIL "Use Abseil's hash functions" ON)
# option(IPC_TOOLKIT_WITH_FILIB "Use filib for interval arithmetic" ON)
# option(IPC_TOOLKIT_WITH_INEXACT_CCD "Use the original inexact CCD method of IPC" OFF)
# option(IPC_TOOLKIT_WITH_SIMD "Enable SIMD" OFF)
# option(IPC_TOOLKIT_WITH_CODE_COVERAGE "Enable coverage reporting" OFF)
# option(IPC_TOOLKIT_TESTS_CCD_BENCHMARK "Enable CCD benchmark test" ON)
# set(IPC_TOOLKIT_TESTS_CCD_BENCHMARK_DIR "" CACHE PATH "Path to the CCD benchmark directory")
Expand Down
15 changes: 15 additions & 0 deletions cmake/recipes/xsimd.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# xsimd (https://github.com/xtensor-stack/xsimd)
# License: BSD-3-Clause
# Recipe guard: return immediately if the xsimd::xsimd target already exists,
# so including this file more than once is harmless.
if(TARGET xsimd::xsimd)
return()
endif()

message(STATUS "Third-party: creating target 'xsimd::xsimd'")

# Fetch xsimd through CPM using the "gh:" GitHub shorthand; the part after '#'
# is the git ref to check out (here the 14.0.0 tag).
# NOTE(review): a tag can be re-pointed upstream. Consider pinning to the full
# commit SHA of this release instead -- TODO confirm the SHA before changing.
include(CPM)
CPMAddPackage("gh:xtensor-stack/xsimd#14.0.0")
Copy link

Copilot AI Jan 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The CPMAddPackage("gh:xtensor-stack/xsimd#14.0.0") call introduces a supply-chain risk by fetching and building third-party code from GitHub pinned only to a mutable tag without any additional integrity verification. If the xtensor-stack/xsimd repository or its tags are compromised, CI/builds that have access to secrets could execute attacker-controlled code. To mitigate this, pin the dependency to an immutable commit SHA (and, if supported by CPM, enable checksum/signature verification) rather than relying solely on a version tag.

Copilot uses AI. Check for mistakes.

add_library(xsimd::xsimd ALIAS xsimd)

# Folder name for IDE
set_target_properties(xsimd PROPERTIES FOLDER "ThirdParty")
87 changes: 54 additions & 33 deletions src/ipc/broad_phase/lbvh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,16 @@
#include <tbb/parallel_for.h>
#include <tbb/parallel_sort.h>

#ifdef __APPLE__
// We utilize SIMD registers to compare 1 Node against 4 Queries simultaneously.
#include <simd/simd.h>
#ifdef IPC_TOOLKIT_WITH_SIMD
// We utilize SIMD registers to compare one node against multiple queries
// simultaneously, with the number of queries determined by
// xs::batch<float>::size.
#include <xsimd/xsimd.hpp>
namespace xs = xsimd;
#endif

#include <array>

using namespace std::placeholders;

namespace ipc {
Expand Down Expand Up @@ -448,9 +453,9 @@ namespace {
} while (node_idx != LBVH::Node::INVALID_POINTER); // Same as root
}

#ifdef __APPLE__
#ifdef IPC_TOOLKIT_WITH_SIMD
// SIMD Traversal
// Traverses 4 queries simultaneously using SIMD.
// Traverses multiple queries simultaneously using SIMD.
template <typename Candidate, bool swap_order, bool triangular>
void traverse_lbvh_simd(
const LBVH::Node* queries,
Expand All @@ -459,28 +464,37 @@ namespace {
const std::function<bool(size_t, size_t)>& can_collide,
std::vector<Candidate>& candidates)
{
assert(n_queries >= 1 && n_queries <= 4);
// Load 4 queries into single registers (Structure of Arrays)
auto make_simd = [&](auto F) -> simd_float4 {
return simd_float4 {
F(0),
n_queries > 1 ? F(1) : 0.0f,
n_queries > 2 ? F(2) : 0.0f,
n_queries > 3 ? F(3) : 0.0f,
};
using batch_t = xs::batch<float>;
assert(n_queries >= 1 && n_queries <= batch_t::size);

// Load queries into single registers
auto make_simd = [&](auto F) -> batch_t {
// 1. Create a buffer of the correct architecture-dependent size
alignas(xs::default_arch::alignment())
std::array<float, batch_t::size>
buffer {};

#pragma unroll
// 2. Fill the buffer, respecting the actual number of queries
for (size_t i = 0; i < batch_t::size; ++i) {
buffer[i] = (i < n_queries) ? F(static_cast<int>(i)) : 0.0f;
}

// 3. Load the buffer into the SIMD register
return batch_t::load_aligned(buffer.data());
};

const simd_float4 q_min_x =
const auto q_min_x =
make_simd([&](int k) { return queries[k].aabb_min.x(); });
const simd_float4 q_min_y =
const auto q_min_y =
make_simd([&](int k) { return queries[k].aabb_min.y(); });
const simd_float4 q_min_z =
const auto q_min_z =
make_simd([&](int k) { return queries[k].aabb_min.z(); });
const simd_float4 q_max_x =
const auto q_max_x =
make_simd([&](int k) { return queries[k].aabb_max.x(); });
const simd_float4 q_max_y =
const auto q_max_y =
make_simd([&](int k) { return queries[k].aabb_max.y(); });
const simd_float4 q_max_z =
const auto q_max_z =
make_simd([&](int k) { return queries[k].aabb_max.z(); });

// Use a fixed-size array as a stack to avoid dynamic allocations
Expand All @@ -505,31 +519,33 @@ namespace {
const LBVH::Node& child_l = lbvh[node.left];
const LBVH::Node& child_r = lbvh[node.right];

// 1. Intersect 4 queries at once
// 1. Intersect multiple queries at once
// (child_l.min <= query.max) && (query.min <= child_l.max)
const simd_int4 intersects_l = (child_l.aabb_min.x() <= q_max_x)
const xs::batch_bool<float> intersects_l =
(child_l.aabb_min.x() <= q_max_x)
& (child_l.aabb_min.y() <= q_max_y)
& (child_l.aabb_min.z() <= q_max_z)
& (q_min_x <= child_l.aabb_max.x())
& (q_min_y <= child_l.aabb_max.y())
& (q_min_z <= child_l.aabb_max.z());

// 2. Intersect 4 queries at once
// 2. Intersect multiple queries at once
// (child_r.min <= query.max) && (query.min <= child_r.max)
const simd_int4 intersects_r = (child_r.aabb_min.x() <= q_max_x)
const xs::batch_bool<float> intersects_r =
(child_r.aabb_min.x() <= q_max_x)
& (child_r.aabb_min.y() <= q_max_y)
& (child_r.aabb_min.z() <= q_max_z)
& (q_min_x <= child_r.aabb_max.x())
& (q_min_y <= child_r.aabb_max.y())
& (q_min_z <= child_r.aabb_max.z());

const bool any_intersects_l = simd_any(intersects_l);
const bool any_intersects_r = simd_any(intersects_r);
const bool any_intersects_l = xs::any(intersects_l);
const bool any_intersects_r = xs::any(intersects_r);

// Query overlaps a leaf node => report collision
if (any_intersects_l && child_l.is_leaf()) {
for (int k = 0; k < n_queries; ++k) {
if (intersects_l[k]) {
if (intersects_l.get(k)) {
attempt_add_candidate<
Candidate, swap_order, triangular>(
queries[k], child_l, can_collide, candidates);
Expand All @@ -538,7 +554,7 @@ namespace {
}
if (any_intersects_r && child_r.is_leaf()) {
for (int k = 0; k < n_queries; ++k) {
if (intersects_r[k]) {
if (intersects_r.get(k)) {
attempt_add_candidate<
Candidate, swap_order, triangular>(
queries[k], child_r, can_collide, candidates);
Expand Down Expand Up @@ -576,9 +592,12 @@ namespace {
const std::function<bool(size_t, size_t)>& can_collide,
tbb::enumerable_thread_specific<std::vector<Candidate>>& storage)
{
#ifdef __APPLE__ // Only support SIMD on Apple platforms for now
constexpr size_t SIMD_SIZE = use_simd ? 4 : 1;
constexpr size_t GRAIN_SIZE = use_simd ? 16 : 1;
#ifdef IPC_TOOLKIT_WITH_SIMD // Enable SIMD acceleration when available
constexpr size_t SIMD_SIZE = use_simd ? xs::batch<float>::size : 1;
static_assert(
64 % xs::batch<float>::size == 0, "GRAIN_SIZE must be an integer");
constexpr size_t GRAIN_SIZE =
use_simd ? (64 / xs::batch<float>::size) : 1;
#else
constexpr size_t SIMD_SIZE = 1;
constexpr size_t GRAIN_SIZE = 1;
Expand All @@ -595,11 +614,13 @@ namespace {
tbb::blocked_range<size_t>(size_t(0), n_tasks, GRAIN_SIZE),
[&](const tbb::blocked_range<size_t>& r) {
auto& local_candidates = storage.local();
#ifdef IPC_TOOLKIT_WITH_SIMD
const size_t actual_end = // Handle tail case
std::min(SIMD_SIZE * r.end(), n_source_leaves);
#endif
for (size_t i = r.begin(); i < r.end(); ++i) {
const size_t idx = SIMD_SIZE * i;
#ifdef __APPLE__
#ifdef IPC_TOOLKIT_WITH_SIMD
if constexpr (use_simd) {
assert(actual_end - idx >= 1);
traverse_lbvh_simd<Candidate, swap_order, triangular>(
Expand All @@ -611,7 +632,7 @@ namespace {
traverse_lbvh<Candidate, swap_order, triangular>(
source[source_leaf_offset + idx], target,
can_collide, local_candidates);
#ifdef __APPLE__
#ifdef IPC_TOOLKIT_WITH_SIMD
}
#endif
}
Expand Down
1 change: 1 addition & 0 deletions src/ipc/config.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#cmakedefine IPC_TOOLKIT_WITH_INEXACT_CCD
#cmakedefine IPC_TOOLKIT_WITH_RATIONAL_INTERSECTION
#cmakedefine IPC_TOOLKIT_WITH_CUDA
#cmakedefine IPC_TOOLKIT_WITH_SIMD
#cmakedefine IPC_TOOLKIT_WITH_ROBIN_MAP
#cmakedefine IPC_TOOLKIT_WITH_ABSEIL
#cmakedefine IPC_TOOLKIT_WITH_FILIB
Expand Down
7 changes: 7 additions & 0 deletions src/ipc/utils/eigen_ext.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@

#include <cassert>

#ifdef EIGEN_DONT_VECTORIZE
// NOTE: Avoid error about abs casting double to int. Eigen does this
// internally but seemingly only if EIGEN_DONT_VECTORIZE is not defined.
// TODO: We should always use std::abs to avoid this issue.
EIGEN_USING_STD(abs); // using std::abs;
#endif

namespace Eigen {
template <typename T> using RowRef = Ref<T, 0, Eigen::InnerStride<>>;
template <typename T> using ConstRef = const Ref<const T>&;
Expand Down