From fbaeac0c48155d28cd96bf5aa8d66b37cc85d8aa Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Wed, 22 Apr 2026 12:43:31 +0200 Subject: [PATCH 1/2] Use std::array in level_comparison dispatch --- cpp/src/arrow/util/dispatch_internal.h | 1 - cpp/src/parquet/level_comparison.cc | 22 ++++++++++++---------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/cpp/src/arrow/util/dispatch_internal.h b/cpp/src/arrow/util/dispatch_internal.h index 7ac19b0b2443..5aa7b01fde8a 100644 --- a/cpp/src/arrow/util/dispatch_internal.h +++ b/cpp/src/arrow/util/dispatch_internal.h @@ -18,7 +18,6 @@ #pragma once #include -#include #include "arrow/status.h" #include "arrow/util/cpu_info.h" diff --git a/cpp/src/parquet/level_comparison.cc b/cpp/src/parquet/level_comparison.cc index f8d972e6c686..2159ebe1a9cb 100644 --- a/cpp/src/parquet/level_comparison.cc +++ b/cpp/src/parquet/level_comparison.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#include + #include "parquet/level_comparison.h" #if defined(ARROW_HAVE_RUNTIME_AVX2) @@ -25,8 +27,6 @@ #include "parquet/level_comparison_inc.h" #undef PARQUET_IMPL_NAMESPACE -#include - #include "arrow/util/dispatch_internal.h" namespace parquet::internal { @@ -40,12 +40,13 @@ using ::arrow::internal::DynamicDispatch; struct GreaterThanDynamicFunction { using FunctionType = decltype(&GreaterThanBitmap); + using Implementation = std::pair; - static std::vector> implementations() { - return {{DispatchLevel::NONE, standard::GreaterThanBitmapImpl} + static constexpr auto implementations() { + return std::array{ + Implementation{DispatchLevel::NONE, standard::GreaterThanBitmapImpl}, #if defined(ARROW_HAVE_RUNTIME_AVX2) - , - {DispatchLevel::AVX2, GreaterThanBitmapAvx2} + Implementation{DispatchLevel::AVX2, GreaterThanBitmapAvx2}, #endif }; } @@ -53,12 +54,13 @@ struct GreaterThanDynamicFunction { struct MinMaxDynamicFunction { using FunctionType = decltype(&FindMinMax); + using 
Implementation = std::pair; - static std::vector> implementations() { - return {{DispatchLevel::NONE, standard::FindMinMaxImpl} + static constexpr auto implementations() { + return std::array{ + Implementation{DispatchLevel::NONE, standard::FindMinMaxImpl}, #if defined(ARROW_HAVE_RUNTIME_AVX2) - , - {DispatchLevel::AVX2, FindMinMaxAvx2} + Implementation{DispatchLevel::AVX2, FindMinMaxAvx2}, #endif }; } From d1dd94db38240ee4dc75cb8bb73eaf225181ce17 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Wed, 22 Apr 2026 17:53:23 +0200 Subject: [PATCH 2/2] Refactor DynamicDispatch for auto static dispatch --- cpp/src/arrow/util/bpacking.cc | 2 +- .../arrow/util/byte_stream_split_internal.cc | 53 ++-- cpp/src/arrow/util/dispatch_internal.h | 258 ++++++++++++++++-- cpp/src/parquet/level_comparison.cc | 19 +- 4 files changed, 253 insertions(+), 79 deletions(-) diff --git a/cpp/src/arrow/util/bpacking.cc b/cpp/src/arrow/util/bpacking.cc index e959a9f9c411..0639baf7e50d 100644 --- a/cpp/src/arrow/util/bpacking.cc +++ b/cpp/src/arrow/util/bpacking.cc @@ -56,7 +56,7 @@ void unpack(const uint8_t* in, Uint* out, const UnpackOptions& opts) { return bpacking::unpack_neon(in, out, opts); #else static DynamicDispatch > dispatch; - return dispatch.func(in, out, opts); + return dispatch(in, out, opts); #endif } diff --git a/cpp/src/arrow/util/byte_stream_split_internal.cc b/cpp/src/arrow/util/byte_stream_split_internal.cc index f9f64747e1e2..489c15c551cf 100644 --- a/cpp/src/arrow/util/byte_stream_split_internal.cc +++ b/cpp/src/arrow/util/byte_stream_split_internal.cc @@ -24,6 +24,7 @@ namespace arrow::util::internal { using ::arrow::internal::DispatchLevel; using ::arrow::internal::DynamicDispatch; +using ::arrow::internal::DynamicDispatchTarget; /************************ * Decode dispatching * @@ -32,28 +33,16 @@ using ::arrow::internal::DynamicDispatch; template struct ByteStreamSplitDecodeDynamic { using FunctionType = decltype(&ByteStreamSplitDecodeScalar); - using Implementation = 
std::pair; constexpr static auto implementations() { return std::array{ - Implementation{ - DispatchLevel::NONE, -#if defined(ARROW_HAVE_NEON) - // We always expect Neon to be available on Arm64 - &ByteStreamSplitDecodeSimd, -#elif defined(ARROW_HAVE_SSE4_2) - // We always expect SSE4.2 to be available on x86_64 - &ByteStreamSplitDecodeSimd, -#else - &ByteStreamSplitDecodeScalar, -#endif - }, -#if defined(ARROW_HAVE_RUNTIME_AVX2) - Implementation{ - DispatchLevel::AVX2, - &ByteStreamSplitDecodeSimd, - }, -#endif + ARROW_DISPATCH_TARGET_NONE(&ByteStreamSplitDecodeScalar) // + ARROW_DISPATCH_TARGET_NEON( + (&ByteStreamSplitDecodeSimd)) // + ARROW_DISPATCH_TARGET_SSE4_2( + (&ByteStreamSplitDecodeSimd)) // + ARROW_DISPATCH_TARGET_AVX2( + (&ByteStreamSplitDecodeSimd)) // }; } }; @@ -62,7 +51,7 @@ template void ByteStreamSplitDecodeSimdDispatch(const uint8_t* data, int width, int64_t num_values, int64_t stride, uint8_t* out) { static const DynamicDispatch> dispatch; - return dispatch.func(data, width, num_values, stride, out); + return dispatch(data, width, num_values, stride, out); } template void ByteStreamSplitDecodeSimdDispatch<2>(const uint8_t*, int, int64_t, int64_t, @@ -79,25 +68,15 @@ template void ByteStreamSplitDecodeSimdDispatch<8>(const uint8_t*, int, int64_t, template struct ByteStreamSplitEncodeDynamic { using FunctionType = decltype(&ByteStreamSplitEncodeScalar); - using Implementation = std::pair; constexpr static auto implementations() { return std::array{ - Implementation{ - DispatchLevel::NONE, -#if defined(ARROW_HAVE_NEON) - // We always expect Neon to be available on Arm64 - &ByteStreamSplitEncodeSimd, -#elif defined(ARROW_HAVE_SSE4_2) - // We always expect SSE4.2 to be available on x86_64 - &ByteStreamSplitEncodeSimd, -#else - &ByteStreamSplitEncodeScalar, -#endif - }, -#if defined(ARROW_HAVE_RUNTIME_AVX2) - Implementation{DispatchLevel::AVX2, &ByteStreamSplitEncodeAvx2}, -#endif + ARROW_DISPATCH_TARGET_NONE(&ByteStreamSplitEncodeScalar) // + 
ARROW_DISPATCH_TARGET_NEON( // + (&ByteStreamSplitEncodeSimd)) // + ARROW_DISPATCH_TARGET_SSE4_2( // + (&ByteStreamSplitEncodeSimd)) // + ARROW_DISPATCH_TARGET_AVX2((&ByteStreamSplitEncodeAvx2)) // }; } }; @@ -107,7 +86,7 @@ void ByteStreamSplitEncodeSimdDispatch(const uint8_t* raw_values, int width, const int64_t num_values, uint8_t* output_buffer_raw) { static const DynamicDispatch> dispatch; - return dispatch.func(raw_values, width, num_values, output_buffer_raw); + return dispatch(raw_values, width, num_values, output_buffer_raw); } template void ByteStreamSplitEncodeSimdDispatch<2>(const uint8_t*, int, const int64_t, diff --git a/cpp/src/arrow/util/dispatch_internal.h b/cpp/src/arrow/util/dispatch_internal.h index 5aa7b01fde8a..0c63807c088c 100644 --- a/cpp/src/arrow/util/dispatch_internal.h +++ b/cpp/src/arrow/util/dispatch_internal.h @@ -17,13 +17,15 @@ #pragma once +#include +#include +#include #include #include "arrow/status.h" #include "arrow/util/cpu_info.h" -namespace arrow { -namespace internal { +namespace arrow::internal { enum class DispatchLevel : int { // These dispatch levels, corresponding to instruction set features, @@ -36,6 +38,156 @@ enum class DispatchLevel : int { MAX }; +/// A pair of dispatch level and function pointer. +template +using DynamicDispatchTarget = std::pair; + +namespace detail { + +/// A trait for checking if a type is a static ``std::array``. +template +inline constexpr bool is_std_array_v = false; + +template +inline constexpr bool is_std_array_v> = true; + +} // namespace detail + +/// A concept for an array of function pointers and their dynamic dispatch level. +template +concept DynamicDispatchTargets = + detail::is_std_array_v && + std::is_same_v>; + +/// Return whether a given dispatch level is static. +/// +/// This depends on macros defined in the build options. 
+constexpr bool DispatchIsStatic(DispatchLevel level) { + switch (level) { +#ifdef ARROW_HAVE_SSE4_2 + case DispatchLevel::SSE4_2: +#endif +#ifdef ARROW_HAVE_AVX2 + case DispatchLevel::AVX2: +#endif +#ifdef ARROW_HAVE_AVX512 + case DispatchLevel::AVX512: +#endif +#ifdef ARROW_HAVE_NEON + case DispatchLevel::NEON: +#endif + case DispatchLevel::NONE: + return true; + default: + return false; + } +} + +/// Return whether all functions in the array can be statically dispatched. +template +constexpr bool DispatchFullyStatic(const DynamicDispatchTargets auto& targets) { + return std::ranges::all_of(targets, [](const DynamicDispatchTarget& trgt) { + return DispatchIsStatic(trgt.first); + }); +} + +/// Return whether any function in the array can be statically dispatched. +/// Return false on empty sets. +template +constexpr bool DispatchHasStatic(const DynamicDispatchTargets auto& targets) { + return std::ranges::any_of(targets, [](const DynamicDispatchTarget& trgt) { + return DispatchIsStatic(trgt.first); + }); +} + +/// Find the best dispatch target given a filter. +template +constexpr DynamicDispatchTarget BestDispatchTarget( + const DynamicDispatchTargets auto& targets, Filter filter) { + DynamicDispatchTarget best = {}; + for (const auto& trgt : targets) { + if (trgt.first >= best.first && filter(trgt)) { + best = trgt; + } + } + return best; +} + +/// Find the best dispatch target (no filter). 
+template +constexpr DynamicDispatchTarget BestDispatchTarget( + const DynamicDispatchTargets auto& targets) { + return BestDispatchTarget(targets, [](const auto&) { return true; }); +} + +#define ARROW_DISPATCH_TARGET_NONE(func) \ + ::arrow::internal::DynamicDispatchTarget{ \ + ::arrow::internal::DispatchLevel::NONE, \ + (func), \ + }, + +#if defined(ARROW_HAVE_SSE4_2) || defined(ARROW_HAVE_RUNTIME_SSE4_2) +# define ARROW_DISPATCH_TARGET_SSE4_2(func) \ + ::arrow::internal::DynamicDispatchTarget{ \ + ::arrow::internal::DispatchLevel::SSE4_2, \ + (func), \ + }, +#else +# define ARROW_DISPATCH_TARGET_SSE4_2(func) +#endif + +#if defined(ARROW_HAVE_AVX2) || defined(ARROW_HAVE_RUNTIME_AVX2) +# define ARROW_DISPATCH_TARGET_AVX2(func) \ + ::arrow::internal::DynamicDispatchTarget{ \ + ::arrow::internal::DispatchLevel::AVX2, \ + (func), \ + }, +#else +# define ARROW_DISPATCH_TARGET_AVX2(func) +#endif + +#if defined(ARROW_HAVE_AVX512) || defined(ARROW_HAVE_RUNTIME_AVX512) +# define ARROW_DISPATCH_TARGET_AVX512(func) \ + ::arrow::internal::DynamicDispatchTarget{ \ + ::arrow::internal::DispatchLevel::AVX512, \ + (func), \ + }, +#else +# define ARROW_DISPATCH_TARGET_AVX512(func) +#endif + +#if defined(ARROW_HAVE_NEON) +# define ARROW_DISPATCH_TARGET_NEON(func) \ + ::arrow::internal::DynamicDispatchTarget{ \ + ::arrow::internal::DispatchLevel::NEON, \ + (func), \ + }, +#else +# define ARROW_DISPATCH_TARGET_NEON(func) +#endif + +/// A concept to specify how dynamic dispatch should be handled. +/// +/// A requirement is that the list of available targets must be compile time +/// array with at least one target available for static dispatch. +template +concept DynamicDispatchSpec = requires { + typename T::FunctionType; + + { T::implementations() } -> DynamicDispatchTargets; + requires T::implementations().size() > 0; + requires DispatchHasStatic(T::implementations()); +}; + +/// Refinement of DynamicDispatchSpec where all targets are statically available. 
+/// +/// Subsumes DynamicDispatchSpec, enabling a more specialized DynamicDispatch +/// implementation. +template +concept DynamicDispatchFullyStaticSpec = + DynamicDispatchSpec && + DispatchFullyStatic(T::implementations()); + /* A facility for dynamic dispatch according to available DispatchLevel. @@ -47,7 +199,7 @@ enum class DispatchLevel : int { struct MyDynamicFunction { using FunctionType = decltype(&my_function_default); - static std::vector> implementations() { + static std::array, N> implementations() { return { { DispatchLevel::NONE, my_function_default } #if defined(ARROW_HAVE_RUNTIME_AVX2) @@ -62,37 +214,70 @@ enum class DispatchLevel : int { return dispatch.func(...); } */ -template -class DynamicDispatch { - protected: - using FunctionType = typename DynamicFunction::FunctionType; - using Implementation = std::pair; - public: - DynamicDispatch() { Resolve(DynamicFunction::implementations()); } - - FunctionType func = {}; +/// Dynamic dispatcher between functions targeting different micro-architectures. +/// +/// The dispatcher is configured with a ``DynamicDispatchSpec`` to list available +/// targets (function and dispatch level pair). +/// The dispatch mechanism uses a combination of compile time computation and +/// preprocessor macros to fall back to the best static dispatch when, due to build +/// configurations, no target is dynamically available. +/// This is for example the case on macOS where Neon is always available while SVE +/// never is. This is also the case when Arrow is compiled with an advanced baseline. +/// For instance if the baseline is AVX2 and there is no AVX512 target provided, +/// then the dispatch will be fully static. +/// +/// Typical usage involves ``ARROW_DISPATCH_TARGET_`` macros to avoid referencing +/// functions that may not be available on certain build configurations. 
+/// +/// ```cpp +/// struct MyFunctionDyn { +/// using FunctionType = decltype(&MyFuncScalar); +/// +/// static constexpr auto implementations() { +/// return std::array{ +/// ARROW_DISPATCH_TARGET_NONE(&MyFuncScalar) // +/// ARROW_DISPATCH_TARGET_NEON(&MyFuncNeon) // +/// ARROW_DISPATCH_TARGET_SSE4_2(&MyFuncSse42) // +/// ARROW_DISPATCH_TARGET_AVX2(&MyFuncAvx2) // +/// ARROW_DISPATCH_TARGET_AVX512(&MyFuncAvx512) // +/// }; +/// } +/// }; +/// ``` +/// +/// And then used with the ``DynamicDispatch`` as such: +/// +/// ```cpp +/// int MyFunc(const uint8_t* input, int param) { +/// static const DynamicDispatch dispatch; +/// return dispatch(input, param); +/// } +/// ``` +template +class DynamicDispatch; - protected: - // Use the Implementation with the highest DispatchLevel - template - void Resolve(const Range& implementations) { - Implementation cur{DispatchLevel::NONE, {}}; +template +class DynamicDispatch { + public: + using FunctionType = typename DynamicFunction::FunctionType; + using Target = DynamicDispatchTarget; + static constexpr auto kTargets = DynamicFunction::implementations(); - for (const auto& impl : implementations) { - if (impl.first >= cur.first && IsSupported(impl.first)) { - // Higher (or same) level than current - cur = impl; - } - } + DynamicDispatch() { + const auto best = BestDispatchTarget( + kTargets, [this](const Target& trgt) { return IsSupported(trgt.first); }); + func = best.second; + } - if (!cur.second) { - Status::Invalid("No appropriate implementation found").Abort(); - } - func = cur.second; + template + auto operator()(Args&&... 
args) const -> decltype(auto) { + return func(std::forward(args)...); } private: + FunctionType func = {}; + bool IsSupported(DispatchLevel level) const { static const auto cpu_info = arrow::internal::CpuInfo::GetInstance(); @@ -111,5 +296,20 @@ class DynamicDispatch { } }; -} // namespace internal -} // namespace arrow +/// Specialization for the fully-static case: best target is resolved at compile time, +/// no runtime CPU detection needed. +template +class DynamicDispatch { + public: + using FunctionType = typename DynamicFunction::FunctionType; + using Target = DynamicDispatchTarget; + static constexpr auto kTargets = DynamicFunction::implementations(); + static constexpr FunctionType kBest = BestDispatchTarget(kTargets).second; + + template + auto operator()(Args&&... args) const -> decltype(auto) { + return kBest(std::forward(args)...); + } +}; + +} // namespace arrow::internal diff --git a/cpp/src/parquet/level_comparison.cc b/cpp/src/parquet/level_comparison.cc index 2159ebe1a9cb..ffde506eb8fa 100644 --- a/cpp/src/parquet/level_comparison.cc +++ b/cpp/src/parquet/level_comparison.cc @@ -35,33 +35,28 @@ namespace { using ::arrow::internal::DispatchLevel; using ::arrow::internal::DynamicDispatch; +using ::arrow::internal::DynamicDispatchTarget; // defined in level_comparison_avx2.cc struct GreaterThanDynamicFunction { using FunctionType = decltype(&GreaterThanBitmap); - using Implementation = std::pair; static constexpr auto implementations() { return std::array{ - Implementation{DispatchLevel::NONE, standard::GreaterThanBitmapImpl}, -#if defined(ARROW_HAVE_RUNTIME_AVX2) - Implementation{DispatchLevel::AVX2, GreaterThanBitmapAvx2}, -#endif + ARROW_DISPATCH_TARGET_NONE(&standard::GreaterThanBitmapImpl) // + ARROW_DISPATCH_TARGET_AVX2(&GreaterThanBitmapAvx2) // }; } }; struct MinMaxDynamicFunction { using FunctionType = decltype(&FindMinMax); - using Implementation = std::pair; static constexpr auto implementations() { return std::array{ - 
Implementation{DispatchLevel::NONE, standard::FindMinMaxImpl}, -#if defined(ARROW_HAVE_RUNTIME_AVX2) - Implementation{DispatchLevel::AVX2, FindMinMaxAvx2}, -#endif + ARROW_DISPATCH_TARGET_NONE(&standard::FindMinMaxImpl) // + ARROW_DISPATCH_TARGET_AVX2(&FindMinMaxAvx2) // }; } }; @@ -70,12 +65,12 @@ struct MinMaxDynamicFunction { uint64_t GreaterThanBitmap(const int16_t* levels, int64_t num_levels, int16_t rhs) { static DynamicDispatch dispatch; - return dispatch.func(levels, num_levels, rhs); + return dispatch(levels, num_levels, rhs); } MinMax FindMinMax(const int16_t* levels, int64_t num_levels) { static DynamicDispatch dispatch; - return dispatch.func(levels, num_levels); + return dispatch(levels, num_levels); } } // namespace parquet::internal