From eaa8da36b266651a86abb1e1a248f1edf6ddd746 Mon Sep 17 00:00:00 2001 From: Yuri Astrakhan Date: Wed, 4 Mar 2026 14:51:34 -0500 Subject: [PATCH] Add support for configurable SIMD modes in CMake build system --- .github/workflows/macos-ci.yml | 19 +++++++---- .github/workflows/ubuntu-ci.yml | 39 +++++++++++++++------- .github/workflows/vs-ci.yml | 4 +-- .github/workflows/vs17-arm-ci.yml | 4 +-- CMakeLists.txt | 34 +++++++++++++++---- README.md | 47 +++++++++++++++++++++++++++ cmake_modules/DetectCPUFeatures.cmake | 10 +++--- 7 files changed, 125 insertions(+), 32 deletions(-) diff --git a/.github/workflows/macos-ci.yml b/.github/workflows/macos-ci.yml index d0035c05..997e515d 100644 --- a/.github/workflows/macos-ci.yml +++ b/.github/workflows/macos-ci.yml @@ -4,16 +4,23 @@ on: [push, pull_request] jobs: macos-build: + name: macos (${{ matrix.simd_mode }}) runs-on: macos-latest + strategy: + fail-fast: false + matrix: + simd_mode: [portable, native] + steps: - - uses: actions/checkout@v2 - - name: Use cmake (default) + - uses: actions/checkout@v6 + - name: Build and test (Release, ${{ matrix.simd_mode }}) run: | - cmake -B build -D CMAKE_BUILD_TYPE=Release + cmake -B build -D CMAKE_BUILD_TYPE=Release -D FASTPFOR_SIMD_MODE=${{ matrix.simd_mode }} cmake --build build ctest --test-dir build --output-on-failure - - name: Use cmake (debug) + - name: Build and test (Debug, ${{ matrix.simd_mode }}) run: | - cmake -B build -D CMAKE_BUILD_TYPE=Debug + rm -rf build + cmake -B build -D CMAKE_BUILD_TYPE=Debug -D FASTPFOR_SIMD_MODE=${{ matrix.simd_mode }} cmake --build build - ctest --test-dir build --output-on-failure \ No newline at end of file + ctest --test-dir build --output-on-failure diff --git a/.github/workflows/ubuntu-ci.yml b/.github/workflows/ubuntu-ci.yml index b0bd4c7f..a1c41aa1 100644 --- a/.github/workflows/ubuntu-ci.yml +++ b/.github/workflows/ubuntu-ci.yml @@ -2,31 +2,46 @@ name: Ubuntu-CI on: [push, pull_request] - - jobs: ci: - name: ubuntu-gcc + name: 
ubuntu-gcc (${{ matrix.simd_mode }}) runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + simd_mode: [portable, native] env: CC: gcc CXX: g++ - steps: - - uses: actions/checkout@v2 - - name: Use cmake (default) + steps: + - uses: actions/checkout@v6 + - name: Build and test (Release, ${{ matrix.simd_mode }}) run: | - cmake -B build -D CMAKE_BUILD_TYPE=Release + cmake -B build -D CMAKE_BUILD_TYPE=Release -D FASTPFOR_SIMD_MODE=${{ matrix.simd_mode }} cmake --build build ctest --test-dir build --output-on-failure - - name: Use cmake (debug) + - name: Build and test (Debug, ${{ matrix.simd_mode }}) run: | - cmake -B build -D CMAKE_BUILD_TYPE=Debug + rm -rf build + cmake -B build -D CMAKE_BUILD_TYPE=Debug -D FASTPFOR_SIMD_MODE=${{ matrix.simd_mode }} cmake --build build ctest --test-dir build --output-on-failure - - name: Use cmake (sanitizers) + + + ci-sanitizers: + name: ubuntu-gcc (sanitizers) + runs-on: ubuntu-latest + + env: + CC: gcc + CXX: g++ + + steps: + - uses: actions/checkout@v6 + - name: Build and test with sanitizers run: | - cmake -B build -D FASTPFOR_SANITIZE=ON + cmake -B build -D FASTPFOR_SANITIZE=ON -D FASTPFOR_SIMD_MODE=portable cmake --build build - ctest --test-dir build --output-on-failure \ No newline at end of file + ctest --test-dir build --output-on-failure diff --git a/.github/workflows/vs-ci.yml b/.github/workflows/vs-ci.yml index 77940cd6..cace824e 100644 --- a/.github/workflows/vs-ci.yml +++ b/.github/workflows/vs-ci.yml @@ -8,7 +8,7 @@ jobs: runs-on: windows-latest steps: - name: checkout - uses: actions/checkout@v2 + uses: actions/checkout@v6 - name: Configure run: | cmake -B build @@ -23,4 +23,4 @@ jobs: - name: Run Debug tests run: | cd build - ctest -C Debug -LE explicitonly --output-on-failure \ No newline at end of file + ctest -C Debug -LE explicitonly --output-on-failure diff --git a/.github/workflows/vs17-arm-ci.yml b/.github/workflows/vs17-arm-ci.yml index 74b55b72..4bac4be7 100644 --- 
a/.github/workflows/vs17-arm-ci.yml +++ b/.github/workflows/vs17-arm-ci.yml @@ -13,7 +13,7 @@ jobs: - {gen: Visual Studio 17 2022, arch: ARM64} steps: - name: checkout - uses: actions/checkout@v2 + uses: actions/checkout@v6 - name: Configure run: | cmake -B build @@ -28,4 +28,4 @@ jobs: - name: Run Debug tests run: | cd build - ctest -C Debug -LE explicitonly --output-on-failure \ No newline at end of file + ctest -C Debug -LE explicitonly --output-on-failure diff --git a/CMakeLists.txt b/CMakeLists.txt index c32c332f..e8dfe8d6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -87,13 +87,35 @@ if(${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_CXX_COMPILER_ID} STREQUAL target_compile_options(FastPFOR PRIVATE -Wall -Wextra -Weffc++ -Wsign-compare -Wshadow -Wwrite-strings -Wpointer-arith -Winit-self -Wconversion -Wno-sign-conversion) endif() include(CheckCXXCompilerFlag) -unset(FASTPFOR_COMPILER_SUPPORTS_MARCH_NATIVE CACHE) -CHECK_CXX_COMPILER_FLAG(-march=native FASTPFOR_COMPILER_SUPPORTS_MARCH_NATIVE) -if(FASTPFOR_COMPILER_SUPPORTS_MARCH_NATIVE) - target_compile_options(FastPFOR PRIVATE -march=native) + +# SIMD mode: native, portable, or runtime +# - native: Use -march=native for maximum performance on the build machine (not portable) +# - portable: Use baseline SSE4.2 only for maximum compatibility (default) +# - runtime: Use function multi-versioning for runtime CPU dispatch (future) +set(FASTPFOR_SIMD_MODE "portable" CACHE STRING "SIMD compilation mode: native, portable, or runtime") +set_property(CACHE FASTPFOR_SIMD_MODE PROPERTY STRINGS native portable runtime) + +if(FASTPFOR_SIMD_MODE STREQUAL "native") + unset(FASTPFOR_COMPILER_SUPPORTS_MARCH_NATIVE CACHE) + CHECK_CXX_COMPILER_FLAG(-march=native FASTPFOR_COMPILER_SUPPORTS_MARCH_NATIVE) + if(FASTPFOR_COMPILER_SUPPORTS_MARCH_NATIVE) + target_compile_options(FastPFOR PRIVATE -march=native) + else() + message(STATUS "native target not supported, falling back to portable mode") + target_compile_options(FastPFOR 
PRIVATE -msse4.2) + endif() +elseif(FASTPFOR_SIMD_MODE STREQUAL "portable") + # Baseline: SSE4.2 is required by FastPFOR SIMD code + target_compile_options(FastPFOR PRIVATE -msse4.2) +elseif(FASTPFOR_SIMD_MODE STREQUAL "runtime") + # Runtime dispatch: compile with baseline SSE4.2 and enable multi-versioning + target_compile_options(FastPFOR PRIVATE -msse4.2) + target_compile_definitions(FastPFOR PRIVATE FASTPFOR_RUNTIME_DISPATCH) + message(STATUS "Runtime dispatch mode is experimental") else() - message(STATUS "native target not supported") + message(FATAL_ERROR "Invalid FASTPFOR_SIMD_MODE: ${FASTPFOR_SIMD_MODE}. Use native, portable, or runtime.") endif() +message(STATUS "FASTPFOR_SIMD_MODE: ${FASTPFOR_SIMD_MODE}") MESSAGE( STATUS "CMAKE_CXX_FLAGS_DEBUG: " ${CMAKE_CXX_FLAGS_DEBUG} ) MESSAGE( STATUS "CMAKE_CXX_FLAGS_RELEASE: " ${CMAKE_CXX_FLAGS_RELEASE} ) @@ -217,4 +239,4 @@ if (SUPPORT_NEON) message(WARNING "Building with emulation with SIMDE for ARM NEON support.") message(WARNING "We do not actually support ARM NEON natively.") message(WARNING "If you actually want native ARM NEON support, please consider providing a patch.") -endif() \ No newline at end of file +endif() diff --git a/README.md b/README.md index 4d78f8fb..23d25579 100644 --- a/README.md +++ b/README.md @@ -152,6 +152,53 @@ It may be necessary to set the CXX variable. The project is installable (`make i To create project files for Microsoft Visual Studio, it might be useful to target 64-bit Windows (e.g., see http://www.cmake.org/cmake/help/v3.0/generator/Visual%20Studio%2012%202013.html). +### CMake Options + +#### FASTPFOR_SIMD_MODE + +Controls how SIMD instructions are compiled. This affects portability and performance: + +| Mode | Flag | Description | +|------|------|-------------| +| `portable` | `-msse4.2` | **Default.** Compiles with SSE4.2 baseline only. Binaries will run on any x86-64 CPU from ~2008 onwards. Best for distributable libraries and CI builds. 
| +| `native` | `-march=native` | Compiles with all SIMD instructions supported by the build machine (may include AVX, AVX2, AVX-512, etc.). Maximum performance but binaries may crash with `SIGILL` on CPUs that lack the required instructions. | +| `runtime` | `-msse4.2` + `FASTPFOR_RUNTIME_DISPATCH` | Experimental. Intended for future runtime CPU dispatch using function multi-versioning. | + +**Usage:** + +```bash +# Portable build (default) - safe for distribution +cmake -B build -DFASTPFOR_SIMD_MODE=portable + +# Native build - maximum performance on build machine +cmake -B build -DFASTPFOR_SIMD_MODE=native + +# Check which mode is active in CMake output +cmake -B build +# Look for: "FASTPFOR_SIMD_MODE: portable" in the output +``` + +**When to use each mode:** + +- Use `portable` (default) when building binaries that will run on different machines, in CI/CD pipelines, or when distributing pre-built libraries. +- Use `native` when building for a specific machine where maximum performance is needed and you know the binary won't be moved to a different CPU. + +#### FASTPFOR_SANITIZE + +Enable address sanitizer for debugging memory issues: + +```bash +cmake -B build -DFASTPFOR_SANITIZE=ON +``` + +#### FASTPFOR_WITH_TEST + +Build with Google Test (enabled by default): + +```bash +cmake -B build -DFASTPFOR_WITH_TEST=OFF # Disable tests +``` + ### Multithreaded context You should not assume that our objects are thread safe. 
diff --git a/cmake_modules/DetectCPUFeatures.cmake b/cmake_modules/DetectCPUFeatures.cmake index 412f53b3..c22d039c 100644 --- a/cmake_modules/DetectCPUFeatures.cmake +++ b/cmake_modules/DetectCPUFeatures.cmake @@ -30,7 +30,7 @@ return _mm256_movemask_epi8(x); }") if(MSVC) - message(STATUS "TEST 2") + message(STATUS "Detecting CPU features (MSVC)") set(CMAKE_REQUIRED_FLAGS "/EHsc /arch:SSE2") check_cxx_source_compiles("${SSE4PROG}" SUPPORT_SSE42) message(STATUS "SUPPORT_SSE42 ${SUPPORT_SSE42}") @@ -41,11 +41,13 @@ if(MSVC) check_cxx_source_compiles("${AVX2PROG}" SUPPORT_AVX2) message(STATUS "SUPPORT_AVX2 ${SUPPORT_AVX2}") else() - set(CMAKE_REQUIRED_FLAGS "-march=native -msse4.2") + # Test compiler support for SIMD instruction sets + # Note: This tests compiler capability, not runtime CPU support + set(CMAKE_REQUIRED_FLAGS "-msse4.2") check_cxx_source_compiles("${SSE4PROG}" SUPPORT_SSE42) - set(CMAKE_REQUIRED_FLAGS "-march=native -mavx") + set(CMAKE_REQUIRED_FLAGS "-mavx") check_cxx_source_compiles("${AVXPROG}" SUPPORT_AVX) - set(CMAKE_REQUIRED_FLAGS "-march=native -mavx2") + set(CMAKE_REQUIRED_FLAGS "-mavx2") check_cxx_source_compiles("${AVX2PROG}" SUPPORT_AVX2) endif()