diff --git a/.devcontainer/coverage.sh b/.devcontainer/coverage.sh index 99addc712e366c52c12b7f13f5bbc102700c23a7..84f6b4eae60be09b31331df6f5be7192a8c98dc7 100755 --- a/.devcontainer/coverage.sh +++ b/.devcontainer/coverage.sh @@ -9,7 +9,7 @@ set -eu BUILD_ID="kleidicv-coverage" \ CMAKE_CXX_FLAGS="--target=aarch64-linux-gnu --coverage" \ CMAKE_EXE_LINKER_FLAGS="--rtlib=compiler-rt -static -fuse-ld=lld" \ -EXTRA_CMAKE_ARGS="-DKLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS=OFF -DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF" \ +EXTRA_CMAKE_ARGS="-DKLEIDICV_ENABLE_SME2=ON -DKLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS=OFF -DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF" \ ./scripts/build.sh kleidicv-test # Clean any coverage results from previous runs diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 8c7747b2c0433a73bdba93a5ee9c6bf14374a8ad..75f25995d6587b73eb544881346405dead816eee 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -13,7 +13,7 @@ "env": { "CMAKE_CXX_FLAGS": "--target=aarch64-linux-gnu", "CMAKE_EXE_LINKER_FLAGS": "--rtlib=compiler-rt -static -fuse-ld=lld", - "EXTRA_CMAKE_ARGS": "-DKLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS=OFF -DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF -DCMAKE_EXPORT_COMPILE_COMMANDS=1" + "EXTRA_CMAKE_ARGS": "-DKLEIDICV_ENABLE_SME2=ON -DKLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS=OFF -DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF -DCMAKE_EXPORT_COMPILE_COMMANDS=1" } }, "group": { @@ -35,7 +35,7 @@ "CMAKE_BUILD_TYPE": "Debug", "CMAKE_CXX_FLAGS": "--target=aarch64-linux-gnu", "CMAKE_EXE_LINKER_FLAGS": "--rtlib=compiler-rt -static -fuse-ld=lld", - "EXTRA_CMAKE_ARGS": "-DKLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS=OFF -DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF" + "EXTRA_CMAKE_ARGS": "-DKLEIDICV_ENABLE_SME2=ON -DKLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS=OFF -DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF" } }, "group": { @@ -76,7 +76,7 @@ "BUILD_ID": "kleidicv-benchmark", "CMAKE_CXX_FLAGS": "--target=aarch64-linux-gnu", "CMAKE_EXE_LINKER_FLAGS": "--rtlib=compiler-rt -static -fuse-ld=lld", - "EXTRA_CMAKE_ARGS": "-DKLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS=OFF -DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF -DKLEIDICV_BENCHMARK=ON" + "EXTRA_CMAKE_ARGS": "-DKLEIDICV_ENABLE_SME2=ON -DKLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS=OFF -DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF -DKLEIDICV_BENCHMARK=ON" } }, "group": { diff --git a/CHANGELOG.md b/CHANGELOG.md index b0f6b44f1e5de529e458927562f651589f1fa533..18ef579e04d33981b088e2dd63df3029b0126430 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,12 @@ This changelog aims to follow the guiding principles of - Implementation for cv::pyrDown in the OpenCV HAL. - Sum implementation for 1-channel f32 input (not exposed to OpenCV) +### Changed +- Build options `KLEIDICV_ENABLE_SVE2` and `KLEIDICV_ENABLE_SME2` take effect directly. + Previously the build scripts had additional checks that attempted to identify whether the compiler supported SVE2/SME2 - these checks have been removed. +- The default setting for `KLEIDICV_ENABLE_SVE2` is on for some popular compilers known to support SVE2, otherwise off. +- `KLEIDICV_ENABLE_SME2` defaults to off. This is because the ACLE SME specification has not yet been finalized. + ## 0.2.0 - 2024-09-30 ### Added diff --git a/adapters/opencv/CMakeLists.txt b/adapters/opencv/CMakeLists.txt index 6ff36b43598f9fa75b51eacfa771a42fd37b0e8c..66cc4725f1f081a393648e9f89a62ba98b57be7b 100644 --- a/adapters/opencv/CMakeLists.txt +++ b/adapters/opencv/CMakeLists.txt @@ -36,7 +36,7 @@ if(NOT BUILD_SHARED_LIBS) DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev ) - if(KLEIDICV_BUILD_SVE2) + if(KLEIDICV_ENABLE_SVE2) ocv_install_target(kleidicv_sve2 EXPORT OpenCVModules ARCHIVE @@ -44,7 +44,7 @@ if(NOT BUILD_SHARED_LIBS) COMPONENT dev ) endif() - if(KLEIDICV_BUILD_SME2) + if(KLEIDICV_ENABLE_SME2) ocv_install_target(kleidicv_sme2 EXPORT OpenCVModules ARCHIVE diff --git a/doc/build.md b/doc/build.md index 69137b6420a7066b521ab59105a1124749b00618..14c01d83b919f0dde2d9ec2a31271a9219b1f483 100644 --- a/doc/build.md +++ b/doc/build.md @@ -119,7 +119,7 @@ cmake --build build-kleidicv-linux --parallel In addition to the standard CMake settings, KleidiCV behaviour can be modified at build time via the following CMake options: - `KLEIDICV_BENCHMARK` - Enable building KleidiCV benchmarks. The benchmarks use Google Benchmark which will be downloaded automatically. Off by default. -- `KLEIDICV_ENABLE_SME2` - Enable Scalable Matrix Extension 2 and Streaming Scalable Vector Extension code paths if supported by the compiler. On by default. - - `KLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS` - Limit Scalable Matrix Extension 2 code paths to cases where it is expected to provide a benefit over other code paths. On by default. Has no effect if `KLEIDICV_ENABLE_SME2` is false. -- `KLEIDICV_ENABLE_SVE2` - Enable Scalable Vector Extension 2 code paths if supported by the compiler. On by default. - - `KLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS` - Limit Scalable Vector Extension 2 code paths to cases where it is expected to provide a benefit over other code paths. On by default. Has no effect if `KLEIDICV_ENABLE_SVE2` is false. +- `KLEIDICV_ENABLE_SME2` - Enable Scalable Matrix Extension 2 and Streaming Scalable Vector Extension code paths. Off by default while the [ACLE SME specification is in beta](https://github.com/ARM-software/acle/blob/main/main/acle.md#sme-language-extensions-and-intrinsics). + - `KLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS` - Limit Scalable Matrix Extension 2 code paths to cases where it is expected to provide a benefit over other code paths. On by default. Has no effect if `KLEIDICV_ENABLE_SME2` is off. +- `KLEIDICV_ENABLE_SVE2` - Enable Scalable Vector Extension 2 code paths. This is on by default for some popular compilers known to support SVE2 but otherwise off by default. + - `KLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS` - Limit Scalable Vector Extension 2 code paths to cases where it is expected to provide a benefit over other code paths. On by default. Has no effect if `KLEIDICV_ENABLE_SVE2` is off. diff --git a/kleidicv/CMakeLists.txt b/kleidicv/CMakeLists.txt index 4c17e4e6f70ae9e86871cef7fcef20037c2b7c78..0b0fa2c81eeb1040e309c5d4b49b946e59bfdaaf 100644 --- a/kleidicv/CMakeLists.txt +++ b/kleidicv/CMakeLists.txt @@ -6,15 +6,29 @@ cmake_minimum_required(VERSION 3.16) project("KleidiCV") -include(CheckCXXCompilerFlag) - -if (CMAKE_SYSTEM_NAME MATCHES "^(Linux|Android)$") - option(KLEIDICV_ENABLE_SVE2 "Enable SVE2 code paths" ON) - option(KLEIDICV_ENABLE_SME2 "Enable SME2 code paths" ON) +# Choose the default value for the KLEIDICV_ENABLE_SVE2 option +# according to the compiler version. The list of compiler versions +# recognised as supporting SVE may be extended in future. +# check_cxx_compiler_flag is not used to test whether the compiler +# supports +sve2 since this may succeed for compilers that have only +# partial SVE support. +if ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND + CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12) OR + (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND + CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 10) +) + set(KLEIDICV_ENABLE_SVE2_DEFAULT ON) else() - set(KLEIDICV_ENABLE_SVE2 OFF) - set(KLEIDICV_ENABLE_SME2 OFF) + set(KLEIDICV_ENABLE_SVE2_DEFAULT OFF) endif() + +option(KLEIDICV_ENABLE_SVE2 "Enable SVE2 code paths" ${KLEIDICV_ENABLE_SVE2_DEFAULT}) + +# KleidiCV's SME2 support is known to be compatible with Clang 19.1 but +# it is disabled by default while the ACLE SME specification is in beta. +# https://github.com/ARM-software/acle/blob/main/main/acle.md#sme-language-extensions-and-intrinsics +option(KLEIDICV_ENABLE_SME2 "Enable SME2 code paths" OFF) + option( KLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS "Limits SME2 code paths to selected algorithms. Has no effect if KLEIDICV_ENABLE_SME2 is false." @@ -37,45 +51,11 @@ option(KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE "Internal - If turned ON interlea option(KLEIDICV_EXPERIMENTAL_FEATURE_CANNY "Internal - Enable experimental Canny algorithm" OFF) option(KLEIDICV_CANNY_ALGORITHM_CONFORM_OPENCV "Internal - If turned ON Canny algorithm creates bit exact result compared to OpenCV's original implementation" ON) -# Compiler feature check for SVE2. -set(KLEIDICV_BUILD_SVE2 OFF) -if (KLEIDICV_ENABLE_SVE2) - check_cxx_compiler_flag("-march=armv8-a+sve2" KLEIDICV_COMPILER_SUPPORTS_SVE2) - - # SVE2 support is incomplete in earlier versions of Clang. - if (CMAKE_CXX_COMPILER_ID MATCHES ".*Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 12) - set(KLEIDICV_COMPILER_SUPPORTS_SVE2 OFF) - endif() - - if (KLEIDICV_COMPILER_SUPPORTS_SVE2) - set(KLEIDICV_BUILD_SVE2 ON) - else() - message(STATUS "[KleidiCV] Compiler does not support SVE2") - endif() -endif() - -# Compiler feature check for SME2. -set(KLEIDICV_BUILD_SME2 OFF) -if (KLEIDICV_ENABLE_SME2) - # Assuming only clang >= 19 supports SME2 - if ((NOT CMAKE_CXX_COMPILER_ID MATCHES ".*Clang") OR CMAKE_CXX_COMPILER_VERSION VERSION_LESS 19) - set(KLEIDICV_COMPILER_SUPPORTS_SME2 OFF) - else() - check_cxx_compiler_flag("-march=armv9-a+sme2" KLEIDICV_COMPILER_SUPPORTS_SME2) - endif() - - if (KLEIDICV_COMPILER_SUPPORTS_SME2) - set(KLEIDICV_BUILD_SME2 ON) - else() - message(STATUS "[KleidiCV] Compiler does not support SME2") - endif() -endif() - -if(KLEIDICV_BUILD_SME2 AND NOT KLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS) +if(KLEIDICV_ENABLE_SME2 AND NOT KLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS) set(KLEIDICV_ALWAYS_ENABLE_SME2 ON) endif() -if(KLEIDICV_BUILD_SVE2 AND NOT KLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS) +if(KLEIDICV_ENABLE_SVE2 AND NOT KLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS) set(KLEIDICV_ALWAYS_ENABLE_SVE2 ON) endif() @@ -158,7 +138,7 @@ target_compile_options(kleidicv_neon PRIVATE "-DKLEIDICV_TARGET_NEON=1" ) -if(KLEIDICV_BUILD_SVE2) +if(KLEIDICV_ENABLE_SVE2) add_library(kleidicv_sve2 OBJECT ${KLEIDICV_SVE2_SOURCES}) target_include_directories(kleidicv_sve2 PRIVATE ${KLEIDICV_INCLUDE_DIRS}) set_target_properties(kleidicv_sve2 PROPERTIES CXX_STANDARD 17) @@ -169,7 +149,7 @@ if(KLEIDICV_BUILD_SVE2) ) endif() -if(KLEIDICV_BUILD_SME2) +if(KLEIDICV_ENABLE_SME2) add_library(kleidicv_sme2 OBJECT ${KLEIDICV_SME2_SOURCES}) target_include_directories(kleidicv_sme2 PRIVATE ${KLEIDICV_INCLUDE_DIRS}) set_target_properties(kleidicv_sme2 PROPERTIES CXX_STANDARD 17) @@ -186,12 +166,12 @@ set_target_properties(kleidicv PROPERTIES CXX_STANDARD 17) target_compile_options(kleidicv PRIVATE ${KLEIDICV_CXX_FLAGS}) target_link_libraries(kleidicv PRIVATE kleidicv_neon) -if(KLEIDICV_BUILD_SVE2) +if(KLEIDICV_ENABLE_SVE2) target_compile_definitions(kleidicv PRIVATE KLEIDICV_HAVE_SVE2) target_link_libraries(kleidicv PRIVATE kleidicv_sve2) endif() -if(KLEIDICV_BUILD_SME2) +if(KLEIDICV_ENABLE_SME2) target_compile_definitions(kleidicv PRIVATE KLEIDICV_HAVE_SME2) target_link_libraries(kleidicv PRIVATE kleidicv_sme2) endif() diff --git a/kleidicv/include/kleidicv/config.h.in b/kleidicv/include/kleidicv/config.h.in index 65196e761ee7b324581b2050837a948b94511b1e..18a0970f34befde939d304092ab5f2cbc425344f 100644 --- a/kleidicv/include/kleidicv/config.h.in +++ b/kleidicv/include/kleidicv/config.h.in @@ -21,8 +21,6 @@ #cmakedefine01 KLEIDICV_CANNY_ALGORITHM_CONFORM_OPENCV -#cmakedefine01 KLEIDICV_COMPILER_SUPPORTS_SME2 - // Set to '1' if compiling NEON code paths, otherwise it is set to '0'. #ifndef KLEIDICV_TARGET_NEON #define KLEIDICV_TARGET_NEON 0 diff --git a/scripts/benchmark/build.sh b/scripts/benchmark/build.sh index 2515e274b07e7b0be58ac3e77bfab21e7fc0e227..49e291a9ff3440ba6e886c296a2ec4d3e110d786 100755 --- a/scripts/benchmark/build.sh +++ b/scripts/benchmark/build.sh @@ -62,6 +62,7 @@ export EXTRA_CMAKE_ARGS="\ ${COMMON_EXTRA_CMAKE_ARGS} \ -DWITH_KLEIDICV=ON \ -DKLEIDICV_SOURCE_PATH=${KLEIDICV_SOURCE_PATH} \ + -DKLEIDICV_ENABLE_SME2=ON \ ${KLEIDICV_EXTRA_CMAKE_OPTIONS:-} \ " @@ -78,6 +79,7 @@ export EXTRA_CMAKE_ARGS="\ ${COMMON_EXTRA_CMAKE_ARGS} \ -DWITH_KLEIDICV=ON \ -DKLEIDICV_SOURCE_PATH=${KLEIDICV_SOURCE_PATH} \ + -DKLEIDICV_ENABLE_SME2=ON \ ${CUSTOM_CMAKE_OPTIONS} \ " diff --git a/scripts/ci-opencv.sh b/scripts/ci-opencv.sh index f46892aedc132ec3d6561493d2e14d5aa4e9c871..22a17dec6f60681ec581f8bfed47185fa1f842dd 100755 --- a/scripts/ci-opencv.sh +++ b/scripts/ci-opencv.sh @@ -19,7 +19,7 @@ tar -xzf /opt/opencv-${OPENCV_VERSION}.tar.gz -C build/unpatched-opencv-src BUILD_ID=unpatched-opencv \ OPENCV_PATH="$(pwd)/build/unpatched-opencv-src/opencv-${OPENCV_VERSION}" \ CMAKE_EXE_LINKER_FLAGS="--rtlib=compiler-rt -fuse-ld=lld" \ -EXTRA_CMAKE_ARGS="-DBUILD_SHARED_LIBS=OFF -DWITH_KLEIDICV=ON -DKLEIDICV_SOURCE_PATH=$(pwd)" \ +EXTRA_CMAKE_ARGS="-DBUILD_SHARED_LIBS=OFF -DWITH_KLEIDICV=ON -DKLEIDICV_SOURCE_PATH=$(pwd) -DKLEIDICV_ENABLE_SME2=ON" \ ./scripts/build-opencv.sh # ------------------------------------------------------------------------------ diff --git a/scripts/ci.sh b/scripts/ci.sh index 044464ad38029aaf56cd471fd8dda537bdeb66e4..43a09ce95742d5d27b1c481c1dbd95e471197366 100755 --- a/scripts/ci.sh +++ b/scripts/ci.sh @@ -42,6 +42,7 @@ cmake -S . -B build -G Ninja \ -DCMAKE_CXX_CLANG_TIDY=clang-tidy-19 \ -DCMAKE_CXX_FLAGS="--target=aarch64-linux-gnu --coverage" \ -DCMAKE_EXE_LINKER_FLAGS="--rtlib=compiler-rt -static -fuse-ld=lld" \ + -DKLEIDICV_ENABLE_SME2=ON \ -DKLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS=OFF \ -DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF \ -DKLEIDICV_CHECK_BANNED_FUNCTIONS=ON @@ -101,6 +102,7 @@ cmake -S . -B build/build-benchmark -G Ninja \ -DCMAKE_SYSTEM_NAME=Linux \ -DCMAKE_SYSTEM_PROCESSOR=aarch64 \ -DKLEIDICV_BENCHMARK=ON \ + -DKLEIDICV_ENABLE_SME2=ON \ -DKLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS=OFF \ -DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF ninja -C build/build-benchmark kleidicv-benchmark diff --git a/scripts/run_opencv_conformity_checks.sh b/scripts/run_opencv_conformity_checks.sh index a4d9625ae889f6997a431ba838111cba7f2cbdd6..87248cf31c00f5983318aec73f022b232d4e1c96 100755 --- a/scripts/run_opencv_conformity_checks.sh +++ b/scripts/run_opencv_conformity_checks.sh @@ -81,7 +81,8 @@ ninja -C "${OPENCV_DEFAULT_PATH}" subordinate cmake "${common_cmake_args[@]}" \ -B "${OPENCV_KLEIDICV_PATH}" \ -DWITH_KLEIDICV=ON \ - -DKLEIDICV_SOURCE_PATH="${KLEIDICV_SOURCE_PATH}" + -DKLEIDICV_SOURCE_PATH="${KLEIDICV_SOURCE_PATH}" \ + -DKLEIDICV_ENABLE_SME2=ON ninja -C "${OPENCV_KLEIDICV_PATH}" manager TESTRESULT=0