diff --git a/.devcontainer/coverage.sh b/.devcontainer/coverage.sh index 1abf33d6c2877d63b06da6c72f5029927e9cb933..def7d4ab3d69f461917ef94a631947a92ce3ee17 100755 --- a/.devcontainer/coverage.sh +++ b/.devcontainer/coverage.sh @@ -9,7 +9,7 @@ set -eu BUILD_ID="kleidicv-coverage" \ CMAKE_CXX_FLAGS="--target=aarch64-linux-gnu --coverage" \ CMAKE_EXE_LINKER_FLAGS="--rtlib=compiler-rt -static -fuse-ld=lld" \ -EXTRA_CMAKE_ARGS="-DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF" \ +EXTRA_CMAKE_ARGS="-DKLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS=OFF -DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF" \ ./scripts/build.sh kleidicv-test # Clean any coverage results from previous runs diff --git a/.vscode/tasks.json b/.vscode/tasks.json index f28727db8a9f43cef5551935055bbf0fa9a3d12e..7590c4c6d38abcc0813b3ae2d598f5290342930b 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -13,7 +13,7 @@ "env": { "CMAKE_CXX_FLAGS": "--target=aarch64-linux-gnu", "CMAKE_EXE_LINKER_FLAGS": "--rtlib=compiler-rt -static -fuse-ld=lld", - "EXTRA_CMAKE_ARGS": "-DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF -DCMAKE_EXPORT_COMPILE_COMMANDS=1" + "EXTRA_CMAKE_ARGS": "-DKLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS=OFF -DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF -DCMAKE_EXPORT_COMPILE_COMMANDS=1" } }, "group": { @@ -35,7 +35,7 @@ "CMAKE_BUILD_TYPE": "Debug", "CMAKE_CXX_FLAGS": "--target=aarch64-linux-gnu", "CMAKE_EXE_LINKER_FLAGS": "--rtlib=compiler-rt -static -fuse-ld=lld", - "EXTRA_CMAKE_ARGS": "-DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF" + "EXTRA_CMAKE_ARGS": "-DKLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS=OFF -DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF" } }, "group": { @@ -54,7 +54,7 @@ "env": { "BUILD_ID": "kleidicv-gcc", "CMAKE_EXE_LINKER_FLAGS": "-static", - "EXTRA_CMAKE_ARGS": "-DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF", + "EXTRA_CMAKE_ARGS": "-DKLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS=OFF -DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF", "CC": "aarch64-linux-gnu-gcc", "CXX": "aarch64-linux-gnu-g++" } diff --git a/CHANGELOG.md b/CHANGELOG.md index 0465031b94a154bd5ff823d4792f53ea838555b9..d8572f87726747f52ed42b53be2d317a1fedb563 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ This changelog aims to follow the guiding principles of - Resize 4x4 for float. - Resize 0.5x0.5 for uint8_t. - Conversion from float to (u)int8_t and vice versa. +- KLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS configuration option. ### Fixed diff --git a/doc/build.md b/doc/build.md index 727ef295f8a8bb5d23ee491af4a30eeaa7f82298..5906cd34c62660fd73009eaf86dc9ddfc1b77bc5 100644 --- a/doc/build.md +++ b/doc/build.md @@ -118,5 +118,6 @@ In addition to the standard CMake settings, KleidiCV behaviour can be modified at build time via the following CMake options: - `KLEIDICV_BENCHMARK` - Enable building KleidiCV benchmarks. The benchmarks use Google Benchmark which will be downloaded automatically. Off by default. - `KLEIDICV_ENABLE_SME2` - Enable Scalable Matrix Extension 2 and Streaming Scalable Vector Extension code paths if supported by the compiler. On by default. + - `KLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS` - Limit Scalable Matrix Extension 2 code paths to cases where it is expected to provide a benefit over other code paths. On by default. Has no effect if `KLEIDICV_ENABLE_SME2` is false. - `KLEIDICV_ENABLE_SVE2` - Enable Scalable Vector Extension 2 code paths if supported by the compiler. On by default. - `KLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS` - Limit Scalable Vector Extension 2 code paths to cases where it is expected to provide a benefit over other code paths. On by default. Has no effect if `KLEIDICV_ENABLE_SVE2` is false. diff --git a/kleidicv/CMakeLists.txt b/kleidicv/CMakeLists.txt index 8ece23cd4956acfacc9a3a0a5130c9645e2705a3..4c17e4e6f70ae9e86871cef7fcef20037c2b7c78 100644 --- a/kleidicv/CMakeLists.txt +++ b/kleidicv/CMakeLists.txt @@ -15,6 +15,11 @@ else() set(KLEIDICV_ENABLE_SVE2 OFF) set(KLEIDICV_ENABLE_SME2 OFF) endif() +option( + KLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS + "Limits SME2 code paths to selected algorithms. Has no effect if KLEIDICV_ENABLE_SME2 is false." + ON +) option( KLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS "Limits SVE2 code paths to selected algorithms. Has no effect if KLEIDICV_ENABLE_SVE2 is false." @@ -66,6 +71,10 @@ if (KLEIDICV_ENABLE_SME2) endif() endif() +if(KLEIDICV_BUILD_SME2 AND NOT KLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS) + set(KLEIDICV_ALWAYS_ENABLE_SME2 ON) +endif() + if(KLEIDICV_BUILD_SVE2 AND NOT KLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS) set(KLEIDICV_ALWAYS_ENABLE_SVE2 ON) endif() diff --git a/kleidicv/include/kleidicv/config.h.in b/kleidicv/include/kleidicv/config.h.in index 1985155dc8c3f21b30393d6615d42662a44c8557..65196e761ee7b324581b2050837a948b94511b1e 100644 --- a/kleidicv/include/kleidicv/config.h.in +++ b/kleidicv/include/kleidicv/config.h.in @@ -7,6 +7,8 @@ // Main configuration switches. +#cmakedefine01 KLEIDICV_ALWAYS_ENABLE_SME2 + #cmakedefine01 KLEIDICV_ALWAYS_ENABLE_SVE2 #cmakedefine01 KLEIDICV_ASSUME_128BIT_SVE2 diff --git a/kleidicv/include/kleidicv/dispatch.h b/kleidicv/include/kleidicv/dispatch.h index 7ff35c8ffef32e2628b7001b45e8401b4bbbd8be..a7400e66d84874421a24c1342fb9708b1cde9380 100644 --- a/kleidicv/include/kleidicv/dispatch.h +++ b/kleidicv/include/kleidicv/dispatch.h @@ -81,6 +81,12 @@ static inline bool hwcaps_has_sme2(HwCaps hwcaps) { #endif // KLEIDICV_HAVE_SVE2 || KLEIDICV_HAVE_SME2 +#if KLEIDICV_ALWAYS_ENABLE_SME2 +#define KLEIDICV_SME2_IMPL_IF(func) func +#else +#define KLEIDICV_SME2_IMPL_IF(func) nullptr +#endif // KLEIDICV_ALWAYS_ENABLE_SME2 + #if KLEIDICV_ALWAYS_ENABLE_SVE2 #define KLEIDICV_SVE2_IMPL_IF(func) func #else diff --git a/kleidicv/src/arithmetics/absdiff_api.cpp b/kleidicv/src/arithmetics/absdiff_api.cpp index 2a010ab1bf7fc1faedc1dc3516b461e2a190ab12..28da57a1e3d5144e452b9b91501a0222e1542eaa 100644 --- a/kleidicv/src/arithmetics/absdiff_api.cpp +++ b/kleidicv/src/arithmetics/absdiff_api.cpp @@ -43,7 +43,7 @@ kleidicv_error_t saturating_absdiff(const T *src_a, size_t src_a_stride, KLEIDICV_MULTIVERSION_C_API( \ name, &kleidicv::neon::saturating_absdiff, \ KLEIDICV_SVE2_IMPL_IF(&kleidicv::sve2::saturating_absdiff), \ - &kleidicv::sme2::saturating_absdiff) + KLEIDICV_SME2_IMPL_IF(&kleidicv::sme2::saturating_absdiff)) KLEIDICV_DEFINE_C_API(kleidicv_saturating_absdiff_u8, uint8_t); KLEIDICV_DEFINE_C_API(kleidicv_saturating_absdiff_s8, int8_t); diff --git a/kleidicv/src/arithmetics/add_api.cpp b/kleidicv/src/arithmetics/add_api.cpp index a88b7b54bfafd0dc6c63bb7015b995083166b0b3..b55b0fe9280e357e696160e4ad7cc80ef6b8e6b6 100644 --- a/kleidicv/src/arithmetics/add_api.cpp +++ b/kleidicv/src/arithmetics/add_api.cpp @@ -40,7 +40,7 @@ kleidicv_error_t saturating_add(const T *src_a, size_t src_a_stride, KLEIDICV_MULTIVERSION_C_API( \ name, &kleidicv::neon::saturating_add, \ KLEIDICV_SVE2_IMPL_IF(&kleidicv::sve2::saturating_add), \ - &kleidicv::sme2::saturating_add) + KLEIDICV_SME2_IMPL_IF(&kleidicv::sme2::saturating_add)) KLEIDICV_DEFINE_C_API(kleidicv_saturating_add_s8, int8_t); KLEIDICV_DEFINE_C_API(kleidicv_saturating_add_u8, uint8_t); diff --git a/kleidicv/src/arithmetics/sub_api.cpp b/kleidicv/src/arithmetics/sub_api.cpp index edabdc034f7a54c5db253df18ab32157f7ef203b..2d302b10c6f0d396f5e9b233a97ae952e5eedcda 100644 --- a/kleidicv/src/arithmetics/sub_api.cpp +++ b/kleidicv/src/arithmetics/sub_api.cpp @@ -39,7 +39,7 @@ kleidicv_error_t saturating_sub(const T *src_a, size_t src_a_stride, KLEIDICV_MULTIVERSION_C_API( \ name, &kleidicv::neon::saturating_sub, \ KLEIDICV_SVE2_IMPL_IF(&kleidicv::sve2::saturating_sub), \ - &kleidicv::sme2::saturating_sub) + KLEIDICV_SME2_IMPL_IF(&kleidicv::sme2::saturating_sub)) KLEIDICV_DEFINE_C_API(kleidicv_saturating_sub_s8, int8_t); KLEIDICV_DEFINE_C_API(kleidicv_saturating_sub_u8, uint8_t); diff --git a/kleidicv/src/logical/bitwise_and_api.cpp b/kleidicv/src/logical/bitwise_and_api.cpp index 6bf0cd89f79f1be69aa5cf1c96d59a7e012bb4f8..c4f744f884b3709da7ea7dcf165c43427a199ce5 100644 --- a/kleidicv/src/logical/bitwise_and_api.cpp +++ b/kleidicv/src/logical/bitwise_and_api.cpp @@ -40,6 +40,6 @@ kleidicv_error_t bitwise_and(const T *src_a, size_t src_a_stride, KLEIDICV_MULTIVERSION_C_API( \ name, &kleidicv::neon::bitwise_and, \ KLEIDICV_SVE2_IMPL_IF(&kleidicv::sve2::bitwise_and), \ - &kleidicv::sme2::bitwise_and) + KLEIDICV_SME2_IMPL_IF(&kleidicv::sme2::bitwise_and)) KLEIDICV_DEFINE_C_API(kleidicv_bitwise_and, uint8_t); diff --git a/scripts/ci.sh b/scripts/ci.sh index c097b4498577ffc94f5f61ee767e45f072944598..b80bc5f6eb94c579bf76a7cc0e07b7c9a28823ac 100755 --- a/scripts/ci.sh +++ b/scripts/ci.sh @@ -37,6 +37,7 @@ cmake -S . -B build -G Ninja \ -DCMAKE_CXX_CLANG_TIDY=clang-tidy \ -DCMAKE_CXX_FLAGS="--target=aarch64-linux-gnu --coverage" \ -DCMAKE_EXE_LINKER_FLAGS="--rtlib=compiler-rt -static -fuse-ld=lld" \ + -DKLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS=OFF \ -DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF \ -DKLEIDICV_CHECK_BANNED_FUNCTIONS=ON @@ -95,6 +96,7 @@ cmake -S . -B build/build-benchmark -G Ninja \ -DCMAKE_SYSTEM_NAME=Linux \ -DCMAKE_SYSTEM_PROCESSOR=aarch64 \ -DKLEIDICV_BENCHMARK=ON \ + -DKLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS=OFF \ -DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF ninja -C build/build-benchmark kleidicv-benchmark diff --git a/scripts/run_opencv_conformity_checks.sh b/scripts/run_opencv_conformity_checks.sh index 30c209734febd841f0044a94125f86c9573e6478..fd8f9e4cb78ff615f53d59acd8ea65daac954d50 100755 --- a/scripts/run_opencv_conformity_checks.sh +++ b/scripts/run_opencv_conformity_checks.sh @@ -51,6 +51,7 @@ cmake "${common_cmake_args[@]}" \ -DWITH_KLEIDICV=ON \ -DKLEIDICV_SOURCE_PATH="${KLEIDICV_SOURCE_PATH}" \ -DKLEIDICV_ENABLE_ALL_OPENCV_HAL=ON \ + -DKLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS=OFF \ -DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF ninja -C "${OPENCV_KLEIDICV_PATH}" manager