From 51527ecceeb91998b0db272bc5bb5b0a1d7da6fb Mon Sep 17 00:00:00 2001 From: Michael Platings Date: Fri, 21 Jun 2024 11:12:19 +0100 Subject: [PATCH] Add KLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS --- .devcontainer/coverage.sh | 2 +- .vscode/tasks.json | 6 +++--- CHANGELOG.md | 1 + doc/build.md | 1 + kleidicv/CMakeLists.txt | 9 +++++++++ kleidicv/include/kleidicv/config.h.in | 2 ++ kleidicv/include/kleidicv/dispatch.h | 6 ++++++ kleidicv/src/arithmetics/absdiff_api.cpp | 2 +- kleidicv/src/arithmetics/add_api.cpp | 2 +- kleidicv/src/arithmetics/sub_api.cpp | 2 +- kleidicv/src/logical/bitwise_and_api.cpp | 2 +- scripts/ci.sh | 2 ++ scripts/run_opencv_conformity_checks.sh | 1 + 13 files changed, 30 insertions(+), 8 deletions(-) diff --git a/.devcontainer/coverage.sh b/.devcontainer/coverage.sh index 1abf33d6c..def7d4ab3 100755 --- a/.devcontainer/coverage.sh +++ b/.devcontainer/coverage.sh @@ -9,7 +9,7 @@ set -eu BUILD_ID="kleidicv-coverage" \ CMAKE_CXX_FLAGS="--target=aarch64-linux-gnu --coverage" \ CMAKE_EXE_LINKER_FLAGS="--rtlib=compiler-rt -static -fuse-ld=lld" \ -EXTRA_CMAKE_ARGS="-DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF" \ +EXTRA_CMAKE_ARGS="-DKLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS=OFF -DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF" \ ./scripts/build.sh kleidicv-test # Clean any coverage results from previous runs diff --git a/.vscode/tasks.json b/.vscode/tasks.json index f28727db8..7590c4c6d 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -13,7 +13,7 @@ "env": { "CMAKE_CXX_FLAGS": "--target=aarch64-linux-gnu", "CMAKE_EXE_LINKER_FLAGS": "--rtlib=compiler-rt -static -fuse-ld=lld", - "EXTRA_CMAKE_ARGS": "-DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF -DCMAKE_EXPORT_COMPILE_COMMANDS=1" + "EXTRA_CMAKE_ARGS": "-DKLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS=OFF -DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF -DCMAKE_EXPORT_COMPILE_COMMANDS=1" } }, "group": { @@ -35,7 +35,7 @@ "CMAKE_BUILD_TYPE": "Debug", "CMAKE_CXX_FLAGS": "--target=aarch64-linux-gnu", "CMAKE_EXE_LINKER_FLAGS": "--rtlib=compiler-rt -static -fuse-ld=lld", - "EXTRA_CMAKE_ARGS": "-DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF" + "EXTRA_CMAKE_ARGS": "-DKLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS=OFF -DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF" } }, "group": { @@ -54,7 +54,7 @@ "env": { "BUILD_ID": "kleidicv-gcc", "CMAKE_EXE_LINKER_FLAGS": "-static", - "EXTRA_CMAKE_ARGS": "-DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF", + "EXTRA_CMAKE_ARGS": "-DKLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS=OFF -DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF", "CC": "aarch64-linux-gnu-gcc", "CXX": "aarch64-linux-gnu-g++" } diff --git a/CHANGELOG.md b/CHANGELOG.md index 0465031b9..d8572f877 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ This changelog aims to follow the guiding principles of - Resize 4x4 for float. - Resize 0.5x0.5 for uint8_t. - Conversion from float to (u)int8_t and vice versa. +- KLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS configuration option. ### Fixed diff --git a/doc/build.md b/doc/build.md index 727ef295f..5906cd34c 100644 --- a/doc/build.md +++ b/doc/build.md @@ -118,5 +118,6 @@ In addition to the standard CMake settings, KleidiCV behaviour can be modified at build time via the following CMake options: - `KLEIDICV_BENCHMARK` - Enable building KleidiCV benchmarks. The benchmarks use Google Benchmark which will be downloaded automatically. Off by default. - `KLEIDICV_ENABLE_SME2` - Enable Scalable Matrix Extension 2 and Streaming Scalable Vector Extension code paths if supported by the compiler. On by default. + - `KLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS` - Limit Scalable Matrix Extension 2 code paths to cases where it is expected to provide a benefit over other code paths. On by default. Has no effect if `KLEIDICV_ENABLE_SME2` is false. - `KLEIDICV_ENABLE_SVE2` - Enable Scalable Vector Extension 2 code paths if supported by the compiler. On by default. - `KLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS` - Limit Scalable Vector Extension 2 code paths to cases where it is expected to provide a benefit over other code paths. On by default. Has no effect if `KLEIDICV_ENABLE_SVE2` is false. diff --git a/kleidicv/CMakeLists.txt b/kleidicv/CMakeLists.txt index 8ece23cd4..4c17e4e6f 100644 --- a/kleidicv/CMakeLists.txt +++ b/kleidicv/CMakeLists.txt @@ -15,6 +15,11 @@ else() set(KLEIDICV_ENABLE_SVE2 OFF) set(KLEIDICV_ENABLE_SME2 OFF) endif() +option( + KLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS + "Limits SME2 code paths to selected algorithms. Has no effect if KLEIDICV_ENABLE_SME2 is false." + ON +) option( KLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS "Limits SVE2 code paths to selected algorithms. Has no effect if KLEIDICV_ENABLE_SVE2 is false." @@ -66,6 +71,10 @@ if (KLEIDICV_ENABLE_SME2) endif() endif() +if(KLEIDICV_BUILD_SME2 AND NOT KLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS) + set(KLEIDICV_ALWAYS_ENABLE_SME2 ON) +endif() + if(KLEIDICV_BUILD_SVE2 AND NOT KLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS) set(KLEIDICV_ALWAYS_ENABLE_SVE2 ON) endif() diff --git a/kleidicv/include/kleidicv/config.h.in b/kleidicv/include/kleidicv/config.h.in index 1985155dc..65196e761 100644 --- a/kleidicv/include/kleidicv/config.h.in +++ b/kleidicv/include/kleidicv/config.h.in @@ -7,6 +7,8 @@ // Main configuration switches. +#cmakedefine01 KLEIDICV_ALWAYS_ENABLE_SME2 + #cmakedefine01 KLEIDICV_ALWAYS_ENABLE_SVE2 #cmakedefine01 KLEIDICV_ASSUME_128BIT_SVE2 diff --git a/kleidicv/include/kleidicv/dispatch.h b/kleidicv/include/kleidicv/dispatch.h index 7ff35c8ff..a7400e66d 100644 --- a/kleidicv/include/kleidicv/dispatch.h +++ b/kleidicv/include/kleidicv/dispatch.h @@ -81,6 +81,12 @@ static inline bool hwcaps_has_sme2(HwCaps hwcaps) { #endif // KLEIDICV_HAVE_SVE2 || KLEIDICV_HAVE_SME2 +#if KLEIDICV_ALWAYS_ENABLE_SME2 +#define KLEIDICV_SME2_IMPL_IF(func) func +#else +#define KLEIDICV_SME2_IMPL_IF(func) nullptr +#endif // KLEIDICV_ALWAYS_ENABLE_SME2 + #if KLEIDICV_ALWAYS_ENABLE_SVE2 #define KLEIDICV_SVE2_IMPL_IF(func) func #else diff --git a/kleidicv/src/arithmetics/absdiff_api.cpp b/kleidicv/src/arithmetics/absdiff_api.cpp index 2a010ab1b..28da57a1e 100644 --- a/kleidicv/src/arithmetics/absdiff_api.cpp +++ b/kleidicv/src/arithmetics/absdiff_api.cpp @@ -43,7 +43,7 @@ kleidicv_error_t saturating_absdiff(const T *src_a, size_t src_a_stride, KLEIDICV_MULTIVERSION_C_API( \ name, &kleidicv::neon::saturating_absdiff, \ KLEIDICV_SVE2_IMPL_IF(&kleidicv::sve2::saturating_absdiff), \ - &kleidicv::sme2::saturating_absdiff) + KLEIDICV_SME2_IMPL_IF(&kleidicv::sme2::saturating_absdiff)) KLEIDICV_DEFINE_C_API(kleidicv_saturating_absdiff_u8, uint8_t); KLEIDICV_DEFINE_C_API(kleidicv_saturating_absdiff_s8, int8_t); diff --git a/kleidicv/src/arithmetics/add_api.cpp b/kleidicv/src/arithmetics/add_api.cpp index a88b7b54b..b55b0fe92 100644 --- a/kleidicv/src/arithmetics/add_api.cpp +++ b/kleidicv/src/arithmetics/add_api.cpp @@ -40,7 +40,7 @@ kleidicv_error_t saturating_add(const T *src_a, size_t src_a_stride, KLEIDICV_MULTIVERSION_C_API( \ name, &kleidicv::neon::saturating_add, \ KLEIDICV_SVE2_IMPL_IF(&kleidicv::sve2::saturating_add), \ - &kleidicv::sme2::saturating_add) + KLEIDICV_SME2_IMPL_IF(&kleidicv::sme2::saturating_add)) KLEIDICV_DEFINE_C_API(kleidicv_saturating_add_s8, int8_t); KLEIDICV_DEFINE_C_API(kleidicv_saturating_add_u8, uint8_t); diff --git a/kleidicv/src/arithmetics/sub_api.cpp b/kleidicv/src/arithmetics/sub_api.cpp index edabdc034..2d302b10c 100644 --- a/kleidicv/src/arithmetics/sub_api.cpp +++ b/kleidicv/src/arithmetics/sub_api.cpp @@ -39,7 +39,7 @@ kleidicv_error_t saturating_sub(const T *src_a, size_t src_a_stride, KLEIDICV_MULTIVERSION_C_API( \ name, &kleidicv::neon::saturating_sub, \ KLEIDICV_SVE2_IMPL_IF(&kleidicv::sve2::saturating_sub), \ - &kleidicv::sme2::saturating_sub) + KLEIDICV_SME2_IMPL_IF(&kleidicv::sme2::saturating_sub)) KLEIDICV_DEFINE_C_API(kleidicv_saturating_sub_s8, int8_t); KLEIDICV_DEFINE_C_API(kleidicv_saturating_sub_u8, uint8_t); diff --git a/kleidicv/src/logical/bitwise_and_api.cpp b/kleidicv/src/logical/bitwise_and_api.cpp index 6bf0cd89f..c4f744f88 100644 --- a/kleidicv/src/logical/bitwise_and_api.cpp +++ b/kleidicv/src/logical/bitwise_and_api.cpp @@ -40,6 +40,6 @@ kleidicv_error_t bitwise_and(const T *src_a, size_t src_a_stride, KLEIDICV_MULTIVERSION_C_API( \ name, &kleidicv::neon::bitwise_and, \ KLEIDICV_SVE2_IMPL_IF(&kleidicv::sve2::bitwise_and), \ - &kleidicv::sme2::bitwise_and) + KLEIDICV_SME2_IMPL_IF(&kleidicv::sme2::bitwise_and)) KLEIDICV_DEFINE_C_API(kleidicv_bitwise_and, uint8_t); diff --git a/scripts/ci.sh b/scripts/ci.sh index c097b4498..b80bc5f6e 100755 --- a/scripts/ci.sh +++ b/scripts/ci.sh @@ -37,6 +37,7 @@ cmake -S . -B build -G Ninja \ -DCMAKE_CXX_CLANG_TIDY=clang-tidy \ -DCMAKE_CXX_FLAGS="--target=aarch64-linux-gnu --coverage" \ -DCMAKE_EXE_LINKER_FLAGS="--rtlib=compiler-rt -static -fuse-ld=lld" \ + -DKLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS=OFF \ -DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF \ -DKLEIDICV_CHECK_BANNED_FUNCTIONS=ON @@ -95,6 +96,7 @@ cmake -S . -B build/build-benchmark -G Ninja \ -DCMAKE_SYSTEM_NAME=Linux \ -DCMAKE_SYSTEM_PROCESSOR=aarch64 \ -DKLEIDICV_BENCHMARK=ON \ + -DKLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS=OFF \ -DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF ninja -C build/build-benchmark kleidicv-benchmark diff --git a/scripts/run_opencv_conformity_checks.sh b/scripts/run_opencv_conformity_checks.sh index 30c209734..fd8f9e4cb 100755 --- a/scripts/run_opencv_conformity_checks.sh +++ b/scripts/run_opencv_conformity_checks.sh @@ -51,6 +51,7 @@ cmake "${common_cmake_args[@]}" \ -DWITH_KLEIDICV=ON \ -DKLEIDICV_SOURCE_PATH="${KLEIDICV_SOURCE_PATH}" \ -DKLEIDICV_ENABLE_ALL_OPENCV_HAL=ON \ + -DKLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS=OFF \ -DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF ninja -C "${OPENCV_KLEIDICV_PATH}" manager -- GitLab