diff --git a/adapters/opencv/kleidicv_hal.h b/adapters/opencv/kleidicv_hal.h index 5a662e2137dcfefa26cdda78ff21952df5134fcb..1751b73c04109a8dc69e39edd2d11694f8a48b6b 100644 --- a/adapters/opencv/kleidicv_hal.h +++ b/adapters/opencv/kleidicv_hal.h @@ -109,6 +109,13 @@ int exp32f(const float *src, float *dst, int len); // Other HAL implementations might require the cv namespace namespace cv { +// If the KleidiCV function has a signature matching the OpenCV HAL interface +// AND it never returns KLEIDICV_NOT_IMPLEMENTED then we can call it directly +// and convert the return code. +#define KLEIDICV_HAL_FORWARD(kleidicv_impl, ...) \ + (kleidicv_impl(__VA_ARGS__) == KLEIDICV_OK ? CV_HAL_ERROR_OK \ + : CV_HAL_ERROR_UNKNOWN) + #define KLEIDICV_HAL_FALLBACK_FORWARD(kleidicv_impl, fallback_hal_impl, ...) \ (KLEIDICV_HAL_API(kleidicv_impl)(__VA_ARGS__) == CV_HAL_ERROR_OK \ ? CV_HAL_ERROR_OK \ @@ -339,6 +346,82 @@ static inline int kleidicv_compare_u8_with_fallback( #undef cv_hal_cmp8u #define cv_hal_cmp8u kleidicv_compare_u8_with_fallback +#if KLEIDICV_ENABLE_ALL_OPENCV_HAL + +// clang-format off +#undef cv_hal_add8s +#define cv_hal_add8s(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_add_s8, __VA_ARGS__) +#undef cv_hal_add8u +#define cv_hal_add8u(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_add_u8, __VA_ARGS__) +#undef cv_hal_add16s +#define cv_hal_add16s(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_add_s16, __VA_ARGS__) +#undef cv_hal_add16u +#define cv_hal_add16u(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_add_u16, __VA_ARGS__) +#undef cv_hal_add32s +#define cv_hal_add32s(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_add_s32, __VA_ARGS__) + +#undef cv_hal_sub8s +#define cv_hal_sub8s(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_sub_s8, __VA_ARGS__) +#undef cv_hal_sub8u +#define cv_hal_sub8u(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_sub_u8, __VA_ARGS__) +#undef cv_hal_sub16s +#define cv_hal_sub16s(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_sub_s16, __VA_ARGS__) +#undef cv_hal_sub16u +#define cv_hal_sub16u(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_sub_u16, __VA_ARGS__) +#undef cv_hal_sub32s +#define cv_hal_sub32s(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_sub_s32, __VA_ARGS__) + +#undef cv_hal_absdiff8s +#define cv_hal_absdiff8s(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_absdiff_s8, __VA_ARGS__) +#undef cv_hal_absdiff8u +#define cv_hal_absdiff8u(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_absdiff_u8, __VA_ARGS__) +#undef cv_hal_absdiff16s +#define cv_hal_absdiff16s(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_absdiff_s16, __VA_ARGS__) +#undef cv_hal_absdiff16u +#define cv_hal_absdiff16u(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_absdiff_u16, __VA_ARGS__) +#undef cv_hal_absdiff32s +#define cv_hal_absdiff32s(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_absdiff_s32, __VA_ARGS__) + +#undef cv_hal_and8u +#define cv_hal_and8u(...) KLEIDICV_HAL_FORWARD(kleidicv_bitwise_and, __VA_ARGS__) +// clang-format on + +#define KLEIDICV_HAL_MUL(suffix, kleidicv_impl, T) \ + static inline int kleidicv_##suffix##_with_fallback( \ + const T *src_a, size_t src_a_stride, const T *src_b, \ + size_t src_b_stride, T *dst, size_t dst_stride, size_t width, \ + size_t height, double scale) { \ + if (scale != 1.0) { \ + return cv_hal_##suffix(src_a, src_a_stride, src_b, src_b_stride, dst, \ + dst_stride, width, height, scale); \ + } \ + return KLEIDICV_HAL_FORWARD(kleidicv_impl, src_a, src_a_stride, src_b, \ + src_b_stride, dst, dst_stride, width, height, \ + scale); \ + } + +KLEIDICV_HAL_MUL(mul8u, kleidicv_saturating_multiply_u8, uint8_t); +#undef cv_hal_mul8u +#define cv_hal_mul8u kleidicv_mul8u_with_fallback + +KLEIDICV_HAL_MUL(mul8s, kleidicv_saturating_multiply_s8, int8_t); +#undef cv_hal_mul8s +#define cv_hal_mul8s kleidicv_mul8s_with_fallback + +KLEIDICV_HAL_MUL(mul16u, kleidicv_saturating_multiply_u16, uint16_t); +#undef cv_hal_mul16u +#define cv_hal_mul16u kleidicv_mul16u_with_fallback + +KLEIDICV_HAL_MUL(mul16s, kleidicv_saturating_multiply_s16, int16_t); +#undef cv_hal_mul16s +#define cv_hal_mul16s kleidicv_mul16s_with_fallback + +KLEIDICV_HAL_MUL(mul32s, kleidicv_saturating_multiply_s32, int32_t); +#undef cv_hal_mul32s +#define cv_hal_mul32s kleidicv_mul32s_with_fallback + +#endif // KLEIDICV_ENABLE_ALL_OPENCV_HAL + #endif // OPENCV_CORE_HAL_REPLACEMENT_HPP // Remove no longer needed macro definitions. diff --git a/conformity/opencv/CMakeLists.txt b/conformity/opencv/CMakeLists.txt index 88a80ff36d78da66b541b9f684638db21d96f7e1..4fc695794d84c71fd7dc685035d19aecfc01be2c 100644 --- a/conformity/opencv/CMakeLists.txt +++ b/conformity/opencv/CMakeLists.txt @@ -30,6 +30,7 @@ add_executable( manager manager.cpp tests.cpp + test_binary_op.cpp test_gaussian_blur.cpp test_min_max.cpp test_rgb2yuv.cpp @@ -69,6 +70,7 @@ add_executable( subordinate subordinate.cpp tests.cpp + test_binary_op.cpp test_gaussian_blur.cpp test_min_max.cpp test_rgb2yuv.cpp diff --git a/conformity/opencv/test_binary_op.cpp b/conformity/opencv/test_binary_op.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9f6544b42c4fb8cc1f35ede6230c43d750cc516d --- /dev/null +++ b/conformity/opencv/test_binary_op.cpp @@ -0,0 +1,78 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "test_binary_op.h" + +#include +#include +#include + +#include "opencv2/core/hal/interface.h" + +static cv::Mat add(cv::Mat& a, cv::Mat& b) { return a + b; } + +static cv::Mat sub(cv::Mat& a, cv::Mat& b) { return a - b; } + +static cv::Mat mul(cv::Mat& a, cv::Mat& b) { return a.mul(b); } + +static cv::Mat absdiff(cv::Mat& a, cv::Mat& b) { + cv::Mat dst; + cv::absdiff(a, b, dst); + return dst; +} + +static cv::Mat bitwise_and(cv::Mat& a, cv::Mat& b) { + cv::Mat dst; + cv::bitwise_and(a, b, dst); + return dst; +} + +template +static cv::Mat exec_binary_op(cv::Mat& input_mat) { + int mid = input_mat.rows / 2; + cv::Mat a = input_mat.rowRange(0, mid); + cv::Mat b = input_mat.rowRange(mid, input_mat.rows); + return F(a, b); +} + +#if MANAGER +template +static bool test_binary_op(int index, RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue) { + cv::RNG rng(0); + + for (size_t height = 5; height <= 16; ++height) { + for (size_t width = 5; width <= 16; ++width) { + cv::Mat input(height * 2, width, Format); + rng.fill(input, cv::RNG::UNIFORM, std::numeric_limits::lowest(), + std::numeric_limits::max()); + + cv::Mat actual = exec_binary_op(input); + cv::Mat expected = get_expected_from_subordinate(index, request_queue, + reply_queue, input); + + if (are_matrices_different(0, actual, expected)) { + fail_print_matrices(height, width, input, actual, expected); + return true; + } + } + } + + return false; +} +#endif + +#define BINARY_OP_TEST(op, format, t) \ + TEST(#op " " #format, (test_binary_op), exec_binary_op) + +std::vector& binary_op_tests_get() { + static std::vector tests = { + BINARY_OP_TEST(add, CV_8SC1, int8_t), + BINARY_OP_TEST(sub, CV_8UC2, uint8_t), + BINARY_OP_TEST(mul, CV_16UC3, uint16_t), + BINARY_OP_TEST(absdiff, CV_16SC4, int16_t), + BINARY_OP_TEST(bitwise_and, CV_32SC2, int32_t), + }; + return tests; +} diff --git a/conformity/opencv/test_binary_op.h b/conformity/opencv/test_binary_op.h new file mode 100644 index 0000000000000000000000000000000000000000..169e79ad081eaa8ddc838fdcd9e544e5a4078a99 --- /dev/null +++ b/conformity/opencv/test_binary_op.h @@ -0,0 +1,14 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_OPENCV_CONFORMITY_TEST_BINARY_OP_H_ +#define KLEIDICV_OPENCV_CONFORMITY_TEST_BINARY_OP_H_ + +#include + +#include "tests.h" + +std::vector& binary_op_tests_get(); + +#endif // KLEIDICV_OPENCV_CONFORMITY_TEST_BINARY_OP_H_ diff --git a/conformity/opencv/tests.cpp b/conformity/opencv/tests.cpp index 657d5551e77f5e3da6e021a7c3ed6b5616ce6353..cb5fe944c3f92105461ce6548b90a4db8c0171f7 100644 --- a/conformity/opencv/tests.cpp +++ b/conformity/opencv/tests.cpp @@ -10,6 +10,7 @@ #include "opencv2/core.hpp" #include "opencv2/imgproc.hpp" +#include "test_binary_op.h" #include "test_exp.h" #include "test_gaussian_blur.h" #include "test_min_max.h" @@ -27,6 +28,7 @@ static std::vector merge_tests( } std::vector all_tests = merge_tests({ + binary_op_tests_get, gaussian_blur_tests_get, min_max_tests_get, rgb2yuv_tests_get, diff --git a/kleidicv/CMakeLists.txt b/kleidicv/CMakeLists.txt index f4446a635fba7b5273bd031e34a788899d9d57de..b06555250b62d3c7c1e6544e283606f9e02092dc 100644 --- a/kleidicv/CMakeLists.txt +++ b/kleidicv/CMakeLists.txt @@ -20,6 +20,12 @@ option( "Limits SVE2 code paths to selected algorithms. Has no effect if KLEIDICV_ENABLE_SVE2 is false." ON ) +option( + KLEIDICV_ENABLE_ALL_OPENCV_HAL + "Internal - Enable all KleidiCV operations in the OpenCV HAL. + By default operations are only enabled in the HAL if benchmarks show a measurable performance uplift." + OFF +) option(KLEIDICV_CHECK_BANNED_FUNCTIONS "Internal - Check source for deprecated or obsolescent functions" OFF) option(KLEIDICV_ASSUME_128BIT_SVE2 "Internal - If turned ON 128-bit SVE2 vector length is assumed" OFF) option(KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE "Internal - If turned ON interleaving loads and stores are preferred instead of continuous loads and stores" OFF) diff --git a/kleidicv/include/kleidicv/config.h.in b/kleidicv/include/kleidicv/config.h.in index 4efcf8e310bdb23551726ae5fd9884d1932b5a5e..1985155dc8c3f21b30393d6615d42662a44c8557 100644 --- a/kleidicv/include/kleidicv/config.h.in +++ b/kleidicv/include/kleidicv/config.h.in @@ -11,6 +11,8 @@ #cmakedefine01 KLEIDICV_ASSUME_128BIT_SVE2 +#cmakedefine01 KLEIDICV_ENABLE_ALL_OPENCV_HAL + #cmakedefine01 KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE #cmakedefine01 KLEIDICV_EXPERIMENTAL_FEATURE_CANNY diff --git a/scripts/run_opencv_conformity_checks.sh b/scripts/run_opencv_conformity_checks.sh index aed20998916b8816e26195147db7569fad37711b..30c209734febd841f0044a94125f86c9573e6478 100755 --- a/scripts/run_opencv_conformity_checks.sh +++ b/scripts/run_opencv_conformity_checks.sh @@ -50,6 +50,7 @@ cmake "${common_cmake_args[@]}" \ -B "${OPENCV_KLEIDICV_PATH}" \ -DWITH_KLEIDICV=ON \ -DKLEIDICV_SOURCE_PATH="${KLEIDICV_SOURCE_PATH}" \ + -DKLEIDICV_ENABLE_ALL_OPENCV_HAL=ON \ -DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF ninja -C "${OPENCV_KLEIDICV_PATH}" manager