From 46d26e3f7d4ea56cfa051df3dce4f9b880fbe2c4 Mon Sep 17 00:00:00 2001 From: Michael Platings Date: Fri, 31 May 2024 07:49:08 +0000 Subject: [PATCH] Optionally enable simple operations in OpenCV HAL There's not much scope to optimise such trivial operations but nevertheless it can be interesting to use them from OpenCV. Therefore add a CMake option KLEIDICV_ENABLE_ALL_OPENCV_HAL to enable operations regardless of whether they will provide a performance uplift. --- adapters/opencv/kleidicv_hal.h | 83 +++++++++++++++++++++++++ conformity/opencv/CMakeLists.txt | 2 + conformity/opencv/test_binary_op.cpp | 78 +++++++++++++++++++++++ conformity/opencv/test_binary_op.h | 14 +++++ conformity/opencv/tests.cpp | 2 + kleidicv/CMakeLists.txt | 6 ++ kleidicv/include/kleidicv/config.h.in | 2 + scripts/run_opencv_conformity_checks.sh | 1 + 8 files changed, 188 insertions(+) create mode 100644 conformity/opencv/test_binary_op.cpp create mode 100644 conformity/opencv/test_binary_op.h diff --git a/adapters/opencv/kleidicv_hal.h b/adapters/opencv/kleidicv_hal.h index 5a662e213..1751b73c0 100644 --- a/adapters/opencv/kleidicv_hal.h +++ b/adapters/opencv/kleidicv_hal.h @@ -109,6 +109,13 @@ int exp32f(const float *src, float *dst, int len); // Other HAL implementations might require the cv namespace namespace cv { +// If the KleidiCV function has a signature matching the OpenCV HAL interface +// AND it never returns KLEIDICV_NOT_IMPLEMENTED then we can call it directly +// and convert the return code. +#define KLEIDICV_HAL_FORWARD(kleidicv_impl, ...) \ + (kleidicv_impl(__VA_ARGS__) == KLEIDICV_OK ? CV_HAL_ERROR_OK \ + : CV_HAL_ERROR_UNKNOWN) + #define KLEIDICV_HAL_FALLBACK_FORWARD(kleidicv_impl, fallback_hal_impl, ...) \ (KLEIDICV_HAL_API(kleidicv_impl)(__VA_ARGS__) == CV_HAL_ERROR_OK \ ? 
CV_HAL_ERROR_OK \ @@ -339,6 +346,82 @@ static inline int kleidicv_compare_u8_with_fallback( #undef cv_hal_cmp8u #define cv_hal_cmp8u kleidicv_compare_u8_with_fallback +#if KLEIDICV_ENABLE_ALL_OPENCV_HAL + +// clang-format off +#undef cv_hal_add8s +#define cv_hal_add8s(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_add_s8, __VA_ARGS__) +#undef cv_hal_add8u +#define cv_hal_add8u(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_add_u8, __VA_ARGS__) +#undef cv_hal_add16s +#define cv_hal_add16s(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_add_s16, __VA_ARGS__) +#undef cv_hal_add16u +#define cv_hal_add16u(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_add_u16, __VA_ARGS__) +#undef cv_hal_add32s +#define cv_hal_add32s(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_add_s32, __VA_ARGS__) + +#undef cv_hal_sub8s +#define cv_hal_sub8s(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_sub_s8, __VA_ARGS__) +#undef cv_hal_sub8u +#define cv_hal_sub8u(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_sub_u8, __VA_ARGS__) +#undef cv_hal_sub16s +#define cv_hal_sub16s(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_sub_s16, __VA_ARGS__) +#undef cv_hal_sub16u +#define cv_hal_sub16u(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_sub_u16, __VA_ARGS__) +#undef cv_hal_sub32s +#define cv_hal_sub32s(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_sub_s32, __VA_ARGS__) + +#undef cv_hal_absdiff8s +#define cv_hal_absdiff8s(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_absdiff_s8, __VA_ARGS__) +#undef cv_hal_absdiff8u +#define cv_hal_absdiff8u(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_absdiff_u8, __VA_ARGS__) +#undef cv_hal_absdiff16s +#define cv_hal_absdiff16s(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_absdiff_s16, __VA_ARGS__) +#undef cv_hal_absdiff16u +#define cv_hal_absdiff16u(...) KLEIDICV_HAL_FORWARD(kleidicv_saturating_absdiff_u16, __VA_ARGS__) +#undef cv_hal_absdiff32s +#define cv_hal_absdiff32s(...) 
KLEIDICV_HAL_FORWARD(kleidicv_saturating_absdiff_s32, __VA_ARGS__) + +#undef cv_hal_and8u +#define cv_hal_and8u(...) KLEIDICV_HAL_FORWARD(kleidicv_bitwise_and, __VA_ARGS__) +// clang-format on + +#define KLEIDICV_HAL_MUL(suffix, kleidicv_impl, T) \ + static inline int kleidicv_##suffix##_with_fallback( \ + const T *src_a, size_t src_a_stride, const T *src_b, \ + size_t src_b_stride, T *dst, size_t dst_stride, size_t width, \ + size_t height, double scale) { \ + if (scale != 1.0) { \ + return cv_hal_##suffix(src_a, src_a_stride, src_b, src_b_stride, dst, \ + dst_stride, width, height, scale); \ + } \ + return KLEIDICV_HAL_FORWARD(kleidicv_impl, src_a, src_a_stride, src_b, \ + src_b_stride, dst, dst_stride, width, height, \ + scale); \ + } + +KLEIDICV_HAL_MUL(mul8u, kleidicv_saturating_multiply_u8, uint8_t); +#undef cv_hal_mul8u +#define cv_hal_mul8u kleidicv_mul8u_with_fallback + +KLEIDICV_HAL_MUL(mul8s, kleidicv_saturating_multiply_s8, int8_t); +#undef cv_hal_mul8s +#define cv_hal_mul8s kleidicv_mul8s_with_fallback + +KLEIDICV_HAL_MUL(mul16u, kleidicv_saturating_multiply_u16, uint16_t); +#undef cv_hal_mul16u +#define cv_hal_mul16u kleidicv_mul16u_with_fallback + +KLEIDICV_HAL_MUL(mul16s, kleidicv_saturating_multiply_s16, int16_t); +#undef cv_hal_mul16s +#define cv_hal_mul16s kleidicv_mul16s_with_fallback + +KLEIDICV_HAL_MUL(mul32s, kleidicv_saturating_multiply_s32, int32_t); +#undef cv_hal_mul32s +#define cv_hal_mul32s kleidicv_mul32s_with_fallback + +#endif // KLEIDICV_ENABLE_ALL_OPENCV_HAL + #endif // OPENCV_CORE_HAL_REPLACEMENT_HPP // Remove no longer needed macro definitions. 
diff --git a/conformity/opencv/CMakeLists.txt b/conformity/opencv/CMakeLists.txt index 88a80ff36..4fc695794 100644 --- a/conformity/opencv/CMakeLists.txt +++ b/conformity/opencv/CMakeLists.txt @@ -30,6 +30,7 @@ add_executable( manager manager.cpp tests.cpp + test_binary_op.cpp test_gaussian_blur.cpp test_min_max.cpp test_rgb2yuv.cpp @@ -69,6 +70,7 @@ add_executable( subordinate subordinate.cpp tests.cpp + test_binary_op.cpp test_gaussian_blur.cpp test_min_max.cpp test_rgb2yuv.cpp diff --git a/conformity/opencv/test_binary_op.cpp b/conformity/opencv/test_binary_op.cpp new file mode 100644 index 000000000..9f6544b42 --- /dev/null +++ b/conformity/opencv/test_binary_op.cpp @@ -0,0 +1,78 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "test_binary_op.h" + +#include +#include +#include + +#include "opencv2/core/hal/interface.h" + +static cv::Mat add(cv::Mat& a, cv::Mat& b) { return a + b; } + +static cv::Mat sub(cv::Mat& a, cv::Mat& b) { return a - b; } + +static cv::Mat mul(cv::Mat& a, cv::Mat& b) { return a.mul(b); } + +static cv::Mat absdiff(cv::Mat& a, cv::Mat& b) { + cv::Mat dst; + cv::absdiff(a, b, dst); + return dst; +} + +static cv::Mat bitwise_and(cv::Mat& a, cv::Mat& b) { + cv::Mat dst; + cv::bitwise_and(a, b, dst); + return dst; +} + +template +static cv::Mat exec_binary_op(cv::Mat& input_mat) { + int mid = input_mat.rows / 2; + cv::Mat a = input_mat.rowRange(0, mid); + cv::Mat b = input_mat.rowRange(mid, input_mat.rows); + return F(a, b); +} + +#if MANAGER +template +static bool test_binary_op(int index, RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue) { + cv::RNG rng(0); + + for (size_t height = 5; height <= 16; ++height) { + for (size_t width = 5; width <= 16; ++width) { + cv::Mat input(height * 2, width, Format); + rng.fill(input, cv::RNG::UNIFORM, std::numeric_limits::lowest(), + std::numeric_limits::max()); + + cv::Mat actual = exec_binary_op(input); 
+ cv::Mat expected = get_expected_from_subordinate(index, request_queue, + reply_queue, input); + + if (are_matrices_different(0, actual, expected)) { + fail_print_matrices(height, width, input, actual, expected); + return true; + } + } + } + + return false; +} +#endif + +#define BINARY_OP_TEST(op, format, t) \ + TEST(#op " " #format, (test_binary_op), exec_binary_op) + +std::vector& binary_op_tests_get() { + static std::vector tests = { + BINARY_OP_TEST(add, CV_8SC1, int8_t), + BINARY_OP_TEST(sub, CV_8UC2, uint8_t), + BINARY_OP_TEST(mul, CV_16UC3, uint16_t), + BINARY_OP_TEST(absdiff, CV_16SC4, int16_t), + BINARY_OP_TEST(bitwise_and, CV_32SC2, int32_t), + }; + return tests; +} diff --git a/conformity/opencv/test_binary_op.h b/conformity/opencv/test_binary_op.h new file mode 100644 index 000000000..169e79ad0 --- /dev/null +++ b/conformity/opencv/test_binary_op.h @@ -0,0 +1,14 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_OPENCV_CONFORMITY_TEST_BINARY_OP_H_ +#define KLEIDICV_OPENCV_CONFORMITY_TEST_BINARY_OP_H_ + +#include + +#include "tests.h" + +std::vector& binary_op_tests_get(); + +#endif // KLEIDICV_OPENCV_CONFORMITY_TEST_BINARY_OP_H_ diff --git a/conformity/opencv/tests.cpp b/conformity/opencv/tests.cpp index 657d5551e..cb5fe944c 100644 --- a/conformity/opencv/tests.cpp +++ b/conformity/opencv/tests.cpp @@ -10,6 +10,7 @@ #include "opencv2/core.hpp" #include "opencv2/imgproc.hpp" +#include "test_binary_op.h" #include "test_exp.h" #include "test_gaussian_blur.h" #include "test_min_max.h" @@ -27,6 +28,7 @@ static std::vector merge_tests( } std::vector all_tests = merge_tests({ + binary_op_tests_get, gaussian_blur_tests_get, min_max_tests_get, rgb2yuv_tests_get, diff --git a/kleidicv/CMakeLists.txt b/kleidicv/CMakeLists.txt index f4446a635..b06555250 100644 --- a/kleidicv/CMakeLists.txt +++ b/kleidicv/CMakeLists.txt @@ -20,6 +20,12 @@ option( "Limits SVE2 code paths to selected 
algorithms. Has no effect if KLEIDICV_ENABLE_SVE2 is false." ON ) +option( + KLEIDICV_ENABLE_ALL_OPENCV_HAL + "Internal - Enable all KleidiCV operations in the OpenCV HAL. + By default operations are only enabled in the HAL if benchmarks show a measurable performance uplift." + OFF +) option(KLEIDICV_CHECK_BANNED_FUNCTIONS "Internal - Check source for deprecated or obsolescent functions" OFF) option(KLEIDICV_ASSUME_128BIT_SVE2 "Internal - If turned ON 128-bit SVE2 vector length is assumed" OFF) option(KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE "Internal - If turned ON interleaving loads and stores are preferred instead of continuous loads and stores" OFF) diff --git a/kleidicv/include/kleidicv/config.h.in b/kleidicv/include/kleidicv/config.h.in index 4efcf8e31..1985155dc 100644 --- a/kleidicv/include/kleidicv/config.h.in +++ b/kleidicv/include/kleidicv/config.h.in @@ -11,6 +11,8 @@ #cmakedefine01 KLEIDICV_ASSUME_128BIT_SVE2 +#cmakedefine01 KLEIDICV_ENABLE_ALL_OPENCV_HAL + #cmakedefine01 KLEIDICV_PREFER_INTERLEAVING_LOAD_STORE #cmakedefine01 KLEIDICV_EXPERIMENTAL_FEATURE_CANNY diff --git a/scripts/run_opencv_conformity_checks.sh b/scripts/run_opencv_conformity_checks.sh index aed209989..30c209734 100755 --- a/scripts/run_opencv_conformity_checks.sh +++ b/scripts/run_opencv_conformity_checks.sh @@ -50,6 +50,7 @@ cmake "${common_cmake_args[@]}" \ -B "${OPENCV_KLEIDICV_PATH}" \ -DWITH_KLEIDICV=ON \ -DKLEIDICV_SOURCE_PATH="${KLEIDICV_SOURCE_PATH}" \ + -DKLEIDICV_ENABLE_ALL_OPENCV_HAL=ON \ -DKLEIDICV_LIMIT_SVE2_TO_SELECTED_ALGORITHMS=OFF ninja -C "${OPENCV_KLEIDICV_PATH}" manager -- GitLab