From dced970a01443c9fcfc0d03fe2546ec364256fe6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Thu, 15 Aug 2024 15:43:17 +0200 Subject: [PATCH 1/6] Minor refactor for Gaussian blur and SepFilter2D --- adapters/opencv/kleidicv_hal.cpp | 8 ++++---- kleidicv/src/filters/gaussian_blur_neon.cpp | 1 + kleidicv/src/filters/separable_filter_2d_neon.cpp | 4 ++-- test/api/test_separable_filter_2d.cpp | 4 ++-- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp index 1fad7dc0c..92b8daf27 100644 --- a/adapters/opencv/kleidicv_hal.cpp +++ b/adapters/opencv/kleidicv_hal.cpp @@ -355,11 +355,11 @@ int separable_filter_2d_init(cvhalFilter2D **context, int src_type, return CV_HAL_ERROR_UNKNOWN; } - const uint8_t *kernel_x = new uint8_t[kernelx_length]; - const uint8_t *kernel_y = new uint8_t[kernely_length]; + uint8_t *kernel_x = new uint8_t[kernelx_length]; + uint8_t *kernel_y = new uint8_t[kernely_length]; - std::memcpy(const_cast(kernel_x), kernelx_data, kernelx_length); - std::memcpy(const_cast(kernel_y), kernely_data, kernely_length); + std::memcpy(kernel_x, kernelx_data, kernelx_length); + std::memcpy(kernel_y, kernely_data, kernely_length); params->channels = (src_type >> CV_CN_SHIFT) + 1; params->border_type = kleidicv_border_type; diff --git a/kleidicv/src/filters/gaussian_blur_neon.cpp b/kleidicv/src/filters/gaussian_blur_neon.cpp index f45a253ce..f8e7309dd 100644 --- a/kleidicv/src/filters/gaussian_blur_neon.cpp +++ b/kleidicv/src/filters/gaussian_blur_neon.cpp @@ -522,6 +522,7 @@ class GaussianBlur { static constexpr size_t kHalfKernelSize = get_half_kernel_size(KernelSize); + // Ignored because vectors are initialized in the constructor body. // NOLINTNEXTLINE - hicpp-member-init explicit GaussianBlur(float sigma) : half_kernel_(generate_gaussian_half_kernel(sigma)) { diff --git a/kleidicv/src/filters/separable_filter_2d_neon.cpp b/kleidicv/src/filters/separable_filter_2d_neon.cpp index 2a29a9d68..6efd14969 100644 --- a/kleidicv/src/filters/separable_filter_2d_neon.cpp +++ b/kleidicv/src/filters/separable_filter_2d_neon.cpp @@ -23,8 +23,8 @@ class SeparableFilter2D { using BufferType = uint16_t; using BufferVectorType = typename VecTraits::VectorType; using DestinationType = uint8_t; - using DestinationVectorType = typename VecTraits::VectorType; + // Ignored because vectors are initialized in the constructor body. // NOLINTNEXTLINE - hicpp-member-init SeparableFilter2D(const SourceType *kernel_x, const SourceType *kernel_y) : kernel_x_(kernel_x), kernel_y_(kernel_y) { @@ -107,7 +107,7 @@ class SeparableFilter2D { BufferVectorType kernel_x_u16_[5]; SourceVectorType kernel_y_u8_[5]; -}; +}; // end of class SeparableFilter2D template static kleidicv_error_t separable_filter_2d_checks( diff --git a/test/api/test_separable_filter_2d.cpp b/test/api/test_separable_filter_2d.cpp index 10a6ca47d..5a0407325 100644 --- a/test/api/test_separable_filter_2d.cpp +++ b/test/api/test_separable_filter_2d.cpp @@ -134,8 +134,8 @@ TYPED_TEST_SUITE(SeparableFilter2D, ElementTypes); TYPED_TEST(SeparableFilter2D, 5x5) { using KernelTestParams = SeparableFilter2DKernelTestParams; - const uint8_t kernel_x[5] = {5, 0, 1, 2, 2}; - const uint8_t kernel_y[5] = {1, 4, 3, 1, 0}; + const TypeParam kernel_x[5] = {5, 0, 1, 2, 2}; + const TypeParam kernel_y[5] = {1, 4, 3, 1, 0}; // Mask is created by 'kernel_y (outer product) kernel_x' test::Array2D mask{5, 5}; -- GitLab From 3202f30612404d03d5a0de61f8ebabb2bec9bb76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Tue, 13 Aug 2024 14:18:47 +0200 Subject: [PATCH 2/6] Add uint16 support to Separable Filter 2D --- adapters/opencv/kleidicv_hal.cpp | 52 ++++-- .../kleidicv/filters/separable_filter_2d.h | 32 ++++ kleidicv/include/kleidicv/kleidicv.h | 6 + .../src/filters/separable_filter_2d_api.cpp | 16 ++ .../src/filters/separable_filter_2d_neon.cpp | 132 +++++++++++++ kleidicv/src/filters/separable_filter_2d_sc.h | 174 ++++++++++++++++++ .../src/filters/separable_filter_2d_sme2.cpp | 14 ++ .../src/filters/separable_filter_2d_sve2.cpp | 12 ++ .../include/kleidicv_thread/kleidicv_thread.h | 7 + kleidicv_thread/src/kleidicv_thread.cpp | 17 ++ test/api/test_separable_filter_2d.cpp | 100 +++++++++- test/api/test_thread.cpp | 58 +++--- test/framework/border.cpp | 5 + 13 files changed, 588 insertions(+), 37 deletions(-) diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp index 92b8daf27..53bc16751 100644 --- a/adapters/opencv/kleidicv_hal.cpp +++ b/adapters/opencv/kleidicv_hal.cpp @@ -299,6 +299,7 @@ static int from_opencv(int opencv_border_type, struct SeparableFilter2DParams { size_t channels; kleidicv_border_type_t border_type; + int operation_depth; const uint8_t *kernel_x; size_t kernel_width; const uint8_t *kernel_y; @@ -317,11 +318,16 @@ int separable_filter_2d_init(cvhalFilter2D **context, int src_type, return CV_HAL_ERROR_NOT_IMPLEMENTED; } - if (CV_MAT_DEPTH(src_type) != CV_8U) { + if (CV_MAT_DEPTH(src_type) != CV_MAT_DEPTH(kernel_type)) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + if (CV_MAT_CN(kernel_type) != 1) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } - if (CV_MAT_DEPTH(kernel_type) != CV_8U) { + int operation_depth = CV_MAT_DEPTH(src_type); + if (operation_depth != CV_8U && operation_depth != CV_16U) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } @@ -355,14 +361,17 @@ int separable_filter_2d_init(cvhalFilter2D **context, int src_type, return CV_HAL_ERROR_UNKNOWN; } - uint8_t *kernel_x = new uint8_t[kernelx_length]; - uint8_t *kernel_y = new uint8_t[kernely_length]; + size_t type_size = get_type_size(operation_depth); - std::memcpy(kernel_x, kernelx_data, kernelx_length); - std::memcpy(kernel_y, kernely_data, kernely_length); + uint8_t *kernel_x = new uint8_t[kernelx_length * type_size]; + uint8_t *kernel_y = new uint8_t[kernely_length * type_size]; + + std::memcpy(kernel_x, kernelx_data, kernelx_length * type_size); + std::memcpy(kernel_y, kernely_data, kernely_length * type_size); params->channels = (src_type >> CV_CN_SHIFT) + 1; params->border_type = kleidicv_border_type; + params->operation_depth = operation_depth; params->kernel_x = kernel_x; params->kernel_width = static_cast(kernelx_length); @@ -424,12 +433,31 @@ int separable_filter_2d_operation(cvhalFilter2D *context, uchar *src_data, auto mt = get_multithreading(); - kleidicv_error_t filter_err = kleidicv_thread_separable_filter_2d_u8( - reinterpret_cast(src_data), src_step, - reinterpret_cast(dst_data), dst_step, - static_cast(width), static_cast(height), params->channels, - params->kernel_x, params->kernel_width, params->kernel_y, - params->kernel_height, params->border_type, filter_context, mt); + kleidicv_error_t filter_err; + switch (params->operation_depth) { + case CV_8U: + filter_err = kleidicv_thread_separable_filter_2d_u8( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, + static_cast(width), static_cast(height), + params->channels, params->kernel_x, params->kernel_width, + params->kernel_y, params->kernel_height, params->border_type, + filter_context, mt); + break; + case CV_16U: + filter_err = kleidicv_thread_separable_filter_2d_u16( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, + static_cast(width), static_cast(height), + params->channels, + reinterpret_cast(params->kernel_x), + params->kernel_width, + reinterpret_cast(params->kernel_y), + params->kernel_height, params->border_type, filter_context, mt); + break; + default: + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } return convert_error(filter_err); } diff --git a/kleidicv/include/kleidicv/filters/separable_filter_2d.h b/kleidicv/include/kleidicv/filters/separable_filter_2d.h index 867b612ac..cad5539f9 100644 --- a/kleidicv/include/kleidicv/filters/separable_filter_2d.h +++ b/kleidicv/include/kleidicv/filters/separable_filter_2d.h @@ -21,6 +21,17 @@ KLEIDICV_API_DECLARATION(kleidicv_separable_filter_2d_stripe_u8, const uint8_t *kernel_y, size_t kernel_height, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); +// For internal use only. See instead kleidicv_separable_filter_2d_u16. +// Filter a horizontal stripe across an image. The stripe is defined by the +// range (y_begin, y_end]. +KLEIDICV_API_DECLARATION(kleidicv_separable_filter_2d_stripe_u16, + const uint16_t *src, size_t src_stride, uint16_t *dst, + size_t dst_stride, size_t width, size_t height, + size_t y_begin, size_t y_end, size_t channels, + const uint16_t *kernel_x, size_t kernel_width, + const uint16_t *kernel_y, size_t kernel_height, + kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context); } namespace kleidicv { @@ -34,6 +45,13 @@ kleidicv_error_t separable_filter_2d_stripe_u8( size_t kernel_height, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); +kleidicv_error_t separable_filter_2d_stripe_u16( + const uint16_t *src, size_t src_stride, uint16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, + const uint16_t *kernel_x, size_t kernel_width, const uint16_t *kernel_y, + size_t kernel_height, kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context); + } // namespace neon namespace sve2 { @@ -45,6 +63,13 @@ kleidicv_error_t separable_filter_2d_stripe_u8( size_t kernel_height, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); +kleidicv_error_t separable_filter_2d_stripe_u16( + const uint16_t *src, size_t src_stride, uint16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, + const uint16_t *kernel_x, size_t kernel_width, const uint16_t *kernel_y, + size_t kernel_height, kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context); + } // namespace sve2 namespace sme2 { @@ -56,6 +81,13 @@ kleidicv_error_t separable_filter_2d_stripe_u8( size_t kernel_height, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); +kleidicv_error_t separable_filter_2d_stripe_u16( + const uint16_t *src, size_t src_stride, uint16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, + const uint16_t *kernel_x, size_t kernel_width, const uint16_t *kernel_y, + size_t kernel_height, kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context); + } // namespace sme2 } // namespace kleidicv diff --git a/kleidicv/include/kleidicv/kleidicv.h b/kleidicv/include/kleidicv/kleidicv.h index 6a7b1b141..543f5e7e2 100644 --- a/kleidicv/include/kleidicv/kleidicv.h +++ b/kleidicv/include/kleidicv/kleidicv.h @@ -1271,6 +1271,12 @@ kleidicv_error_t kleidicv_separable_filter_2d_u8( size_t width, size_t height, size_t channels, const uint8_t *kernel_x, size_t kernel_width, const uint8_t *kernel_y, size_t kernel_height, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); +/// @copydoc kleidicv_separable_filter_2d_u8 +kleidicv_error_t kleidicv_separable_filter_2d_u16( + const uint16_t *src, size_t src_stride, uint16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, const uint16_t *kernel_x, + size_t kernel_width, const uint16_t *kernel_y, size_t kernel_height, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); /// Applies Gaussian blur to the source image using the specified parameters. /// In-place filtering is not supported. diff --git a/kleidicv/src/filters/separable_filter_2d_api.cpp b/kleidicv/src/filters/separable_filter_2d_api.cpp index 555b722af..e4adf6cea 100644 --- a/kleidicv/src/filters/separable_filter_2d_api.cpp +++ b/kleidicv/src/filters/separable_filter_2d_api.cpp @@ -13,6 +13,12 @@ KLEIDICV_MULTIVERSION_C_API( KLEIDICV_SVE2_IMPL_IF(kleidicv::sve2::separable_filter_2d_stripe_u8), &kleidicv::sme2::separable_filter_2d_stripe_u8); +KLEIDICV_MULTIVERSION_C_API( + kleidicv_separable_filter_2d_stripe_u16, + &kleidicv::neon::separable_filter_2d_stripe_u16, + KLEIDICV_SVE2_IMPL_IF(kleidicv::sve2::separable_filter_2d_stripe_u16), + &kleidicv::sme2::separable_filter_2d_stripe_u16); + extern "C" { using KLEIDICV_TARGET_NAMESPACE::Rectangle; @@ -72,4 +78,14 @@ kleidicv_error_t kleidicv_separable_filter_2d_u8( kernel_x, kernel_width, kernel_y, kernel_height, border_type, context); } +kleidicv_error_t kleidicv_separable_filter_2d_u16( + const uint16_t *src, size_t src_stride, uint16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, const uint16_t *kernel_x, + size_t kernel_width, const uint16_t *kernel_y, size_t kernel_height, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { + return kleidicv_separable_filter_2d_stripe_u16( + src, src_stride, dst, dst_stride, width, height, 0, height, channels, + kernel_x, kernel_width, kernel_y, kernel_height, border_type, context); +} + } // extern "C" diff --git a/kleidicv/src/filters/separable_filter_2d_neon.cpp b/kleidicv/src/filters/separable_filter_2d_neon.cpp index 6efd14969..7dfc6221b 100644 --- a/kleidicv/src/filters/separable_filter_2d_neon.cpp +++ b/kleidicv/src/filters/separable_filter_2d_neon.cpp @@ -109,6 +109,100 @@ class SeparableFilter2D { SourceVectorType kernel_y_u8_[5]; }; // end of class SeparableFilter2D +template <> +class SeparableFilter2D { + public: + using SourceType = uint16_t; + using SourceVectorType = typename VecTraits::VectorType; + using BufferType = uint32_t; + using BufferVectorType = typename VecTraits::VectorType; + using DestinationType = uint16_t; + + // Ignored because vectors are initialized in the constructor body. + // NOLINTNEXTLINE - hicpp-member-init + SeparableFilter2D(const SourceType *kernel_x, const SourceType *kernel_y) + : kernel_x_(kernel_x), kernel_y_(kernel_y) { + for (size_t i = 0; i < 5; i++) { + kernel_x_u32_[i] = vdupq_n_u32(kernel_x[i]); + kernel_y_u16_[i] = vdupq_n_u16(kernel_y[i]); + } + } + + void vertical_vector_path(SourceVectorType src[5], BufferType *dst) const { + SourceVectorType acc_l = + vmull_u16(vget_low_u16(src[0]), vget_low_u16(kernel_y_u16_[0])); + SourceVectorType acc_h = vmull_high_u16(src[0], kernel_y_u16_[0]); + + // Optimization to avoid unnecessary branching in vector code. + KLEIDICV_FORCE_LOOP_UNROLL + for (size_t i = 1; i < 5; i++) { + acc_l = vmlal_u16(acc_l, vget_low_u16(src[i]), + vget_low_u16(kernel_y_u16_[i])); + acc_h = vmlal_high_u16(acc_h, src[i], kernel_y_u16_[i]); + } + + vst1q_u32(&dst[0], acc_l); + vst1q_u32(&dst[4], acc_h); + } + + void vertical_scalar_path(const SourceType src[5], BufferType *dst) const { + BufferType acc = static_cast(src[0]) * kernel_y_[0]; + for (size_t i = 1; i < 5; i++) { + BufferType temp = static_cast(src[i]) * kernel_y_[i]; + if (__builtin_add_overflow(acc, temp, &acc)) { + dst[0] = std::numeric_limits::max(); + return; + } + } + + dst[0] = acc; + } + + void horizontal_vector_path(BufferVectorType src[5], + DestinationType *dst) const { + BufferVectorType acc = vmulq_u32(src[0], kernel_x_u32_[0]); + + // Optimization to avoid unnecessary branching in vector code. + KLEIDICV_FORCE_LOOP_UNROLL + for (size_t i = 1; i < 5; i++) { + acc = vmlaq_u32(acc, src[i], kernel_x_u32_[i]); + } + + uint16x4_t result = vqmovn_u32(acc); + vst1_u16(&dst[0], result); + } + + void horizontal_scalar_path(const BufferType src[5], + DestinationType *dst) const { + SourceType acc; // Avoid cppcoreguidelines-init-variables. NOLINT + if (__builtin_mul_overflow(src[0], kernel_x_[0], &acc)) { + dst[0] = std::numeric_limits::max(); + return; + } + + for (size_t i = 1; i < 5; i++) { + SourceType temp; // Avoid cppcoreguidelines-init-variables. NOLINT + if (__builtin_mul_overflow(src[i], kernel_x_[i], &temp)) { + dst[0] = std::numeric_limits::max(); + return; + } + if (__builtin_add_overflow(acc, temp, &acc)) { + dst[0] = std::numeric_limits::max(); + return; + } + } + + dst[0] = acc; + } + + private: + const SourceType *kernel_x_; + const SourceType *kernel_y_; + + BufferVectorType kernel_x_u32_[5]; + SourceVectorType kernel_y_u16_[5]; +}; // end of class SeparableFilter2D + template static kleidicv_error_t separable_filter_2d_checks( const T *src, size_t src_stride, T *dst, size_t dst_stride, size_t width, @@ -183,4 +277,42 @@ kleidicv_error_t separable_filter_2d_stripe_u8( return KLEIDICV_OK; } +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t separable_filter_2d_stripe_u16( + const uint16_t *src, size_t src_stride, uint16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, + const uint16_t *kernel_x, size_t kernel_width, const uint16_t *kernel_y, + size_t kernel_height, kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context) { + auto *workspace = reinterpret_cast(context); + kleidicv_error_t checks_result = separable_filter_2d_checks( + src, src_stride, dst, dst_stride, width, height, channels, kernel_x, + kernel_width, kernel_y, kernel_height, workspace); + + if (checks_result != KLEIDICV_OK) { + return checks_result; + } + + auto fixed_border_type = get_fixed_border_type(border_type); + // if the std::optional is empty, that means that the border type is not + // supported, so there's no need to check for specific types + if (!fixed_border_type) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + + Rectangle rect{width, height}; + + using SeparableFilterClass = SeparableFilter2D; + + SeparableFilterClass filterClass{kernel_x, kernel_y}; + SeparableFilter filter{filterClass}; + + Rows src_rows{src, src_stride, channels}; + Rows dst_rows{dst, dst_stride, channels}; + workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels, + *fixed_border_type, filter); + + return KLEIDICV_OK; +} + } // namespace kleidicv::neon diff --git a/kleidicv/src/filters/separable_filter_2d_sc.h b/kleidicv/src/filters/separable_filter_2d_sc.h index 8330c66c2..fca4b62d4 100644 --- a/kleidicv/src/filters/separable_filter_2d_sc.h +++ b/kleidicv/src/filters/separable_filter_2d_sc.h @@ -138,6 +138,128 @@ class SeparableFilter2D { SourceVectorType &kernel_y_4_u8_; }; // end of class SeparableFilter2D +template <> +class SeparableFilter2D { + public: + using SourceType = uint16_t; + using SourceVectorType = typename VecTraits::VectorType; + using BufferType = uint32_t; + using BufferVectorType = typename VecTraits::VectorType; + using BufferDoubleVectorType = typename VecTraits::Vector2Type; + using DestinationType = uint16_t; + + SeparableFilter2D( + const SourceType *kernel_x, BufferVectorType &kernel_x_0_u32, + BufferVectorType &kernel_x_1_u32, BufferVectorType &kernel_x_2_u32, + BufferVectorType &kernel_x_3_u32, BufferVectorType &kernel_x_4_u32, + SourceVectorType &kernel_y_0_u16, SourceVectorType &kernel_y_1_u16, + SourceVectorType &kernel_y_2_u16, SourceVectorType &kernel_y_3_u16, + SourceVectorType &kernel_y_4_u16) + : kernel_x_(kernel_x), + kernel_x_0_u32_(kernel_x_0_u32), + kernel_x_1_u32_(kernel_x_1_u32), + kernel_x_2_u32_(kernel_x_2_u32), + kernel_x_3_u32_(kernel_x_3_u32), + kernel_x_4_u32_(kernel_x_4_u32), + + kernel_y_0_u16_(kernel_y_0_u16), + kernel_y_1_u16_(kernel_y_1_u16), + kernel_y_2_u16_(kernel_y_2_u16), + kernel_y_3_u16_(kernel_y_3_u16), + kernel_y_4_u16_(kernel_y_4_u16) {} + + void vertical_vector_path( + svbool_t pg, SourceVectorType src_0, SourceVectorType src_1, + SourceVectorType src_2, SourceVectorType src_3, SourceVectorType src_4, + BufferType *dst) const KLEIDICV_STREAMING_COMPATIBLE { + // 0 + BufferVectorType acc_b = svmullb_u32(src_0, kernel_y_0_u16_); + BufferVectorType acc_t = svmullt_u32(src_0, kernel_y_0_u16_); + + // 1 + acc_b = svmlalb_u32(acc_b, src_1, kernel_y_1_u16_); + acc_t = svmlalt_u32(acc_t, src_1, kernel_y_1_u16_); + + // 2 + acc_b = svmlalb_u32(acc_b, src_2, kernel_y_2_u16_); + acc_t = svmlalt_u32(acc_t, src_2, kernel_y_2_u16_); + + // 3 + acc_b = svmlalb_u32(acc_b, src_3, kernel_y_3_u16_); + acc_t = svmlalt_u32(acc_t, src_3, kernel_y_3_u16_); + + // 4 + acc_b = svmlalb_u32(acc_b, src_4, kernel_y_4_u16_); + acc_t = svmlalt_u32(acc_t, src_4, kernel_y_4_u16_); + + BufferDoubleVectorType interleaved = svcreate2_u32(acc_b, acc_t); + svst2(pg, &dst[0], interleaved); + } + + void horizontal_vector_path( + svbool_t pg, BufferVectorType src_0, BufferVectorType src_1, + BufferVectorType src_2, BufferVectorType src_3, BufferVectorType src_4, + DestinationType *dst) const KLEIDICV_STREAMING_COMPATIBLE { + // 0 + BufferVectorType acc = svmul_u32_x(pg, src_0, kernel_x_0_u32_); + + // 1 + acc = svmla_u32_x(pg, acc, src_1, kernel_x_1_u32_); + + // 2 + acc = svmla_u32_x(pg, acc, src_2, kernel_x_2_u32_); + + // 3 + acc = svmla_u32_x(pg, acc, src_3, kernel_x_3_u32_); + + // 4 + acc = svmla_u32_x(pg, acc, src_4, kernel_x_4_u32_); + + svbool_t greater = + svcmpgt_n_u32(pg, acc, std::numeric_limits::max()); + acc = svdup_n_u32_m(acc, greater, std::numeric_limits::max()); + svst1h_u32(pg, &dst[0], acc); + } + + void horizontal_scalar_path(const BufferType src[5], DestinationType *dst) + const KLEIDICV_STREAMING_COMPATIBLE { + SourceType acc; // Avoid cppcoreguidelines-init-variables. NOLINT + if (__builtin_mul_overflow(src[0], kernel_x_[0], &acc)) { + dst[0] = std::numeric_limits::max(); + return; + } + + for (size_t i = 1; i < 5; i++) { + SourceType temp; // Avoid cppcoreguidelines-init-variables. NOLINT + if (__builtin_mul_overflow(src[i], kernel_x_[i], &temp)) { + dst[0] = std::numeric_limits::max(); + return; + } + if (__builtin_add_overflow(acc, temp, &acc)) { + dst[0] = std::numeric_limits::max(); + return; + } + } + + dst[0] = acc; + } + + private: + const SourceType *kernel_x_; + + BufferVectorType &kernel_x_0_u32_; + BufferVectorType &kernel_x_1_u32_; + BufferVectorType &kernel_x_2_u32_; + BufferVectorType &kernel_x_3_u32_; + BufferVectorType &kernel_x_4_u32_; + + SourceVectorType &kernel_y_0_u16_; + SourceVectorType &kernel_y_1_u16_; + SourceVectorType &kernel_y_2_u16_; + SourceVectorType &kernel_y_3_u16_; + SourceVectorType &kernel_y_4_u16_; +}; // end of class SeparableFilter2D + template static kleidicv_error_t separable_filter_2d_checks( const T *src, size_t src_stride, T *dst, size_t dst_stride, size_t width, @@ -226,6 +348,58 @@ static kleidicv_error_t separable_filter_2d_stripe_u8_sc( return KLEIDICV_OK; } +static kleidicv_error_t separable_filter_2d_stripe_u16_sc( + const uint16_t *src, size_t src_stride, uint16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, + const uint16_t *kernel_x, size_t kernel_width, const uint16_t *kernel_y, + size_t kernel_height, kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context) KLEIDICV_STREAMING_COMPATIBLE { + auto *workspace = reinterpret_cast(context); + kleidicv_error_t checks_result = separable_filter_2d_checks( + src, src_stride, dst, dst_stride, width, height, channels, kernel_x, + kernel_width, kernel_y, kernel_height, workspace); + + if (checks_result != KLEIDICV_OK) { + return checks_result; + } + + auto fixed_border_type = get_fixed_border_type(border_type); + // if the std::optional is empty, that means that the border type is not + // supported, so there's no need to check for specific types + if (!fixed_border_type) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + + Rectangle rect{width, height}; + + using SeparableFilterClass = SeparableFilter2D; + + svuint32_t kernel_x_0_u32 = svdup_n_u32(kernel_x[0]); + svuint32_t kernel_x_1_u32 = svdup_n_u32(kernel_x[1]); + svuint32_t kernel_x_2_u32 = svdup_n_u32(kernel_x[2]); + svuint32_t kernel_x_3_u32 = svdup_n_u32(kernel_x[3]); + svuint32_t kernel_x_4_u32 = svdup_n_u32(kernel_x[4]); + + svuint16_t kernel_y_0_u16 = svdup_n_u16(kernel_y[0]); + svuint16_t kernel_y_1_u16 = svdup_n_u16(kernel_y[1]); + svuint16_t kernel_y_2_u16 = svdup_n_u16(kernel_y[2]); + svuint16_t kernel_y_3_u16 = svdup_n_u16(kernel_y[3]); + svuint16_t kernel_y_4_u16 = svdup_n_u16(kernel_y[4]); + + SeparableFilterClass filterClass{ + kernel_x, kernel_x_0_u32, kernel_x_1_u32, kernel_x_2_u32, + kernel_x_3_u32, kernel_x_4_u32, kernel_y_0_u16, kernel_y_1_u16, + kernel_y_2_u16, kernel_y_3_u16, kernel_y_4_u16}; + SeparableFilter filter{filterClass}; + + Rows src_rows{src, src_stride, channels}; + Rows dst_rows{dst, dst_stride, channels}; + workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels, + *fixed_border_type, filter); + + return KLEIDICV_OK; +} + } // namespace KLEIDICV_TARGET_NAMESPACE #endif // KLEIDICV_SEPARABLE_FILTER_2D_SC_H diff --git a/kleidicv/src/filters/separable_filter_2d_sme2.cpp b/kleidicv/src/filters/separable_filter_2d_sme2.cpp index ef3e24570..768d958b2 100644 --- a/kleidicv/src/filters/separable_filter_2d_sme2.cpp +++ b/kleidicv/src/filters/separable_filter_2d_sme2.cpp @@ -21,4 +21,18 @@ separable_filter_2d_stripe_u8(const uint8_t *src, size_t src_stride, kernel_x, kernel_width, kernel_y, kernel_height, border_type, context); } +KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t +separable_filter_2d_stripe_u16(const uint16_t *src, size_t src_stride, + uint16_t *dst, size_t dst_stride, size_t width, + size_t height, size_t y_begin, size_t y_end, + size_t channels, const uint16_t *kernel_x, + size_t kernel_width, const uint16_t *kernel_y, + size_t kernel_height, + kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context) { + return separable_filter_2d_stripe_u16_sc( + src, src_stride, dst, dst_stride, width, height, y_begin, y_end, channels, + kernel_x, kernel_width, kernel_y, kernel_height, border_type, context); +} + } // namespace kleidicv::sme2 diff --git a/kleidicv/src/filters/separable_filter_2d_sve2.cpp b/kleidicv/src/filters/separable_filter_2d_sve2.cpp index 0ce142234..498806d60 100644 --- a/kleidicv/src/filters/separable_filter_2d_sve2.cpp +++ b/kleidicv/src/filters/separable_filter_2d_sve2.cpp @@ -19,4 +19,16 @@ kleidicv_error_t separable_filter_2d_stripe_u8( kernel_x, kernel_width, kernel_y, kernel_height, border_type, context); } +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t separable_filter_2d_stripe_u16( + const uint16_t *src, size_t src_stride, uint16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, + const uint16_t *kernel_x, size_t kernel_width, const uint16_t *kernel_y, + size_t kernel_height, kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context) { + return separable_filter_2d_stripe_u16_sc( + src, src_stride, dst, dst_stride, width, height, y_begin, y_end, channels, + kernel_x, kernel_width, kernel_y, kernel_height, border_type, context); +} + } // namespace kleidicv::sve2 diff --git a/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h b/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h index ab45ef6e1..0267fa272 100644 --- a/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h +++ b/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h @@ -252,6 +252,13 @@ kleidicv_error_t kleidicv_thread_separable_filter_2d_u8( kleidicv_border_type_t border_type, kleidicv_filter_context_t *context, kleidicv_thread_multithreading); +kleidicv_error_t kleidicv_thread_separable_filter_2d_u16( + const uint16_t *src, size_t src_stride, uint16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, const uint16_t *kernel_x, + size_t kernel_width, const uint16_t *kernel_y, size_t kernel_height, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context, + kleidicv_thread_multithreading); + kleidicv_error_t kleidicv_thread_sobel_3x3_horizontal_s16_u8( const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, size_t width, size_t height, size_t channels, diff --git a/kleidicv_thread/src/kleidicv_thread.cpp b/kleidicv_thread/src/kleidicv_thread.cpp index 6d8506ee1..e01c06028 100644 --- a/kleidicv_thread/src/kleidicv_thread.cpp +++ b/kleidicv_thread/src/kleidicv_thread.cpp @@ -443,6 +443,23 @@ kleidicv_error_t kleidicv_thread_separable_filter_2d_u8( kernel_height, context, mt); } +kleidicv_error_t kleidicv_thread_separable_filter_2d_u16( + const uint16_t *src, size_t src_stride, uint16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, const uint16_t *kernel_x, + size_t kernel_width, const uint16_t *kernel_y, size_t kernel_height, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context, + kleidicv_thread_multithreading mt) { + auto callback = [=](size_t y_begin, size_t y_end, + kleidicv_filter_context_t *thread_context) { + return kleidicv_separable_filter_2d_stripe_u16( + src, src_stride, dst, dst_stride, width, height, y_begin, y_end, + channels, kernel_x, kernel_width, kernel_y, kernel_height, border_type, + thread_context); + }; + return kleidicv_thread_filter(callback, width, height, channels, kernel_width, + kernel_height, context, mt); +} + kleidicv_error_t kleidicv_thread_sobel_3x3_horizontal_s16_u8( const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, size_t width, size_t height, size_t channels, diff --git a/test/api/test_separable_filter_2d.cpp b/test/api/test_separable_filter_2d.cpp index 5a0407325..df884d6dd 100644 --- a/test/api/test_separable_filter_2d.cpp +++ b/test/api/test_separable_filter_2d.cpp @@ -12,6 +12,7 @@ #include "test_config.h" KLEIDICV_API(separable_filter_2d, kleidicv_separable_filter_2d_u8, uint8_t) +KLEIDICV_API(separable_filter_2d, kleidicv_separable_filter_2d_u16, uint16_t) // Implements KernelTestParams for SeparableFilter2D operators. template @@ -26,6 +27,15 @@ struct SeparableFilter2DKernelTestParams { static constexpr size_t kKernelSize = KernelSize; }; // end of struct SeparableFilter2DKernelTestParams +template +struct SeparableFilter2DKernelTestParams { + using InputType = uint16_t; + using IntermediateType = uint16_t; + using OutputType = uint16_t; + + static constexpr size_t kKernelSize = KernelSize; +}; // end of struct SeparableFilter2DKernelTestParams + static constexpr std::array kDefaultBorder = { KLEIDICV_BORDER_TYPE_REPLICATE}; @@ -123,7 +133,7 @@ class SeparableFilter2DTest : public test::KernelTest { } }; // end of class SeparableFilter2DTest -using ElementTypes = ::testing::Types; +using ElementTypes = ::testing::Types; template class SeparableFilter2D : public testing::Test {}; @@ -148,7 +158,9 @@ TYPED_TEST(SeparableFilter2D, 5x5) { .test(mask, 5); } -TYPED_TEST(SeparableFilter2D, 5x5Overflow) { +TEST(SeparableFilter2D, 5x5_U8Overflow) { + using TypeParam = uint8_t; + kleidicv_filter_context_t *context = nullptr; ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create(&context, 1, 5, 5, 5, 5)); @@ -201,6 +213,90 @@ TYPED_TEST(SeparableFilter2D, 5x5Overflow) { EXPECT_EQ_ARRAY2D(dst_expected, dst); } +TEST(SeparableFilter2D, 5x5_U16Overflow) { + using TypeParam = uint16_t; + + kleidicv_filter_context_t *context = nullptr; + ASSERT_EQ(KLEIDICV_OK, + kleidicv_filter_context_create(&context, 1, 5, 5, 7, 8)); + test::Array2D src{7, 8, test::Options::vector_length()}; + // clang-format off + src.set(0, 0, { 1, 2, 3, 4, 5, 6, 7}); + src.set(1, 0, { 2, 3, 4, 5, 6, 7, 8}); + src.set(2, 0, { 3, 4, 5, 6, 7, 8, 9}); + src.set(3, 0, { 4, 5, 6, 7, 8, 9, 1}); + src.set(4, 0, { 5, 6, 7, 8, 9, 1, 2}); + src.set(5, 0, { 6, 7, 8, 9, 1, 2, 3}); + src.set(6, 0, { 7, 8, 9, 1, 2, 3, 4}); + src.set(7, 0, { 8, 9, 1, 2, 3, 4, 5}); + // clang-format on + + test::Array2D kernel_x{5, 1}; + kernel_x.set(0, 0, {38, 0, 38, 0, 38}); + test::Array2D kernel_y{5, 1}; + kernel_y.set(0, 0, {38, 0, 38, 0, 38}); + + test::Array2D dst{7, 8, test::Options::vector_length()}; + EXPECT_EQ(KLEIDICV_OK, separable_filter_2d()( + src.data(), src.stride(), dst.data(), dst.stride(), + 7, 8, 1, kernel_x.data(), 5, kernel_y.data(), 5, + KLEIDICV_BORDER_TYPE_REPLICATE, context)); + + test::Array2D dst_expected{7, 8, test::Options::vector_length()}; + // clang-format off + dst_expected.set(0, 0, { 30324, 38988, 47652, 60648, 65535, 65535, 65535}); + dst_expected.set(1, 0, { 38988, 47652, 56316, 65535, 65535, 65535, 65535}); + dst_expected.set(2, 0, { 47652, 56316, 64980, 64980, 65535, 65535, 65535}); + dst_expected.set(3, 0, { 60648, 65535, 64980, 65535, 64980, 65535, 56316}); + dst_expected.set(4, 0, { 65535, 65535, 65535, 64980, 65535, 60648, 65535}); + dst_expected.set(5, 0, { 65535, 65535, 64980, 65535, 51984, 60648, 43320}); + dst_expected.set(6, 0, { 65535, 65535, 65535, 60648, 60648, 43320, 51984}); + dst_expected.set(7, 0, { 65535, 65535, 56316, 65535, 43320, 51984, 47652}); + // clang-format on + EXPECT_EQ_ARRAY2D(dst_expected, dst); + + kernel_x.set(0, 0, {83, 94, 83, 94, 83}); + kernel_y.set(0, 0, {94, 83, 94, 83, 94}); + + // clang-format off + dst_expected.set(0, 0, { 65535, 65535, 65535, 65535, 65535, 65535, 65535}); + dst_expected.set(1, 0, { 65535, 65535, 65535, 65535, 65535, 65535, 65535}); + dst_expected.set(2, 0, { 65535, 65535, 65535, 65535, 65535, 65535, 65535}); + dst_expected.set(3, 0, { 65535, 65535, 65535, 65535, 65535, 65535, 65535}); + dst_expected.set(4, 0, { 65535, 65535, 65535, 65535, 65535, 65535, 65535}); + dst_expected.set(5, 0, { 65535, 65535, 65535, 65535, 65535, 65535, 65535}); + dst_expected.set(6, 0, { 65535, 65535, 65535, 65535, 65535, 65535, 65535}); + dst_expected.set(7, 0, { 65535, 65535, 65535, 65535, 65535, 65535, 65535}); + // clang-format on + + EXPECT_EQ(KLEIDICV_OK, separable_filter_2d()( + src.data(), src.stride(), dst.data(), dst.stride(), + 7, 8, 1, kernel_x.data(), 5, kernel_y.data(), 5, + KLEIDICV_BORDER_TYPE_REPLICATE, context)); + EXPECT_EQ_ARRAY2D(dst_expected, dst); + + // clang-format off + src.set(0, 0, { 65535, 65535, 65535, 65535, 65535, 65535, 65535}); + src.set(1, 0, { 65535, 65535, 65535, 65535, 65535, 65535, 65535}); + src.set(2, 0, { 65535, 65535, 65535, 65535, 65535, 65535, 65535}); + src.set(3, 0, { 65535, 65535, 65535, 65535, 65535, 65535, 65535}); + src.set(4, 0, { 65535, 65535, 65535, 65535, 65535, 65535, 65535}); + src.set(5, 0, { 65535, 65535, 65535, 65535, 65535, 65535, 65535}); + src.set(6, 0, { 65535, 65535, 65535, 65535, 65535, 65535, 65535}); + src.set(7, 0, { 65535, 65535, 65535, 65535, 65535, 65535, 65535}); + // clang-format on + + kernel_x.set(0, 0, {65535, 65535, 65535, 65535, 65535}); + kernel_y.set(0, 0, {65535, 65535, 65535, 65535, 65535}); + + EXPECT_EQ(KLEIDICV_OK, separable_filter_2d()( + src.data(), src.stride(), dst.data(), dst.stride(), + 7, 8, 1, kernel_x.data(), 5, kernel_y.data(), 5, + KLEIDICV_BORDER_TYPE_REPLICATE, context)); + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); + EXPECT_EQ_ARRAY2D(dst_expected, dst); +} + TYPED_TEST(SeparableFilter2D, NullPointer) { using KernelTestParams = SeparableFilter2DKernelTestParams; kleidicv_filter_context_t *context = nullptr; diff --git a/test/api/test_thread.cpp b/test/api/test_thread.cpp index bf456eb25..1abc6e92e 100644 --- a/test/api/test_thread.cpp +++ b/test/api/test_thread.cpp @@ -75,6 +75,35 @@ class Thread : public testing::TestWithParam

{ EXPECT_EQ(KLEIDICV_OK, multi_result); EXPECT_EQ_ARRAY2D(dst_multi, dst_single); } + + template + void check_separable_filter_2d(SingleThreadedFunc single_threaded_func, + MultithreadedFunc multithreaded_func) { + unsigned width = 0, height = 0, thread_count = 0; + std::tie(width, height, thread_count) = GetParam(); + (void)thread_count; + size_t channels = 1; + const size_t kernel_width = 5; + const size_t kernel_height = kernel_width; + + test::Array2D kernel_x{kernel_width, 1}; + kernel_x.set(0, 0, {1, 2, 3, 4, 5}); + test::Array2D kernel_y{kernel_height, 1}; + kernel_y.set(0, 0, {5, 6, 7, 8, 9}); + + kleidicv_border_type_t border_type = KLEIDICV_BORDER_TYPE_REPLICATE; + kleidicv_filter_context_t *context = nullptr; + ASSERT_EQ(KLEIDICV_OK, + kleidicv_filter_context_create(&context, channels, kernel_width, + kernel_height, width, height)); + check_unary_op( + single_threaded_func, multithreaded_func, channels /*src_channels*/, + channels /*dst_channels*/, + /*remaining arguments passed to separable_filter_2d_... functions*/ + channels, kernel_x.data(), kernel_width, kernel_y.data(), kernel_height, + border_type, context); + ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); + } }; #define TEST_UNARY_OP(suffix, SrcT, DstT, ...) \ @@ -167,30 +196,13 @@ TEST_P(Thread, gaussian_blur_u8) { } TEST_P(Thread, separable_filter_2d_u8) { - unsigned width = 0, height = 0, thread_count = 0; - std::tie(width, height, thread_count) = GetParam(); - (void)thread_count; - size_t channels = 1; - const size_t kernel_width = 5; - const size_t kernel_height = kernel_width; - - test::Array2D kernel_x{kernel_width, 1}; - kernel_x.set(0, 0, {9, 9, 9, 9, 9}); - test::Array2D kernel_y{kernel_height, 1}; - kernel_y.set(0, 0, {5, 6, 7, 8, 9}); + check_separable_filter_2d(kleidicv_separable_filter_2d_u8, + kleidicv_thread_separable_filter_2d_u8); +} - kleidicv_border_type_t border_type = KLEIDICV_BORDER_TYPE_REPLICATE; - kleidicv_filter_context_t *context = nullptr; - ASSERT_EQ(KLEIDICV_OK, - kleidicv_filter_context_create(&context, channels, kernel_width, - kernel_height, width, height)); - check_unary_op( - kleidicv_separable_filter_2d_u8, kleidicv_thread_separable_filter_2d_u8, - channels /*src_channels*/, channels /*dst_channels*/, - /*remaining arguments passed to separable_filter_2d_u8 functions*/ - channels, kernel_x.data(), kernel_width, kernel_y.data(), kernel_height, - border_type, context); - ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); +TEST_P(Thread, separable_filter_2d_u16) { + check_separable_filter_2d(kleidicv_separable_filter_2d_u16, + kleidicv_thread_separable_filter_2d_u16); } TEST_P(Thread, SobelHorizontal1Channel) { diff --git a/test/framework/border.cpp b/test/framework/border.cpp index ace94551d..5a3c6688f 100644 --- a/test/framework/border.cpp +++ b/test/framework/border.cpp @@ -313,4 +313,9 @@ template void prepare_borders(kleidicv_border_type_t, const Bordered *, TwoDimensional *); +template void prepare_borders(kleidicv_border_type_t, + kleidicv_border_values_t, + const Bordered *, + TwoDimensional *); + } // namespace test -- GitLab From 5076820e1f2cf8a04460091e875e1e5e82719bdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Thu, 15 Aug 2024 11:47:53 +0200 Subject: [PATCH 3/6] Add uint16 Separable Filter 2D conformity tests --- .../opencv/test_separable_filter_2d.cpp | 59 ++++++++++++------- conformity/opencv/utils.h | 9 +++ 2 files changed, 47 insertions(+), 21 deletions(-) diff --git a/conformity/opencv/test_separable_filter_2d.cpp b/conformity/opencv/test_separable_filter_2d.cpp index d56d52d79..2720d89a0 100644 --- a/conformity/opencv/test_separable_filter_2d.cpp +++ b/conformity/opencv/test_separable_filter_2d.cpp @@ -6,10 +6,10 @@ #include "tests.h" -template +template cv::Mat exec_separable_filter_2d(cv::Mat& input) { uint32_t kernel_seed = - *reinterpret_cast(&input.at(input.rows - 1, 0)); + *reinterpret_cast(&input.at(input.rows - 1, 0)); // clone is required, otherwise the result matrix is treated as part of a // bigger image, and it would have impact on what border types are supported cv::Mat input_mat = input.rowRange(0, input.rows - 1).clone(); @@ -27,7 +27,8 @@ cv::Mat exec_separable_filter_2d(cv::Mat& input) { } #if MANAGER -template +template bool test_separable_filter_2d(int index, RecreatedMessageQueue& request_queue, RecreatedMessageQueue& reply_queue) { cv::RNG rng(0); @@ -36,20 +37,21 @@ bool test_separable_filter_2d(int index, RecreatedMessageQueue& request_queue, for (size_t x = 5; x <= 16; ++x) { // One extra line allocated to be sure the kernel seed can be placed next // to the real input - cv::Mat input(y + 1, x, CV_8UC(Channels)); + cv::Mat input(y + 1, x, get_opencv_matrix_type()); rng.fill(input, cv::RNG::UNIFORM, 0, 7); uint32_t kernel_seed = rng.next(); // kernel seed is embedded into the input matrix - *reinterpret_cast(&input.at(input.rows - 1, 0)) = + *reinterpret_cast(&input.at(input.rows - 1, 0)) = kernel_seed; - cv::Mat actual = exec_separable_filter_2d(input); + cv::Mat actual = + exec_separable_filter_2d(input); cv::Mat expected = get_expected_from_subordinate(index, request_queue, reply_queue, input); - if (are_matrices_different(0, actual, expected)) { + if (are_matrices_different(0, actual, expected)) { fail_print_matrices(y, x, input, actual, expected); return true; } @@ -63,20 +65,35 @@ bool test_separable_filter_2d(int index, RecreatedMessageQueue& request_queue, std::vector& separable_filter_2d_tests_get() { // clang-format off static std::vector tests = { - TEST("Separable Filter 2D 5x5, BORDER_REFLECT_101, 1 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT_101, 1>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT_101>)), - TEST("Separable Filter 2D 5x5, BORDER_REFLECT_101, 2 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT_101, 2>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT_101>)), - TEST("Separable Filter 2D 5x5, BORDER_REFLECT_101, 3 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT_101, 3>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT_101>)), - TEST("Separable Filter 2D 5x5, BORDER_REFLECT_101, 4 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT_101, 4>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT_101>)), - - TEST("Separable Filter 2D 5x5, BORDER_REFLECT, 1 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT, 1>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT>)), - TEST("Separable Filter 2D 5x5, BORDER_REFLECT, 2 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT, 2>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT>)), - TEST("Separable Filter 2D 5x5, BORDER_REFLECT, 3 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT, 3>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT>)), - TEST("Separable Filter 2D 5x5, BORDER_REFLECT, 4 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT, 4>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT>)), - - TEST("Separable Filter 2D 5x5, BORDER_REPLICATE, 1 channel", (test_separable_filter_2d<5, cv::BORDER_REPLICATE, 1>), (exec_separable_filter_2d<5, cv::BORDER_REPLICATE>)), - TEST("Separable Filter 2D 5x5, BORDER_REPLICATE, 2 channel", (test_separable_filter_2d<5, cv::BORDER_REPLICATE, 2>), (exec_separable_filter_2d<5, cv::BORDER_REPLICATE>)), - TEST("Separable Filter 2D 5x5, BORDER_REPLICATE, 3 channel", (test_separable_filter_2d<5, cv::BORDER_REPLICATE, 3>), (exec_separable_filter_2d<5, cv::BORDER_REPLICATE>)), - TEST("Separable Filter 2D 5x5, BORDER_REPLICATE, 4 channel", (test_separable_filter_2d<5, cv::BORDER_REPLICATE, 4>), (exec_separable_filter_2d<5, cv::BORDER_REPLICATE>)), + TEST("Separable Filter 2D 5x5 (u8), BORDER_REFLECT_101, 1 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + TEST("Separable Filter 2D 5x5 (u8), BORDER_REFLECT_101, 2 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + TEST("Separable Filter 2D 5x5 (u8), BORDER_REFLECT_101, 3 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + TEST("Separable Filter 2D 5x5 (u8), BORDER_REFLECT_101, 4 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + + TEST("Separable Filter 2D 5x5 (u8), BORDER_REFLECT, 1 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + TEST("Separable Filter 2D 5x5 (u8), BORDER_REFLECT, 2 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + TEST("Separable Filter 2D 5x5 (u8), BORDER_REFLECT, 3 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + TEST("Separable Filter 2D 5x5 (u8), BORDER_REFLECT, 4 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + + TEST("Separable Filter 2D 5x5 (u8), BORDER_REPLICATE, 1 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + TEST("Separable Filter 2D 5x5 (u8), BORDER_REPLICATE, 2 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + TEST("Separable Filter 2D 5x5 (u8), BORDER_REPLICATE, 3 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + TEST("Separable Filter 2D 5x5 (u8), BORDER_REPLICATE, 4 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + + TEST("Separable Filter 2D 5x5 (u16), BORDER_REFLECT_101, 1 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + TEST("Separable Filter 2D 5x5 (u16), BORDER_REFLECT_101, 2 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + TEST("Separable Filter 2D 5x5 (u16), BORDER_REFLECT_101, 3 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + TEST("Separable Filter 2D 5x5 (u16), BORDER_REFLECT_101, 4 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + + TEST("Separable Filter 2D 5x5 (u16), BORDER_REFLECT, 1 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + TEST("Separable Filter 2D 5x5 (u16), BORDER_REFLECT, 2 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + TEST("Separable Filter 2D 5x5 (u16), BORDER_REFLECT, 3 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + TEST("Separable Filter 2D 5x5 (u16), BORDER_REFLECT, 4 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + + TEST("Separable Filter 2D 5x5 (u16), BORDER_REPLICATE, 1 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + TEST("Separable Filter 2D 5x5 (u16), BORDER_REPLICATE, 2 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + TEST("Separable Filter 2D 5x5 (u16), BORDER_REPLICATE, 3 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + TEST("Separable Filter 2D 5x5 (u16), BORDER_REPLICATE, 4 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), }; // clang-format on return tests; diff --git a/conformity/opencv/utils.h b/conformity/opencv/utils.h index 64f3625cd..b7ae8eaff 100644 --- a/conformity/opencv/utils.h +++ b/conformity/opencv/utils.h @@ -71,6 +71,15 @@ bool are_matrices_different(T threshold, cv::Mat& A, cv::Mat& B) { return false; } +template +constexpr int get_opencv_matrix_type() { + if constexpr (std::is_same_v) { + return CV_8UC(Channels); + } else if constexpr (std::is_same_v) { + return CV_16UC(Channels); + } +} + void fail_print_matrices(size_t height, size_t width, cv::Mat& input, cv::Mat& manager_result, cv::Mat& subord_result); -- GitLab From 692c6cd5ae5bc2bde462157f1489382f173b7040 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Thu, 15 Aug 2024 12:15:17 +0200 Subject: [PATCH 4/6] Make Separable Filter 2D public and add documentation --- doc/functionality.md | 13 +++++++------ doc/opencv.md | 15 +++++++++++++++ kleidicv/include/kleidicv/kleidicv.h | 2 -- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/doc/functionality.md b/doc/functionality.md index 91d1858a8..11d3bd5e1 100644 --- a/doc/functionality.md +++ b/doc/functionality.md @@ -70,12 +70,13 @@ See `doc/opencv.md` for details of the functionality available in OpenCV. | Transpose | x | x | x | x | ## Image filters -| | u8 | -|--------------------------------------|-----| -| Erode | x | -| Dilate | x | -| Sobel (3x3) | x | -| Gaussian Blur (3x3, 5x5, 7x7, 15x15) | x | +| | u8 | u16 | +|--------------------------------------|-----|-----| +| Erode | x | | +| Dilate | x | | +| Sobel (3x3) | x | | +| Separable Filter 2D (5x5) | x | x | +| Gaussian Blur (3x3, 5x5, 7x7, 15x15) | x | | ## Resize to quarter | | u8 | diff --git a/doc/opencv.md b/doc/opencv.md index 20c629301..e20a315d3 100644 --- a/doc/opencv.md +++ b/doc/opencv.md @@ -101,6 +101,21 @@ Notes on parameters: * `maxValue` - value that elements above `thresh` will be set to. * `thresholdType` - currently only binary threshold operation is supported ([cv::THRESH_BINARY](https://docs.opencv.org/5.x/d7/d1b/group__imgproc__misc.html#gaa9e58d2860d4afa658ef70a9b1115576)). +### `sepFilter2D` +Applies a separable linear filter to an image.\ +Currently only the 5x5 kernel size is supported with `CV_8U` and `CV_16U` source, destination and kernel depths. + +Notes on parameters: +* `src`, `dst`, `kernelX`, `kernelY` - the number of channels must match between source and destination. The types must match between source, destination and kernels. However, the kernels must have 1 channel. +* `anchor` - values other than `(-1, 1)` are not supported. +* `delta` - values other than `0.0` are not supported. +* `borderType` - pixel extrapolation method. +Supported [OpenCV border types](https://docs.opencv.org/5.x/d2/de8/group__core__array.html#ga209f2f4869e304c82d07739337eae7c5) are: + + `cv::BORDER_REPLICATE` + + `cv::BORDER_REFLECT` + + `cv::BORDER_WRAP` + + `cv::BORDER_REFLECT_101` + ### `gaussian_blur` Blurs an image using a Gaussian filter.\ Currently does not support non-zero margins. Kernel shape is restricted to square (`kernelWidth == kernelHeight`). The filter's diff --git a/kleidicv/include/kleidicv/kleidicv.h b/kleidicv/include/kleidicv/kleidicv.h index 543f5e7e2..45d7867c6 100644 --- a/kleidicv/include/kleidicv/kleidicv.h +++ b/kleidicv/include/kleidicv/kleidicv.h @@ -1224,8 +1224,6 @@ kleidicv_error_t kleidicv_filter_context_create( kleidicv_error_t kleidicv_filter_context_release( kleidicv_filter_context_t *context); -/// Internal - not part of the public API and its direct use is not supported. -/// /// Applies a two-dimensional separable filter to the source image using the /// specified parameters. In-place filtering is not supported. /// -- GitLab From 6023f9447a36ec500e593c24cf6cf73cfef22248 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Wed, 21 Aug 2024 14:51:45 +0200 Subject: [PATCH 5/6] Add SepFilter2D conformity test to check channel number --- .../opencv/test_separable_filter_2d.cpp | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/conformity/opencv/test_separable_filter_2d.cpp b/conformity/opencv/test_separable_filter_2d.cpp index 2720d89a0..3c544681f 100644 --- a/conformity/opencv/test_separable_filter_2d.cpp +++ b/conformity/opencv/test_separable_filter_2d.cpp @@ -2,10 +2,28 @@ // // SPDX-License-Identifier: Apache-2.0 +#include #include #include "tests.h" +// This function tries running the Separable Filter 2D API via OpenCV with the +// specified InputType and KernelType. An exception should be thrown in case the +// constraint in the HAL has not been met. +// Returns a 1x1-sized boolean matrix. +template +cv::Mat exec_separable_filter_2d_channel_check(cv::Mat& input) { + cv::Mat kernel(KernelSize, 1, KernelType); + cv::Mat result; + try { + cv::sepFilter2D(input, result, -1, kernel, kernel, cv::Point(-1, -1), 0, + cv::BORDER_REPLICATE); + } catch (const cv::Exception&) { + return cv::Mat(1, 1, CV_8UC1, cv::Scalar(1)); + } + return cv::Mat(1, 1, CV_8UC1, cv::Scalar(0)); +} + template cv::Mat exec_separable_filter_2d(cv::Mat& input) { uint32_t kernel_seed = @@ -27,6 +45,38 @@ cv::Mat exec_separable_filter_2d(cv::Mat& input) { } #if MANAGER +// The purpose of this test is to check one of the initial constraints of the +// Separable Filter 2D HAL, that the kernel can only have one channel. +template +bool test_separable_filter_2d_channel_check( + int index, RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue) { + cv::Mat input(10, 10, InputType, cv::Scalar(0)); + + cv::Mat actual = + exec_separable_filter_2d_channel_check( + input); + cv::Mat expected = + get_expected_from_subordinate(index, request_queue, reply_queue, input); + + bool actual_exception_caught = actual.at(0, 0); + bool expected_exception_caught = expected.at(0, 0); + + if (actual_exception_caught != expected_exception_caught) { + std::cout << "[FAIL]" << std::endl + << "Actual: " + << (actual_exception_caught ? "exception" : "no exception") + << std::endl + << "Expected: " + << (expected_exception_caught ? "exception" : "no exception") + << std::endl + << std::endl; + return true; + } + + return false; +} + template bool test_separable_filter_2d(int index, RecreatedMessageQueue& request_queue, @@ -65,6 +115,11 @@ bool test_separable_filter_2d(int index, RecreatedMessageQueue& request_queue, std::vector& separable_filter_2d_tests_get() { // clang-format off static std::vector tests = { + TEST("Separable Filter 2D 5x5 channels: CV_8UC1 input, CV_8UC1 kernel", (test_separable_filter_2d_channel_check<5, CV_8UC1, CV_8UC1>), (exec_separable_filter_2d_channel_check<5, CV_8UC1, CV_8UC1>)), + TEST("Separable Filter 2D 5x5 channels: CV_8UC1 input, CV_8UC2 kernel", (test_separable_filter_2d_channel_check<5, CV_8UC1, CV_8UC2>), (exec_separable_filter_2d_channel_check<5, CV_8UC1, CV_8UC2>)), + TEST("Separable Filter 2D 5x5 channels: CV_8UC2 input, CV_8UC1 kernel", (test_separable_filter_2d_channel_check<5, CV_8UC2, CV_8UC1>), (exec_separable_filter_2d_channel_check<5, CV_8UC2, CV_8UC1>)), + TEST("Separable Filter 2D 5x5 channels: CV_8UC2 input, CV_8UC2 kernel", (test_separable_filter_2d_channel_check<5, CV_8UC2, CV_8UC2>), (exec_separable_filter_2d_channel_check<5, CV_8UC2, CV_8UC2>)), + TEST("Separable Filter 2D 5x5 (u8), BORDER_REFLECT_101, 1 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), TEST("Separable Filter 2D 5x5 (u8), BORDER_REFLECT_101, 2 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), TEST("Separable Filter 2D 5x5 (u8), BORDER_REFLECT_101, 3 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), -- GitLab From a2a9b767c8da8f0cc5f529be63b68acd5cb520b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Wed, 21 Aug 2024 18:01:33 +0200 Subject: [PATCH 6/6] Add benchmarks for Separable Filter 2D uint16 --- benchmark/benchmark.cpp | 24 +++++++++++++++++------- scripts/benchmark/run_benchmarks_4K.sh | 3 ++- scripts/benchmark/run_benchmarks_FHD.sh | 3 ++- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 3fb5a3c2b..06839d728 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -245,8 +245,8 @@ static void resize_linear_8x8_f32(benchmark::State& state) { } BENCHMARK(resize_linear_8x8_f32); -template -static void separable_filter_2d(benchmark::State& state) { +template +static void separable_filter_2d(benchmark::State& state, F function) { kleidicv_filter_context_t* context; kleidicv_error_t err = kleidicv_filter_context_create( &context, Channels, KernelSize, KernelSize, image_width, image_height); @@ -256,10 +256,10 @@ static void separable_filter_2d(benchmark::State& state) { return; } - std::vector kernel(KernelSize, 2); + std::vector kernel(KernelSize, 2); - bench_functor(state, [context, kernel]() { - (void)kleidicv_separable_filter_2d_u8( + bench_functor(state, [context, kernel, function]() { + (void)function( get_source_buffer_a(), image_width * Channels * sizeof(T), get_destination_buffer(), image_width * Channels * sizeof(T), image_width, image_height, Channels, @@ -271,15 +271,25 @@ static void separable_filter_2d(benchmark::State& state) { } static void separable_filter_2d_u8_5x5_1ch(benchmark::State& state) { - separable_filter_2d(state); + separable_filter_2d(state, kleidicv_separable_filter_2d_u8); } BENCHMARK(separable_filter_2d_u8_5x5_1ch); static void separable_filter_2d_u8_5x5_3ch(benchmark::State& state) { - separable_filter_2d(state); + separable_filter_2d(state, kleidicv_separable_filter_2d_u8); } BENCHMARK(separable_filter_2d_u8_5x5_3ch); +static void separable_filter_2d_u16_5x5_1ch(benchmark::State& state) { + separable_filter_2d(state, kleidicv_separable_filter_2d_u16); +} +BENCHMARK(separable_filter_2d_u16_5x5_1ch); + +static void separable_filter_2d_u16_5x5_3ch(benchmark::State& state) { + separable_filter_2d(state, kleidicv_separable_filter_2d_u16); +} +BENCHMARK(separable_filter_2d_u16_5x5_3ch); + template static void gaussian_blur(benchmark::State& state) { kleidicv_filter_context_t* context; diff --git a/scripts/benchmark/run_benchmarks_4K.sh b/scripts/benchmark/run_benchmarks_4K.sh index 6d7030843..4e7457479 100755 --- a/scripts/benchmark/run_benchmarks_4K.sh +++ b/scripts/benchmark/run_benchmarks_4K.sh @@ -39,7 +39,8 @@ benchmarks=( "BinaryThreshold: opencv_perf_imgproc '*ThreshFixture_Threshold.Threshold/*' '(3840x2160, 8UC1, THRESH_BINARY)'" - "SepFilter2D_5x5: opencv_perf_imgproc '*KleidiCV_SepFilter2D.SepFilter2D/*' '(3840x2160, 8UC1, 5, BORDER_REPLICATE)'" + "SepFilter2D_5x5_U8: opencv_perf_imgproc '*KleidiCV_SepFilter2D.SepFilter2D/*' '(3840x2160, 8UC1, 5, BORDER_REPLICATE)'" + "SepFilter2D_5x5_U16: opencv_perf_imgproc '*KleidiCV_SepFilter2D.SepFilter2D/*' '(3840x2160, 16UC1, 5, BORDER_REPLICATE)'" "GaussianBlur3x3: opencv_perf_imgproc '*gaussianBlur3x3/*' '(3840x2160, 8UC1, BORDER_REPLICATE)'" "GaussianBlur5x5: opencv_perf_imgproc '*gaussianBlur5x5/*' '(3840x2160, 8UC1, BORDER_REPLICATE)'" diff --git a/scripts/benchmark/run_benchmarks_FHD.sh b/scripts/benchmark/run_benchmarks_FHD.sh index 8a183409d..3fb54ff7c 100755 --- a/scripts/benchmark/run_benchmarks_FHD.sh +++ b/scripts/benchmark/run_benchmarks_FHD.sh @@ -39,7 +39,8 @@ benchmarks=( "BinaryThreshold: opencv_perf_imgproc '*ThreshFixture_Threshold.Threshold/*' '(1920x1080, 8UC1, THRESH_BINARY)'" - "SepFilter2D_5x5: opencv_perf_imgproc '*KleidiCV_SepFilter2D.SepFilter2D/*' '(1920x1080, 8UC1, 5, BORDER_REPLICATE)'" + "SepFilter2D_5x5_U8: opencv_perf_imgproc '*KleidiCV_SepFilter2D.SepFilter2D/*' '(1920x1080, 8UC1, 5, BORDER_REPLICATE)'" + "SepFilter2D_5x5_U16: opencv_perf_imgproc '*KleidiCV_SepFilter2D.SepFilter2D/*' '(1920x1080, 16UC1, 5, BORDER_REPLICATE)'" "GaussianBlur3x3: opencv_perf_imgproc '*gaussianBlur3x3/*' '(1920x1080, 8UC1, BORDER_REPLICATE)'" "GaussianBlur5x5: opencv_perf_imgproc '*gaussianBlur5x5/*' '(1920x1080, 8UC1, BORDER_REPLICATE)'" -- GitLab