From b28a308738f4b803727693c11e478a084e96e965 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Wed, 10 Jul 2024 15:37:42 +0200 Subject: [PATCH 1/5] Fix Gaussian blur tests --- test/api/test_gaussian_blur.cpp | 80 ++++++++++++++++++++++----------- 1 file changed, 53 insertions(+), 27 deletions(-) diff --git a/test/api/test_gaussian_blur.cpp b/test/api/test_gaussian_blur.cpp index db663935d..8af85309f 100644 --- a/test/api/test_gaussian_blur.cpp +++ b/test/api/test_gaussian_blur.cpp @@ -539,9 +539,9 @@ TYPED_TEST(GaussianBlur, UnsupportedBorderType15x15) { } TYPED_TEST(GaussianBlur, DifferentKernelSize) { - using KernelTestParams15x15 = GaussianBlurKernelTestParams; + using KernelTestParams = GaussianBlurKernelTestParams; kleidicv_filter_context_t *context = nullptr; - size_t validSize = KernelTestParams15x15::kKernelSize - 1; + size_t validSize = KernelTestParams::kKernelSize - 1; ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create(&context, 1, 15, 15, validSize, validSize)); TypeParam src[1] = {}, dst[1]; @@ -556,9 +556,9 @@ TYPED_TEST(GaussianBlur, DifferentKernelSize) { } TYPED_TEST(GaussianBlur, NonZeroSigma) { - using KernelTestParams15x15 = GaussianBlurKernelTestParams; + using KernelTestParams = GaussianBlurKernelTestParams; kleidicv_filter_context_t *context = nullptr; - size_t validSize = KernelTestParams15x15::kKernelSize - 1; + size_t validSize = KernelTestParams::kKernelSize - 1; ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create(&context, 1, 15, 15, validSize, validSize)); TypeParam src[1] = {}, dst[1]; @@ -585,9 +585,9 @@ TYPED_TEST(GaussianBlur, NonZeroSigma) { } TYPED_TEST(GaussianBlur, UnsupportedKernelSize) { - using KernelTestParams15x15 = GaussianBlurKernelTestParams; + using KernelTestParams = GaussianBlurKernelTestParams; kleidicv_filter_context_t *context = nullptr; - size_t validSize = KernelTestParams15x15::kKernelSize - 1; + size_t validSize = KernelTestParams::kKernelSize - 1; ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create(&context, 1, 15, 15, validSize, validSize)); TypeParam src[1] = {}, dst[1]; @@ -602,9 +602,9 @@ TYPED_TEST(GaussianBlur, UnsupportedKernelSize) { } TYPED_TEST(GaussianBlur, NullPointer) { - using KernelTestParams15x15 = GaussianBlurKernelTestParams; + using KernelTestParams = GaussianBlurKernelTestParams; kleidicv_filter_context_t *context = nullptr; - size_t validSize = KernelTestParams15x15::kKernelSize - 1; + size_t validSize = KernelTestParams::kKernelSize - 1; ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create(&context, 1, 15, 15, validSize, validSize)); TypeParam src[1] = {}, dst[1]; @@ -628,9 +628,9 @@ TYPED_TEST(GaussianBlur, Misalignment) { // misalignment impossible return; } - using KernelTestParams15x15 = GaussianBlurKernelTestParams; + using KernelTestParams = GaussianBlurKernelTestParams; kleidicv_filter_context_t *context = nullptr; - size_t validSize = KernelTestParams15x15::kKernelSize - 1; + size_t validSize = KernelTestParams::kKernelSize - 1; ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create(&context, 1, 15, 15, validSize, validSize)); TypeParam src[1] = {}, dst[1]; @@ -689,11 +689,11 @@ TYPED_TEST(GaussianBlur, ZeroImageSize3x3) { kleidicv_filter_context_create(&context, 1, 3, 3, 1, 1)); EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, gaussian_blur()( - src, sizeof(TypeParam), dst, sizeof(TypeParam), 0, 1, 1, 3, 3, + src, sizeof(TypeParam), dst, sizeof(TypeParam), 0, 3, 1, 3, 3, 0.0, 0.0, KLEIDICV_BORDER_TYPE_REFLECT, context)); EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, gaussian_blur()( - src, sizeof(TypeParam), dst, sizeof(TypeParam), 1, 0, 1, 3, 3, + src, sizeof(TypeParam), dst, sizeof(TypeParam), 3, 0, 1, 3, 3, 0.0, 0.0, KLEIDICV_BORDER_TYPE_REFLECT, context)); EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); } @@ -705,11 +705,11 @@ TYPED_TEST(GaussianBlur, ZeroImageSize5x5) { kleidicv_filter_context_create(&context, 1, 5, 5, 1, 1)); EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, gaussian_blur()( - src, sizeof(TypeParam), dst, sizeof(TypeParam), 0, 1, 1, 5, 5, + src, sizeof(TypeParam), dst, sizeof(TypeParam), 0, 5, 1, 5, 5, 0.0, 0.0, KLEIDICV_BORDER_TYPE_REFLECT, context)); EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, gaussian_blur()( - src, sizeof(TypeParam), dst, sizeof(TypeParam), 1, 0, 1, 5, 5, + src, sizeof(TypeParam), dst, sizeof(TypeParam), 5, 0, 1, 5, 5, 0.0, 0.0, KLEIDICV_BORDER_TYPE_REFLECT, context)); EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); } @@ -721,11 +721,11 @@ TYPED_TEST(GaussianBlur, ZeroImageSize7x7) { kleidicv_filter_context_create(&context, 1, 7, 7, 1, 1)); EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, gaussian_blur()( - src, sizeof(TypeParam), dst, sizeof(TypeParam), 0, 1, 1, 7, 7, + src, sizeof(TypeParam), dst, sizeof(TypeParam), 0, 7, 1, 7, 7, 0.0, 0.0, KLEIDICV_BORDER_TYPE_REFLECT, context)); EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, gaussian_blur()( - src, sizeof(TypeParam), dst, sizeof(TypeParam), 1, 0, 1, 7, 7, + src, sizeof(TypeParam), dst, sizeof(TypeParam), 7, 0, 1, 7, 7, 0.0, 0.0, KLEIDICV_BORDER_TYPE_REFLECT, context)); EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); } @@ -737,12 +737,12 @@ TYPED_TEST(GaussianBlur, ZeroImageSize15x15) { kleidicv_filter_context_create(&context, 1, 15, 15, 1, 1)); EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, gaussian_blur()( - src, sizeof(TypeParam), dst, sizeof(TypeParam), 0, 1, 1, 15, 15, - 0.0, 0.0, KLEIDICV_BORDER_TYPE_REFLECT, context)); + src, sizeof(TypeParam), dst, sizeof(TypeParam), 0, 15, 1, 15, + 15, 0.0, 0.0, KLEIDICV_BORDER_TYPE_REFLECT, context)); EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, gaussian_blur()( - src, sizeof(TypeParam), dst, sizeof(TypeParam), 1, 0, 1, 15, 15, - 0.0, 0.0, KLEIDICV_BORDER_TYPE_REFLECT, context)); + src, sizeof(TypeParam), dst, sizeof(TypeParam), 15, 0, 1, 15, + 15, 0.0, 0.0, KLEIDICV_BORDER_TYPE_REFLECT, context)); EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); } @@ -1065,9 +1065,9 @@ TYPED_TEST(GaussianBlur, OversizeImage) { } TYPED_TEST(GaussianBlur, ChannelNumber) { - using KernelTestParams15x15 = GaussianBlurKernelTestParams; + using KernelTestParams = GaussianBlurKernelTestParams; kleidicv_filter_context_t *context = nullptr; - size_t validSize = KernelTestParams15x15::kKernelSize - 1; + size_t validSize = KernelTestParams::kKernelSize - 1; ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create(&context, 1, 15, 15, validSize, validSize)); @@ -1081,9 +1081,9 @@ TYPED_TEST(GaussianBlur, ChannelNumber) { } TYPED_TEST(GaussianBlur, InvalidContextMaxChannels) { - using KernelTestParams15x15 = GaussianBlurKernelTestParams; + using KernelTestParams = GaussianBlurKernelTestParams; kleidicv_filter_context_t *context = nullptr; - size_t validSize = KernelTestParams15x15::kKernelSize - 1; + size_t validSize = KernelTestParams::kKernelSize - 1; ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create(&context, 1, 15, 15, validSize, validSize)); @@ -1097,9 +1097,9 @@ TYPED_TEST(GaussianBlur, InvalidContextMaxChannels) { } TYPED_TEST(GaussianBlur, InvalidContextImageSize) { - using KernelTestParams15x15 = GaussianBlurKernelTestParams; + using KernelTestParams = GaussianBlurKernelTestParams; kleidicv_filter_context_t *context = nullptr; - size_t validSize = KernelTestParams15x15::kKernelSize - 1; + size_t validSize = KernelTestParams::kKernelSize - 1; ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create(&context, 1, 15, 15, validSize, validSize)); @@ -1123,7 +1123,7 @@ TYPED_TEST(GaussianBlur, InvalidContextImageSize) { EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); } -TYPED_TEST(GaussianBlur, InvalidUnimplementedKernelSize) { +TYPED_TEST(GaussianBlur, InvalidKernelSize) { kleidicv_filter_context_t *context = nullptr; size_t kernel_size = 17; @@ -1144,6 +1144,32 @@ TYPED_TEST(GaussianBlur, InvalidUnimplementedKernelSize) { EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); } +TYPED_TEST(GaussianBlur, InvalidBorderType) { + using KernelTestParams = GaussianBlurKernelTestParams; + kleidicv_filter_context_t *context = nullptr; + size_t validSize = KernelTestParams::kKernelSize - 1; + + ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create(&context, 1, 15, 15, + validSize, validSize)); + TypeParam src[1], dst[1]; + EXPECT_EQ( + KLEIDICV_ERROR_NOT_IMPLEMENTED, + gaussian_blur()(src, sizeof(TypeParam), dst, sizeof(TypeParam), + validSize, validSize, 1, 15, 15, 0.0, 0.0, + KLEIDICV_BORDER_TYPE_CONSTANT, context)); + EXPECT_EQ( + KLEIDICV_ERROR_NOT_IMPLEMENTED, + gaussian_blur()(src, sizeof(TypeParam), dst, sizeof(TypeParam), + validSize, validSize, 1, 15, 15, 0.0, 0.0, + KLEIDICV_BORDER_TYPE_TRANSPARENT, context)); + EXPECT_EQ( + KLEIDICV_ERROR_NOT_IMPLEMENTED, + gaussian_blur()(src, sizeof(TypeParam), dst, sizeof(TypeParam), + validSize, validSize, 1, 15, 15, 0.0, 0.0, + KLEIDICV_BORDER_TYPE_NONE, context)); + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); +} + template static std::array generate_reference_kernel(float sigma) { std::array float_kernel{}; -- GitLab From 72ca6febc20024ab5e546cae47c7dceeb60ebc7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Thu, 11 Jul 2024 14:39:27 +0200 Subject: [PATCH 2/5] Refactor Gaussian blur API --- kleidicv/include/kleidicv/kleidicv.h | 15 ++++---- kleidicv/src/filters/gaussian_blur_neon.cpp | 38 ++++++++++++--------- kleidicv/src/filters/gaussian_blur_sc.h | 1 + 3 files changed, 31 insertions(+), 23 deletions(-) diff --git a/kleidicv/include/kleidicv/kleidicv.h b/kleidicv/include/kleidicv/kleidicv.h index 10a8e945e..040ac691b 100644 --- a/kleidicv/include/kleidicv/kleidicv.h +++ b/kleidicv/include/kleidicv/kleidicv.h @@ -1198,15 +1198,18 @@ kleidicv_error_t kleidicv_filter_context_release( /// destination. The number of elements is limited to @ref /// KLEIDICV_MAX_IMAGE_PIXELS. /// -/// Usage: \n +/// Usage: +/// /// Before using this function, a context must be created using -/// kleidicv_filter_context_create, and after finished, it has to be released +/// kleidicv_filter_context_create, and when finished, it has to be released /// using kleidicv_filter_context_release. Please ensure that your filter /// context parameters are large enough, otherwise this API will return with an -/// error. \n Note, from the border types only these are supported: \n -/// - @ref KLEIDICV_BORDER_TYPE_REPLICATE \n -/// - @ref KLEIDICV_BORDER_TYPE_REFLECT \n -/// - @ref KLEIDICV_BORDER_TYPE_WRAP \n +/// error. +/// +/// Note, from the border types only these are supported: +/// - @ref KLEIDICV_BORDER_TYPE_REPLICATE +/// - @ref KLEIDICV_BORDER_TYPE_REFLECT +/// - @ref KLEIDICV_BORDER_TYPE_WRAP /// - @ref KLEIDICV_BORDER_TYPE_REVERSE /// /// @param src Pointer to the source data. Must be non-null. diff --git a/kleidicv/src/filters/gaussian_blur_neon.cpp b/kleidicv/src/filters/gaussian_blur_neon.cpp index 7405e2e71..bf4d82aa1 100644 --- a/kleidicv/src/filters/gaussian_blur_neon.cpp +++ b/kleidicv/src/filters/gaussian_blur_neon.cpp @@ -526,28 +526,29 @@ class GaussianBlur { sigma)) {} void vertical_vector_path(uint8x16_t src[KernelSize], BufferType *dst) const { - uint16x8_t acc_last_l = vmovl_u8(vget_low_u8(src[KernelSize >> 1])); - uint16x8_t acc_last_h = vmovl_u8(vget_high_u8(src[KernelSize >> 1])); + uint16x8_t initial_l = vmovl_u8(vget_low_u8(src[KernelSize >> 1])); + uint16x8_t initial_h = vmovl_u8(vget_high_u8(src[KernelSize >> 1])); uint32x4_t acc_l_l = - vmull_n_u16(vget_low_u16(acc_last_l), half_kernel_[KernelSize >> 1]); + vmull_n_u16(vget_low_u16(initial_l), half_kernel_[KernelSize >> 1]); uint32x4_t acc_l_h = - vmull_n_u16(vget_high_u16(acc_last_l), half_kernel_[KernelSize >> 1]); + vmull_n_u16(vget_high_u16(initial_l), half_kernel_[KernelSize >> 1]); uint32x4_t acc_h_l = - vmull_n_u16(vget_low_u16(acc_last_h), half_kernel_[KernelSize >> 1]); + vmull_n_u16(vget_low_u16(initial_h), half_kernel_[KernelSize >> 1]); uint32x4_t acc_h_h = - vmull_n_u16(vget_high_u16(acc_last_h), half_kernel_[KernelSize >> 1]); + vmull_n_u16(vget_high_u16(initial_h), half_kernel_[KernelSize >> 1]); + // Optimization to avoid unnecessary branching in vector code. KLEIDICV_FORCE_LOOP_UNROLL for (size_t i = 0; i < (KernelSize >> 1); i++) { - size_t j = KernelSize - i - 1; - uint16x8_t acc_l = vaddl_u8(vget_low_u8(src[i]), vget_low_u8(src[j])); - uint16x8_t acc_h = vaddl_u8(vget_high_u8(src[i]), vget_high_u8(src[j])); - - acc_l_l = vmlal_n_u16(acc_l_l, vget_low_u16(acc_l), half_kernel_[i]); - acc_l_h = vmlal_n_u16(acc_l_h, vget_high_u16(acc_l), half_kernel_[i]); - acc_h_l = vmlal_n_u16(acc_h_l, vget_low_u16(acc_h), half_kernel_[i]); - acc_h_h = vmlal_n_u16(acc_h_h, vget_high_u16(acc_h), half_kernel_[i]); + const size_t j = KernelSize - i - 1; + uint16x8_t vec_l = vaddl_u8(vget_low_u8(src[i]), vget_low_u8(src[j])); + uint16x8_t vec_h = vaddl_u8(vget_high_u8(src[i]), vget_high_u8(src[j])); + + acc_l_l = vmlal_n_u16(acc_l_l, vget_low_u16(vec_l), half_kernel_[i]); + acc_l_h = vmlal_n_u16(acc_l_h, vget_high_u16(vec_l), half_kernel_[i]); + acc_h_l = vmlal_n_u16(acc_h_l, vget_low_u16(vec_h), half_kernel_[i]); + acc_h_h = vmlal_n_u16(acc_h_h, vget_high_u16(vec_h), half_kernel_[i]); } uint32x4x4_t result = {acc_l_l, acc_l_h, acc_h_l, acc_h_h}; @@ -559,6 +560,7 @@ class GaussianBlur { BufferType *dst) const { uint32_t acc = static_cast(src[0]) * half_kernel_[0]; + // Optimization to avoid unnecessary branching in vector code. KLEIDICV_FORCE_LOOP_UNROLL for (size_t i = 1; i <= (KernelSize >> 1); i++) { acc += static_cast(src[i]) * half_kernel_[i]; @@ -578,11 +580,12 @@ class GaussianBlur { uint32x4_t acc = vmulq_n_u32(src[KernelSize >> 1], half_kernel_[KernelSize >> 1]); + // Optimization to avoid unnecessary branching in vector code. KLEIDICV_FORCE_LOOP_UNROLL for (size_t i = 0; i < (KernelSize >> 1); i++) { - size_t j = KernelSize - i - 1; - uint32x4_t acc_inner = vaddq_u32(src[i], src[j]); - acc = vmlaq_n_u32(acc, acc_inner, half_kernel_[i]); + const size_t j = KernelSize - i - 1; + uint32x4_t vec_inner = vaddq_u32(src[i], src[j]); + acc = vmlaq_n_u32(acc, vec_inner, half_kernel_[i]); } uint32x4_t acc_u32 = vrshrq_n_u32(acc, 16); @@ -597,6 +600,7 @@ class GaussianBlur { DestinationType *dst) const { uint32_t acc = src[0] * half_kernel_[0]; + // Optimization to avoid unnecessary branching in vector code. KLEIDICV_FORCE_LOOP_UNROLL for (size_t i = 1; i <= (KernelSize >> 1); i++) { acc += src[i] * half_kernel_[i]; diff --git a/kleidicv/src/filters/gaussian_blur_sc.h b/kleidicv/src/filters/gaussian_blur_sc.h index 5c33af67c..16bd0a3ea 100644 --- a/kleidicv/src/filters/gaussian_blur_sc.h +++ b/kleidicv/src/filters/gaussian_blur_sc.h @@ -589,6 +589,7 @@ class GaussianBlur final dst[0] = static_cast(rounding_shift_right(acc, 16)); } }; // end of class GaussianBlur + template <> class GaussianBlur final : public GaussianBlurNonBinomialBase { -- GitLab From 8874d161eabb86de983dc0515a0c121a2e8ae1ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Thu, 11 Jul 2024 14:46:24 +0200 Subject: [PATCH 3/5] Implement initial Separable Filter 2D API NEON Currently only the following configuration is supported: - Type: uint8 - Border: REPLICATE - Kernel size: 5x5 --- .../kleidicv/filters/separable_filter_2d.h | 45 ++ kleidicv/include/kleidicv/kleidicv.h | 50 +++ kleidicv/src/filters/gaussian_blur_api.cpp | 52 --- .../src/filters/separable_filter_2d_api.cpp | 64 +++ .../src/filters/separable_filter_2d_neon.cpp | 143 +++++++ .../src/filters/separable_filter_2d_sme2.cpp | 17 + .../src/filters/separable_filter_2d_sve2.cpp | 19 + test/api/test_gaussian_blur.cpp | 53 --- test/api/test_separable_filter_2d.cpp | 405 ++++++++++++++++++ test/framework/generator.h | 18 + 10 files changed, 761 insertions(+), 105 deletions(-) create mode 100644 kleidicv/include/kleidicv/filters/separable_filter_2d.h create mode 100644 kleidicv/src/filters/separable_filter_2d_api.cpp create mode 100644 kleidicv/src/filters/separable_filter_2d_neon.cpp create mode 100644 kleidicv/src/filters/separable_filter_2d_sme2.cpp create mode 100644 kleidicv/src/filters/separable_filter_2d_sve2.cpp create mode 100644 test/api/test_separable_filter_2d.cpp diff --git a/kleidicv/include/kleidicv/filters/separable_filter_2d.h b/kleidicv/include/kleidicv/filters/separable_filter_2d.h new file mode 100644 index 000000000..514d88a5a --- /dev/null +++ b/kleidicv/include/kleidicv/filters/separable_filter_2d.h @@ -0,0 +1,45 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_FILTERS_SEPARABLE_FILTER_2D_H +#define KLEIDICV_FILTERS_SEPARABLE_FILTER_2D_H + +#include "kleidicv/config.h" +#include "kleidicv/types.h" + +namespace kleidicv { + +namespace neon { + +kleidicv_error_t separable_filter_2d_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, const uint8_t *kernel_x, + size_t kernel_width, const uint8_t *kernel_y, size_t kernel_height, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); + +} // namespace neon + +namespace sve2 { + +kleidicv_error_t separable_filter_2d_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, const uint8_t *kernel_x, + size_t kernel_width, const uint8_t *kernel_y, size_t kernel_height, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); + +} // namespace sve2 + +namespace sme2 { + +kleidicv_error_t separable_filter_2d_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, const uint8_t *kernel_x, + size_t kernel_width, const uint8_t *kernel_y, size_t kernel_height, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); + +} // namespace sme2 + +} // namespace kleidicv + +#endif // KLEIDICV_FILTERS_SEPARABLE_FILTER_2D_H diff --git a/kleidicv/include/kleidicv/kleidicv.h b/kleidicv/include/kleidicv/kleidicv.h index 040ac691b..125ac8cfa 100644 --- a/kleidicv/include/kleidicv/kleidicv.h +++ b/kleidicv/include/kleidicv/kleidicv.h @@ -1191,6 +1191,56 @@ kleidicv_error_t kleidicv_filter_context_create( kleidicv_error_t kleidicv_filter_context_release( kleidicv_filter_context_t *context); +/// Internal - not part of the public API and its direct use is not supported. +/// +/// Applies a two-dimensional separable filter to the source image using the +/// specified parameters. In-place filtering is not supported. +/// +/// Width and height are assumed to be the same for the source and for the +/// destination. The number of elements is limited to @ref +/// KLEIDICV_MAX_IMAGE_PIXELS. +/// +/// Usage: +/// +/// Before using this function, a context must be created using +/// kleidicv_filter_context_create, and when finished, it has to be released +/// using kleidicv_filter_context_release. Please ensure that your filter +/// context parameters are large enough, otherwise this API will return with an +/// error. +/// +/// Note, from the border types only KLEIDICV_BORDER_TYPE_REPLICATE is +/// supported. +/// +/// @param src Pointer to the source data. Must be non-null. +/// @param src_stride Distance in bytes from the start of one row to the +/// start of the next row in the source data. Must be a +/// multiple of sizeof(type) and no less than width * +/// sizeof(type) * channels, except for single-row images. +/// @param dst Pointer to the destination data. Must be non-null. +/// @param dst_stride Distance in bytes from the start of one row to the +/// start of the next row in the destination data. Must be +/// a multiple of sizeof(type) and no less than width * +/// sizeof(type) * channels, except for single-row images. +/// @param width Number of columns in the data. (One column consists of +/// 'channels' number of elements.) +/// @param height Number of rows in the data. +/// @param channels Number of channels in the data. Must be not more than +/// @ref KLEIDICV_MAXIMUM_CHANNEL_COUNT. +/// @param kernel_x Pointer to the horizontal 2D kernel values. +/// @param kernel_width Size of the horizontal 2D kernel. +/// @param kernel_y Pointer to the vertical 2D kernel values. +/// @param kernel_height Size of the vertical 2D kernel. +/// @param border_type Way of handling the border. +/// @param context Pointer to filter context. +/// +KLEIDICV_API_DECLARATION(kleidicv_separable_filter_2d_u8, const uint8_t *src, + size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, + const uint8_t *kernel_x, size_t kernel_width, + const uint8_t *kernel_y, size_t kernel_height, + kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context); + /// Applies Gaussian blur to the source image using the specified parameters. /// In-place filtering is not supported. /// diff --git a/kleidicv/src/filters/gaussian_blur_api.cpp b/kleidicv/src/filters/gaussian_blur_api.cpp index 01ace13a7..161cf666d 100644 --- a/kleidicv/src/filters/gaussian_blur_api.cpp +++ b/kleidicv/src/filters/gaussian_blur_api.cpp @@ -5,58 +5,6 @@ #include "kleidicv/dispatch.h" #include "kleidicv/filters/gaussian_blur.h" #include "kleidicv/kleidicv.h" -#include "kleidicv/workspace/separable.h" - -extern "C" { - -using KLEIDICV_TARGET_NAMESPACE::Rectangle; -using KLEIDICV_TARGET_NAMESPACE::SeparableFilterWorkspace; - -kleidicv_error_t kleidicv_filter_context_create( - kleidicv_filter_context_t **context, size_t max_channels, - size_t max_kernel_width, size_t max_kernel_height, size_t max_image_width, - size_t max_image_height) { - CHECK_POINTERS(context); - - if (max_kernel_width != max_kernel_height) { - return KLEIDICV_ERROR_NOT_IMPLEMENTED; - } - - if (max_channels > KLEIDICV_MAXIMUM_CHANNEL_COUNT) { - return KLEIDICV_ERROR_RANGE; - } - - CHECK_IMAGE_SIZE(max_image_width, max_image_height); - - // We can use the maximum size that accommodates everything due to the lack of - // information at this stage. - constexpr size_t intermediate_size = sizeof(uint32_t); - auto workspace = SeparableFilterWorkspace::create( - Rectangle{max_image_width, max_image_height}, max_channels, - intermediate_size); - if (!workspace) { - *context = nullptr; - return KLEIDICV_ERROR_ALLOCATION; - } - - *context = reinterpret_cast(workspace.release()); - return KLEIDICV_OK; -} - -kleidicv_error_t kleidicv_filter_context_release( - kleidicv_filter_context_t *context) { - CHECK_POINTERS(context); - - // Deliberately create and immediately destroy a unique_ptr to delete the - // workspace. - // NOLINTBEGIN(bugprone-unused-raii) - SeparableFilterWorkspace::Pointer{ - reinterpret_cast(context)}; - // NOLINTEND(bugprone-unused-raii) - return KLEIDICV_OK; -} - -} // extern "C" KLEIDICV_MULTIVERSION_C_API( kleidicv_gaussian_blur_u8, &kleidicv::neon::gaussian_blur_u8, diff --git a/kleidicv/src/filters/separable_filter_2d_api.cpp b/kleidicv/src/filters/separable_filter_2d_api.cpp new file mode 100644 index 000000000..5e6a222a2 --- /dev/null +++ b/kleidicv/src/filters/separable_filter_2d_api.cpp @@ -0,0 +1,64 @@ +// SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "kleidicv/dispatch.h" +#include "kleidicv/filters/separable_filter_2d.h" +#include "kleidicv/kleidicv.h" +#include "kleidicv/workspace/separable.h" + +extern "C" { + +using KLEIDICV_TARGET_NAMESPACE::Rectangle; +using KLEIDICV_TARGET_NAMESPACE::SeparableFilterWorkspace; + +kleidicv_error_t kleidicv_filter_context_create( + kleidicv_filter_context_t **context, size_t max_channels, + size_t max_kernel_width, size_t max_kernel_height, size_t max_image_width, + size_t max_image_height) { + CHECK_POINTERS(context); + + if (max_kernel_width != max_kernel_height) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + + if (max_channels > KLEIDICV_MAXIMUM_CHANNEL_COUNT) { + return KLEIDICV_ERROR_RANGE; + } + + CHECK_IMAGE_SIZE(max_image_width, max_image_height); + + // As we cannot predict the intermediate size based on the parameters given, + // just use the largest possible size out of all available operations. + constexpr size_t intermediate_size = sizeof(uint32_t); + auto workspace = SeparableFilterWorkspace::create( + Rectangle{max_image_width, max_image_height}, max_channels, + intermediate_size); + if (!workspace) { + *context = nullptr; + return KLEIDICV_ERROR_ALLOCATION; + } + + *context = reinterpret_cast(workspace.release()); + return KLEIDICV_OK; +} + +kleidicv_error_t kleidicv_filter_context_release( + kleidicv_filter_context_t *context) { + CHECK_POINTERS(context); + + // Deliberately create and immediately destroy a unique_ptr to delete the + // workspace. + // NOLINTBEGIN(bugprone-unused-raii) + SeparableFilterWorkspace::Pointer{ + reinterpret_cast(context)}; + // NOLINTEND(bugprone-unused-raii) + return KLEIDICV_OK; +} + +} // extern "C" + +KLEIDICV_MULTIVERSION_C_API( + kleidicv_separable_filter_2d_u8, &kleidicv::neon::separable_filter_2d_u8, + KLEIDICV_SVE2_IMPL_IF(kleidicv::sve2::separable_filter_2d_u8), + &kleidicv::sme2::separable_filter_2d_u8); diff --git a/kleidicv/src/filters/separable_filter_2d_neon.cpp b/kleidicv/src/filters/separable_filter_2d_neon.cpp new file mode 100644 index 000000000..35f32c088 --- /dev/null +++ b/kleidicv/src/filters/separable_filter_2d_neon.cpp @@ -0,0 +1,143 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "kleidicv/ctypes.h" +#include "kleidicv/filters/separable_filter_2d.h" +#include "kleidicv/kleidicv.h" +#include "kleidicv/neon.h" +#include "kleidicv/separable_filter_5x5_neon.h" + +namespace kleidicv::neon { + +template +class SeparableFilter2D; + +template <> +class SeparableFilter2D { + public: + using SourceType = uint8_t; + using BufferType = uint32_t; + using DestinationType = uint8_t; + + explicit SeparableFilter2D(const uint8_t *kernel_x, const uint8_t *kernel_y) + : kernel_x_(kernel_x), kernel_y_(kernel_y) {} + + void vertical_vector_path(uint8x16_t src[5], BufferType *dst) const { + uint16x8_t initial_l = vmovl_u8(vget_low_u8(src[0])); + uint16x8_t initial_h = vmovl_u8(vget_high_u8(src[0])); + + uint32x4_t acc_l_l = vmull_n_u16(vget_low_u16(initial_l), kernel_y_[0]); + uint32x4_t acc_l_h = vmull_n_u16(vget_high_u16(initial_l), kernel_y_[0]); + uint32x4_t acc_h_l = vmull_n_u16(vget_low_u16(initial_h), kernel_y_[0]); + uint32x4_t acc_h_h = vmull_n_u16(vget_high_u16(initial_h), kernel_y_[0]); + + // Optimization to avoid unnecessary branching in vector code. + KLEIDICV_FORCE_LOOP_UNROLL + for (size_t i = 1; i < 5; i++) { + uint16x8_t vec_l = vmovl_u8(vget_low_u8(src[i])); + uint16x8_t vec_h = vmovl_u8(vget_high_u8(src[i])); + + acc_l_l = vmlal_n_u16(acc_l_l, vget_low_u16(vec_l), kernel_y_[i]); + acc_l_h = vmlal_n_u16(acc_l_h, vget_high_u16(vec_l), kernel_y_[i]); + acc_h_l = vmlal_n_u16(acc_h_l, vget_low_u16(vec_h), kernel_y_[i]); + acc_h_h = vmlal_n_u16(acc_h_h, vget_high_u16(vec_h), kernel_y_[i]); + } + + uint32x4x4_t result = {acc_l_l, acc_l_h, acc_h_l, acc_h_h}; + + vst1q_u32_x4(&dst[0], result); + } + + void vertical_scalar_path(const SourceType src[5], BufferType *dst) const { + uint32_t acc = static_cast(src[0]) * kernel_y_[0] + + static_cast(src[1]) * kernel_y_[1] + + static_cast(src[2]) * kernel_y_[2] + + static_cast(src[3]) * kernel_y_[3] + + static_cast(src[4]) * kernel_y_[4]; + + dst[0] = acc; + } + + void horizontal_vector_path(uint32x4_t src[5], DestinationType *dst) const { + uint32x4_t acc = vmulq_n_u32(src[0], kernel_x_[0]); + acc = vmlaq_n_u32(acc, src[1], kernel_x_[1]); + acc = vmlaq_n_u32(acc, src[2], kernel_x_[2]); + acc = vmlaq_n_u32(acc, src[3], kernel_x_[3]); + acc = vmlaq_n_u32(acc, src[4], kernel_x_[4]); + + uint16x4_t narrowed = vmovn_u32(acc); + uint8x8_t interleaved = + vuzp1_u8(vreinterpret_u8_u16(narrowed), vreinterpret_u8_u16(narrowed)); + uint32_t result = vget_lane_u32(vreinterpret_u32_u8(interleaved), 0); + memcpy(&dst[0], &result, sizeof(result)); + } + + void horizontal_scalar_path(const BufferType src[5], + DestinationType *dst) const { + uint32_t acc = src[0] * kernel_x_[0] + src[1] * kernel_x_[1] + + src[2] * kernel_x_[2] + src[3] * kernel_x_[3] + + src[4] * kernel_x_[4]; + + dst[0] = static_cast(acc); + } + + private: + const uint8_t *kernel_x_; + const uint8_t *kernel_y_; +}; + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t separable_filter_2d_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, const uint8_t *kernel_x, + size_t kernel_width, const uint8_t *kernel_y, size_t kernel_height, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { + CHECK_POINTERS(context, kernel_x, kernel_y); + auto *workspace = reinterpret_cast(context); + auto fixed_border_type = get_fixed_border_type(border_type); + + if (kernel_width != 5 || kernel_height != 5) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); + CHECK_IMAGE_SIZE(width, height); + + if (width < kernel_width - 1 || height < kernel_width - 1) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + + if (channels > KLEIDICV_MAXIMUM_CHANNEL_COUNT) { + return KLEIDICV_ERROR_RANGE; + } + + if (workspace->channels() < channels) { + return KLEIDICV_ERROR_CONTEXT_MISMATCH; + } + + Rectangle rect{width, height}; + const Rectangle &context_rect = workspace->image_size(); + if (context_rect.width() < width || context_rect.height() < height) { + return KLEIDICV_ERROR_CONTEXT_MISMATCH; + } + + if (!fixed_border_type || *fixed_border_type != FixedBorderType::REPLICATE) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + + using SeparableFilterClass = SeparableFilter2D; + + SeparableFilterClass filterClass{kernel_x, kernel_y}; + SeparableFilter filter{filterClass}; + + Rows src_rows{src, src_stride, channels}; + Rows dst_rows{dst, dst_stride, channels}; + workspace->process(rect, src_rows, dst_rows, channels, + FixedBorderType::REPLICATE, filter); + + return KLEIDICV_OK; +} + +} // namespace kleidicv::neon diff --git a/kleidicv/src/filters/separable_filter_2d_sme2.cpp b/kleidicv/src/filters/separable_filter_2d_sme2.cpp new file mode 100644 index 000000000..eb117f6d3 --- /dev/null +++ b/kleidicv/src/filters/separable_filter_2d_sme2.cpp @@ -0,0 +1,17 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "kleidicv/filters/separable_filter_2d.h" + +namespace kleidicv::sme2 { + +KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t +separable_filter_2d_u8(const uint8_t *, size_t, uint8_t *, size_t, size_t, + size_t, size_t, const uint8_t *, size_t, const uint8_t *, + size_t, kleidicv_border_type_t, + kleidicv_filter_context_t *) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; +} + +} // namespace kleidicv::sme2 diff --git a/kleidicv/src/filters/separable_filter_2d_sve2.cpp b/kleidicv/src/filters/separable_filter_2d_sve2.cpp new file mode 100644 index 000000000..dbc31cdd5 --- /dev/null +++ b/kleidicv/src/filters/separable_filter_2d_sve2.cpp @@ -0,0 +1,19 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "kleidicv/filters/separable_filter_2d.h" + +namespace kleidicv::sve2 { + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t separable_filter_2d_u8(const uint8_t *, size_t, uint8_t *, + size_t, size_t, size_t, size_t, + const uint8_t *, size_t, + const uint8_t *, size_t, + kleidicv_border_type_t, + kleidicv_filter_context_t *) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; +} + +} // namespace kleidicv::sve2 diff --git a/test/api/test_gaussian_blur.cpp b/test/api/test_gaussian_blur.cpp index 8af85309f..e8f1becf2 100644 --- a/test/api/test_gaussian_blur.cpp +++ b/test/api/test_gaussian_blur.cpp @@ -1228,56 +1228,3 @@ TYPED_TEST(GaussianBlur, KernelGenerationFromSigma) { test_sigma<4>(); test_sigma<8>(); } - -#ifdef KLEIDICV_ALLOCATION_TESTS -TEST(FilterCreate, CannotAllocateFilter) { - MockMallocToFail::enable(); - kleidicv_filter_context_t *context = nullptr; - EXPECT_EQ(KLEIDICV_ERROR_ALLOCATION, - kleidicv_filter_context_create(&context, 1, 1, 1, - KLEIDICV_MAX_IMAGE_PIXELS, 1)); - MockMallocToFail::disable(); -} -#endif - -TEST(FilterCreate, OversizeImage) { - kleidicv_filter_context_t *context = nullptr; - - for (kleidicv_rectangle_t rect : { - kleidicv_rectangle_t{KLEIDICV_MAX_IMAGE_PIXELS + 1, 1}, - kleidicv_rectangle_t{KLEIDICV_MAX_IMAGE_PIXELS, - KLEIDICV_MAX_IMAGE_PIXELS}, - }) { - EXPECT_EQ(KLEIDICV_ERROR_RANGE, - kleidicv_filter_context_create(&context, 1, 1, 1, rect.width, - rect.height)); - ASSERT_EQ(nullptr, context); - } -} - -TEST(FilterCreate, DifferentKernelSize) { - kleidicv_filter_context_t *context = nullptr; - - EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, - kleidicv_filter_context_create(&context, 1, 7, 15, 1, 1)); - ASSERT_EQ(nullptr, context); -} - -TEST(FilterCreate, ChannelNumber) { - kleidicv_filter_context_t *context = nullptr; - - EXPECT_EQ(KLEIDICV_ERROR_RANGE, - kleidicv_filter_context_create( - &context, KLEIDICV_MAXIMUM_CHANNEL_COUNT + 1, 1, 1, 1, 1)); - ASSERT_EQ(nullptr, context); -} - -TEST(FilterCreate, NullPointer) { - EXPECT_EQ(KLEIDICV_ERROR_NULL_POINTER, - kleidicv_filter_context_create(nullptr, 1, 1, 1, 1, 1)); -} - -TEST(FilterRelease, NullPointer) { - EXPECT_EQ(KLEIDICV_ERROR_NULL_POINTER, - kleidicv_filter_context_release(nullptr)); -} diff --git a/test/api/test_separable_filter_2d.cpp b/test/api/test_separable_filter_2d.cpp new file mode 100644 index 000000000..f0a001a64 --- /dev/null +++ b/test/api/test_separable_filter_2d.cpp @@ -0,0 +1,405 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include "framework/array.h" +#include "framework/generator.h" +#include "framework/kernel.h" +#include "framework/utils.h" +#include "kleidicv/kleidicv.h" +#include "test_config.h" + +KLEIDICV_API(separable_filter_2d, kleidicv_separable_filter_2d_u8, uint8_t) + +// Implements KernelTestParams for SeparableFilter2D operators. +template +struct SeparableFilter2DKernelTestParams; + +template +struct SeparableFilter2DKernelTestParams { + using InputType = uint8_t; + using IntermediateType = uint64_t; + using OutputType = uint8_t; + + static constexpr size_t kKernelSize = KernelSize; +}; // end of struct SeparableFilter2DKernelTestParams + +static constexpr std::array kDefaultBorder = { + KLEIDICV_BORDER_TYPE_REPLICATE}; + +template +std::unique_ptr> +make_generator_ptr(IterableType &elements) { + test::Generator *pg = + new test::SequenceGenerator(elements); + return std::unique_ptr>( + pg); +} + +// Test for SeparableFilter2D operator. +template +class SeparableFilter2DTest : public test::KernelTest { + using Base = test::KernelTest; + using typename test::KernelTest::InputType; + using typename test::KernelTest::IntermediateType; + using typename test::KernelTest::OutputType; + + public: + explicit SeparableFilter2DTest(const InputType *kernel_x, + const InputType *kernel_y) + : kernel_x_(kernel_x), + kernel_y_(kernel_y), + small_array_layouts_{test::small_array_layouts( + KernelTestParams::kKernelSize, KernelTestParams::kKernelSize)} { + array_layout_generator_ = make_generator_ptr(small_array_layouts_); + border_type_generator_ = make_generator_ptr(kDefaultBorder); + } + + SeparableFilter2DTest &with_array_layouts( + std::unique_ptr> g) { + array_layout_generator_ = std::move(g); + return *this; + } + + SeparableFilter2DTest &with_border_types( + std::unique_ptr> g) { + border_type_generator_ = std::move(g); + return *this; + } + + void test(test::Array2D mask, InputType max_value) { + test::Kernel kernel{mask}; + // Use the default border values for testing. + auto kSupportedBorderValues = test::default_border_values(); + // Create generators and execute test. + test::SequenceGenerator tested_border_values{kSupportedBorderValues}; + test::PseudoRandomNumberGeneratorIntRange element_generator{ + 0, max_value}; + Base::test(kernel, *array_layout_generator_, *border_type_generator_, + tested_border_values, element_generator); + } + + protected: + const InputType *kernel_x_; + const InputType *kernel_y_; + std::array small_array_layouts_; + std::unique_ptr> array_layout_generator_; + std::unique_ptr> + border_type_generator_; + + kleidicv_error_t call_api(const test::Array2D *input, + test::Array2D *output, + kleidicv_border_type_t border_type, + kleidicv_border_values_t) override { + kleidicv_filter_context_t *context = nullptr; + auto ret = kleidicv_filter_context_create( + &context, input->channels(), KernelTestParams::kKernelSize, + KernelTestParams::kKernelSize, input->width() / input->channels(), + input->height()); + if (ret != KLEIDICV_OK) { + return ret; + } + + ret = separable_filter_2d()( + input->data(), input->stride(), output->data(), output->stride(), + input->width() / input->channels(), input->height(), input->channels(), + kernel_x_, KernelTestParams::kKernelSize, kernel_y_, + KernelTestParams::kKernelSize, border_type, context); + auto releaseRet = kleidicv_filter_context_release(context); + if (releaseRet != KLEIDICV_OK) { + return releaseRet; + } + + return ret; + } +}; // end of class SeparableFilter2DTest + +using ElementTypes = ::testing::Types; + +template +class SeparableFilter2D : public testing::Test {}; + +TYPED_TEST_SUITE(SeparableFilter2D, ElementTypes); + +// Tests kleidicv_separable_filter_2d_ API. +TYPED_TEST(SeparableFilter2D, 5x5) { + using KernelTestParams = SeparableFilter2DKernelTestParams; + // 5x5 SeparableFilter2D operator. + test::Array2D mask{5, 5}; + // clang-format off + mask.set(0, 0, { 4, 2, 0, 4, 2}); + mask.set(1, 0, { 2, 1, 0, 2, 1}); + mask.set(2, 0, { 0, 0, 0, 0, 0}); + mask.set(3, 0, { 4, 2, 0, 4, 2}); + mask.set(4, 0, { 2, 1, 0, 2, 1}); + // clang-format on + uint8_t kernel[5] = {2, 1, 0, 2, 1}; + SeparableFilter2DTest{kernel, kernel}.test(mask, 7); +} + +TYPED_TEST(SeparableFilter2D, NullPointer) { + using KernelTestParams = SeparableFilter2DKernelTestParams; + kleidicv_filter_context_t *context = nullptr; + size_t validSize = KernelTestParams::kKernelSize - 1; + ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create(&context, 1, 5, 5, + validSize, validSize)); + TypeParam src[1] = {}, dst[1], kernel[5] = {}; + test::test_null_args(separable_filter_2d(), src, sizeof(TypeParam), + dst, sizeof(TypeParam), validSize, validSize, 1, kernel, + 5, kernel, 5, KLEIDICV_BORDER_TYPE_REPLICATE, context); + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); +} + +TYPED_TEST(SeparableFilter2D, ZeroImageSize) { + TypeParam src[1] = {}, dst[1], kernel[5] = {}; + kleidicv_filter_context_t *context = nullptr; + ASSERT_EQ(KLEIDICV_OK, + kleidicv_filter_context_create(&context, 1, 5, 5, 1, 1)); + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + separable_filter_2d()( + src, sizeof(TypeParam), dst, sizeof(TypeParam), 0, 5, 1, kernel, + 5, kernel, 5, KLEIDICV_BORDER_TYPE_REPLICATE, context)); + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + separable_filter_2d()( + src, sizeof(TypeParam), dst, sizeof(TypeParam), 5, 0, 1, kernel, + 5, kernel, 5, KLEIDICV_BORDER_TYPE_REPLICATE, context)); + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); +} + +TYPED_TEST(SeparableFilter2D, ValidImageSize) { + using KernelTestParams = SeparableFilter2DKernelTestParams; + kleidicv_filter_context_t *context = nullptr; + size_t validSize = KernelTestParams::kKernelSize - 1; + ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create(&context, 1, 5, 5, + validSize, validSize)); + test::Array2D src{validSize, validSize, + test::Options::vector_length()}; + test::Array2D dst{validSize, validSize, + test::Options::vector_length()}; + TypeParam kernel[5] = {}; + EXPECT_EQ(KLEIDICV_OK, separable_filter_2d()( + src.data(), src.stride(), dst.data(), dst.stride(), + validSize, validSize, 1, kernel, 5, kernel, 5, + KLEIDICV_BORDER_TYPE_REPLICATE, context)); + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); +} + +TYPED_TEST(SeparableFilter2D, UndersizeImage) { + using KernelTestParams = SeparableFilter2DKernelTestParams; + kleidicv_filter_context_t *context = nullptr; + size_t underSize = KernelTestParams::kKernelSize - 2; + size_t validSize = KernelTestParams::kKernelSize - 1; + ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create(&context, 1, 5, 5, + validSize, validSize)); + TypeParam src[1] = {}, dst[1], kernel[5] = {}; + EXPECT_EQ( + KLEIDICV_ERROR_NOT_IMPLEMENTED, + separable_filter_2d()( + src, sizeof(TypeParam), dst, sizeof(TypeParam), underSize, underSize, + 1, kernel, 5, kernel, 5, KLEIDICV_BORDER_TYPE_REPLICATE, context)); + EXPECT_EQ( + KLEIDICV_ERROR_NOT_IMPLEMENTED, + separable_filter_2d()( + src, sizeof(TypeParam), dst, sizeof(TypeParam), underSize, validSize, + 1, kernel, 5, kernel, 5, KLEIDICV_BORDER_TYPE_REPLICATE, context)); + EXPECT_EQ( + KLEIDICV_ERROR_NOT_IMPLEMENTED, + separable_filter_2d()( + src, sizeof(TypeParam), dst, sizeof(TypeParam), validSize, underSize, + 1, kernel, 5, kernel, 5, KLEIDICV_BORDER_TYPE_REPLICATE, context)); + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); +} + +TYPED_TEST(SeparableFilter2D, OversizeImage) { + kleidicv_filter_context_t *context = nullptr; + ASSERT_EQ(KLEIDICV_OK, + kleidicv_filter_context_create(&context, 1, 5, 5, 1, 1)); + TypeParam src[1], dst[1], kernel[5] = {}; + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + separable_filter_2d()( + src, sizeof(TypeParam), dst, sizeof(TypeParam), + KLEIDICV_MAX_IMAGE_PIXELS + 1, 1, 1, kernel, 5, kernel, 5, + KLEIDICV_BORDER_TYPE_REPLICATE, context)); + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + separable_filter_2d()( + src, sizeof(TypeParam), dst, sizeof(TypeParam), + KLEIDICV_MAX_IMAGE_PIXELS, KLEIDICV_MAX_IMAGE_PIXELS, 1, kernel, + 5, kernel, 5, KLEIDICV_BORDER_TYPE_REPLICATE, context)); + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); +} + +TYPED_TEST(SeparableFilter2D, ChannelNumber) { + using KernelTestParams = SeparableFilter2DKernelTestParams; + kleidicv_filter_context_t *context = nullptr; + size_t validSize = KernelTestParams::kKernelSize - 1; + + ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create(&context, 1, 5, 5, + validSize, validSize)); + TypeParam src[1], dst[1], kernel[5] = {}; + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + separable_filter_2d()( + src, sizeof(TypeParam), dst, sizeof(TypeParam), validSize, + validSize, KLEIDICV_MAXIMUM_CHANNEL_COUNT + 1, kernel, 5, + kernel, 5, KLEIDICV_BORDER_TYPE_REPLICATE, context)); + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); +} + +TYPED_TEST(SeparableFilter2D, InvalidContextMaxChannels) { + using KernelTestParams = SeparableFilter2DKernelTestParams; + kleidicv_filter_context_t *context = nullptr; + size_t validSize = KernelTestParams::kKernelSize - 1; + + ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create(&context, 1, 5, 5, + validSize, validSize)); + TypeParam src[1], dst[1], kernel[5] = {}; + EXPECT_EQ( + KLEIDICV_ERROR_CONTEXT_MISMATCH, + separable_filter_2d()( + src, sizeof(TypeParam), dst, sizeof(TypeParam), validSize, validSize, + 2, kernel, 5, kernel, 5, KLEIDICV_BORDER_TYPE_REPLICATE, context)); + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); +} + +TYPED_TEST(SeparableFilter2D, InvalidContextImageSize) { + using KernelTestParams = SeparableFilter2DKernelTestParams; + kleidicv_filter_context_t *context = nullptr; + size_t validSize = KernelTestParams::kKernelSize - 1; + + ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create(&context, 1, 5, 5, + validSize, validSize)); + TypeParam src[1], dst[1], kernel[5] = {}; + EXPECT_EQ(KLEIDICV_ERROR_CONTEXT_MISMATCH, + separable_filter_2d()( + src, sizeof(TypeParam), dst, sizeof(TypeParam), validSize + 1, + validSize, 1, kernel, 5, kernel, 5, + KLEIDICV_BORDER_TYPE_REPLICATE, context)); + EXPECT_EQ(KLEIDICV_ERROR_CONTEXT_MISMATCH, + separable_filter_2d()( + src, sizeof(TypeParam), dst, sizeof(TypeParam), validSize, + validSize + 1, 1, kernel, 5, kernel, 5, + KLEIDICV_BORDER_TYPE_REPLICATE, context)); + EXPECT_EQ(KLEIDICV_ERROR_CONTEXT_MISMATCH, + separable_filter_2d()( + src, sizeof(TypeParam), dst, sizeof(TypeParam), validSize + 1, + validSize + 1, 1, kernel, 5, kernel, 5, + KLEIDICV_BORDER_TYPE_REPLICATE, context)); + + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); +} + +TYPED_TEST(SeparableFilter2D, InvalidKernelSize) { + kleidicv_filter_context_t *context = nullptr; + constexpr size_t kernel_size = 17; + + ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create( + &context, 1, kernel_size, kernel_size, kernel_size, + kernel_size)); + TypeParam src[kernel_size], dst[kernel_size], kernel[kernel_size] = {}; + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + separable_filter_2d()( + src, sizeof(TypeParam), dst, sizeof(TypeParam), kernel_size, + kernel_size, 1, kernel, kernel_size, kernel, 5, + KLEIDICV_BORDER_TYPE_REPLICATE, context)); + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + separable_filter_2d()( + src, sizeof(TypeParam), dst, sizeof(TypeParam), kernel_size, + kernel_size, 1, kernel, 5, kernel, kernel_size, + KLEIDICV_BORDER_TYPE_REPLICATE, context)); + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); +} + +TYPED_TEST(SeparableFilter2D, InvalidBorderType) { + using KernelTestParams = SeparableFilter2DKernelTestParams; + kleidicv_filter_context_t *context = nullptr; + size_t validSize = KernelTestParams::kKernelSize - 1; + + ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create(&context, 1, 5, 5, + validSize, validSize)); + TypeParam src[1], dst[1], kernel[5] = {}; + EXPECT_EQ( + KLEIDICV_ERROR_NOT_IMPLEMENTED, + separable_filter_2d()( + src, sizeof(TypeParam), dst, sizeof(TypeParam), validSize, validSize, + 1, kernel, 5, kernel, 5, KLEIDICV_BORDER_TYPE_CONSTANT, context)); + EXPECT_EQ( + KLEIDICV_ERROR_NOT_IMPLEMENTED, + separable_filter_2d()( + src, sizeof(TypeParam), dst, sizeof(TypeParam), validSize, validSize, + 1, kernel, 5, kernel, 5, KLEIDICV_BORDER_TYPE_REFLECT, context)); + EXPECT_EQ( + KLEIDICV_ERROR_NOT_IMPLEMENTED, + separable_filter_2d()( + src, sizeof(TypeParam), dst, sizeof(TypeParam), validSize, validSize, + 1, kernel, 5, kernel, 5, KLEIDICV_BORDER_TYPE_WRAP, context)); + EXPECT_EQ( + KLEIDICV_ERROR_NOT_IMPLEMENTED, + separable_filter_2d()( + src, sizeof(TypeParam), dst, sizeof(TypeParam), validSize, validSize, + 1, kernel, 5, kernel, 5, KLEIDICV_BORDER_TYPE_REVERSE, context)); + EXPECT_EQ( + KLEIDICV_ERROR_NOT_IMPLEMENTED, + separable_filter_2d()( + src, sizeof(TypeParam), dst, sizeof(TypeParam), validSize, validSize, + 1, kernel, 5, kernel, 5, KLEIDICV_BORDER_TYPE_TRANSPARENT, context)); + EXPECT_EQ( + KLEIDICV_ERROR_NOT_IMPLEMENTED, + separable_filter_2d()( + src, sizeof(TypeParam), dst, sizeof(TypeParam), validSize, validSize, + 1, kernel, 5, kernel, 5, KLEIDICV_BORDER_TYPE_NONE, context)); + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); +} + +#ifdef KLEIDICV_ALLOCATION_TESTS +TEST(FilterCreate, CannotAllocateFilter) { + MockMallocToFail::enable(); + kleidicv_filter_context_t *context = nullptr; + EXPECT_EQ(KLEIDICV_ERROR_ALLOCATION, + kleidicv_filter_context_create(&context, 1, 1, 1, + KLEIDICV_MAX_IMAGE_PIXELS, 1)); + MockMallocToFail::disable(); +} +#endif + +TEST(FilterCreate, OversizeImage) { + kleidicv_filter_context_t *context = nullptr; + + for (kleidicv_rectangle_t rect : { + kleidicv_rectangle_t{KLEIDICV_MAX_IMAGE_PIXELS + 1, 1}, + kleidicv_rectangle_t{KLEIDICV_MAX_IMAGE_PIXELS, + KLEIDICV_MAX_IMAGE_PIXELS}, + }) { + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + kleidicv_filter_context_create(&context, 1, 1, 1, rect.width, + rect.height)); + ASSERT_EQ(nullptr, context); + } +} + +TEST(FilterCreate, DifferentKernelSize) { + kleidicv_filter_context_t *context = nullptr; + + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + kleidicv_filter_context_create(&context, 1, 7, 15, 1, 1)); + ASSERT_EQ(nullptr, context); +} + +TEST(FilterCreate, ChannelNumber) { + kleidicv_filter_context_t *context = nullptr; + + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + kleidicv_filter_context_create( + &context, KLEIDICV_MAXIMUM_CHANNEL_COUNT + 1, 1, 1, 1, 1)); + ASSERT_EQ(nullptr, context); +} + +TEST(FilterCreate, NullPointer) { + EXPECT_EQ(KLEIDICV_ERROR_NULL_POINTER, + kleidicv_filter_context_create(nullptr, 1, 1, 1, 1, 1)); +} + +TEST(FilterRelease, NullPointer) { + EXPECT_EQ(KLEIDICV_ERROR_NULL_POINTER, + kleidicv_filter_context_release(nullptr)); +} diff --git a/test/framework/generator.h b/test/framework/generator.h index ced45aaae..3870737e3 100644 --- a/test/framework/generator.h +++ b/test/framework/generator.h @@ -43,6 +43,24 @@ class PseudoRandomNumberGenerator : public Generator { std::mt19937_64 rng_; }; // end of class PseudoRandomNumberGenerator +// Generates pseudo-random integers of a given type within the range [min, max]. +template , bool> = true> +class PseudoRandomNumberGeneratorIntRange + : public PseudoRandomNumberGenerator { + public: + PseudoRandomNumberGeneratorIntRange(ElementType min, ElementType max) + : PseudoRandomNumberGenerator(), dist_(min, max) {} + + // Yields the next value or std::nullopt. + std::optional next() override { + return static_cast(dist_(this->rng_)); + } + + protected: + std::uniform_int_distribution dist_; +}; // end of class PseudoRandomNumberGeneratorIntRange + // Generator which yields values of an iterable container. template class SequenceGenerator : public Generator { -- GitLab From 025d13cfc8366162fc8798a75e6710067ed42bc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Thu, 11 Jul 2024 14:48:09 +0200 Subject: [PATCH 4/5] Implement initial Separable Filter 2D API SVE2/SME2 --- kleidicv/src/filters/separable_filter_2d_sc.h | 170 ++++++++++++++++++ .../src/filters/separable_filter_2d_sme2.cpp | 15 +- .../src/filters/separable_filter_2d_sve2.cpp | 16 +- 3 files changed, 189 insertions(+), 12 deletions(-) create mode 100644 kleidicv/src/filters/separable_filter_2d_sc.h diff --git a/kleidicv/src/filters/separable_filter_2d_sc.h b/kleidicv/src/filters/separable_filter_2d_sc.h new file mode 100644 index 000000000..4ecac2b91 --- /dev/null +++ b/kleidicv/src/filters/separable_filter_2d_sc.h @@ -0,0 +1,170 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_SEPARABLE_FILTER_2D_SC_H +#define KLEIDICV_SEPARABLE_FILTER_2D_SC_H + +#include "kleidicv/kleidicv.h" +#include "kleidicv/separable_filter_5x5_sc.h" +#include "kleidicv/sve2.h" + +namespace KLEIDICV_TARGET_NAMESPACE { + +template +class SeparableFilter2D; + +template <> +class SeparableFilter2D { + public: + using SourceType = uint8_t; + using BufferType = uint32_t; + using DestinationType = uint8_t; + + explicit SeparableFilter2D(const uint8_t *kernel_x, const uint8_t *kernel_y) + : kernel_x_(kernel_x), kernel_y_(kernel_y) {} + + void vertical_vector_path(svbool_t pg, svuint8_t src_0, svuint8_t src_1, + svuint8_t src_2, svuint8_t src_3, svuint8_t src_4, + BufferType *dst) const + KLEIDICV_STREAMING_COMPATIBLE { + // 2 + svuint16_t vec_0_b = svmovlb_u16(src_0); + svuint16_t vec_0_t = svmovlt_u16(src_0); + + svuint32_t acc_b_b = svmullb_n_u32(vec_0_b, kernel_y_[0]); + svuint32_t acc_b_t = svmullb_n_u32(vec_0_t, kernel_y_[0]); + svuint32_t acc_t_b = svmullt_n_u32(vec_0_b, kernel_y_[0]); + svuint32_t acc_t_t = svmullt_n_u32(vec_0_t, kernel_y_[0]); + + // 1 + svuint16_t vec_1_b = svmovlb_u16(src_1); + svuint16_t vec_1_t = svmovlt_u16(src_1); + + acc_b_b = svmlalb_n_u32(acc_b_b, vec_1_b, kernel_y_[1]); + acc_b_t = svmlalb_n_u32(acc_b_t, vec_1_t, kernel_y_[1]); + acc_t_b = svmlalt_n_u32(acc_t_b, vec_1_b, kernel_y_[1]); + acc_t_t = svmlalt_n_u32(acc_t_t, vec_1_t, kernel_y_[1]); + + // 2 + svuint16_t vec_2_b = svmovlb_u16(src_2); + svuint16_t vec_2_t = svmovlt_u16(src_2); + + acc_b_b = svmlalb_n_u32(acc_b_b, vec_2_b, kernel_y_[2]); + acc_b_t = svmlalb_n_u32(acc_b_t, vec_2_t, kernel_y_[2]); + acc_t_b = svmlalt_n_u32(acc_t_b, vec_2_b, kernel_y_[2]); + acc_t_t = svmlalt_n_u32(acc_t_t, vec_2_t, kernel_y_[2]); + + // 3 + svuint16_t vec_3_b = svmovlb_u16(src_3); + svuint16_t vec_3_t = svmovlt_u16(src_3); + + acc_b_b = svmlalb_n_u32(acc_b_b, vec_3_b, kernel_y_[3]); + acc_b_t = svmlalb_n_u32(acc_b_t, vec_3_t, kernel_y_[3]); + acc_t_b = svmlalt_n_u32(acc_t_b, vec_3_b, kernel_y_[3]); + acc_t_t = svmlalt_n_u32(acc_t_t, vec_3_t, kernel_y_[3]); + + // 4 + svuint16_t vec_4_b = svmovlb_u16(src_4); + svuint16_t vec_4_t = svmovlt_u16(src_4); + + acc_b_b = svmlalb_n_u32(acc_b_b, vec_4_b, kernel_y_[4]); + acc_b_t = svmlalb_n_u32(acc_b_t, vec_4_t, kernel_y_[4]); + acc_t_b = svmlalt_n_u32(acc_t_b, vec_4_b, kernel_y_[4]); + acc_t_t = svmlalt_n_u32(acc_t_t, vec_4_t, kernel_y_[4]); + + svuint32x4_t interleaved = svcreate4(acc_b_b, acc_b_t, acc_t_b, acc_t_t); + svst4(pg, &dst[0], interleaved); + } + + void horizontal_vector_path(svbool_t pg, svuint32_t src_0, svuint32_t src_1, + svuint32_t src_2, svuint32_t src_3, + svuint32_t src_4, DestinationType *dst) const + KLEIDICV_STREAMING_COMPATIBLE { + // 0 + svuint32_t acc = svmul_n_u32_x(pg, src_0, kernel_x_[0]); + + // 1 + acc = svmla_n_u32_x(pg, acc, src_1, kernel_x_[1]); + + // 2 + acc = svmla_n_u32_x(pg, acc, src_2, kernel_x_[2]); + + // 3 + acc = svmla_n_u32_x(pg, acc, src_3, kernel_x_[3]); + + // 4 + acc = svmla_n_u32_x(pg, acc, src_4, kernel_x_[4]); + + svst1b_u32(pg, &dst[0], acc); + } + + void horizontal_scalar_path(const BufferType src[5], DestinationType *dst) + const KLEIDICV_STREAMING_COMPATIBLE { + uint32_t acc = src[0] * kernel_x_[0] + src[1] * kernel_x_[1] + + src[2] * kernel_x_[2] + src[3] * kernel_x_[3] + + src[4] * kernel_x_[4]; + dst[0] = static_cast(acc); + } + + private: + const uint8_t *kernel_x_; + const uint8_t *kernel_y_; +}; // end of class SeparableFilter2D + +static kleidicv_error_t separable_filter_2d_u8_sc( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, const uint8_t *kernel_x, + size_t kernel_width, const uint8_t *kernel_y, size_t kernel_height, + kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context) KLEIDICV_STREAMING_COMPATIBLE { + CHECK_POINTERS(context, kernel_x, kernel_y); + auto *workspace = reinterpret_cast(context); + auto fixed_border_type = get_fixed_border_type(border_type); + + if (kernel_width != 5 || kernel_height != 5) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); + CHECK_IMAGE_SIZE(width, height); + + if (width < kernel_width - 1 || height < kernel_width - 1) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + + if (channels > KLEIDICV_MAXIMUM_CHANNEL_COUNT) { + return KLEIDICV_ERROR_RANGE; + } + + if (workspace->channels() < channels) { + return KLEIDICV_ERROR_CONTEXT_MISMATCH; + } + + Rectangle rect{width, height}; + const Rectangle &context_rect = workspace->image_size(); + if (context_rect.width() < width || context_rect.height() < height) { + return KLEIDICV_ERROR_CONTEXT_MISMATCH; + } + + if (!fixed_border_type || *fixed_border_type != FixedBorderType::REPLICATE) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + + using SeparableFilterClass = SeparableFilter2D; + + SeparableFilterClass filterClass{kernel_x, kernel_y}; + SeparableFilter filter{filterClass}; + + Rows src_rows{src, src_stride, channels}; + Rows dst_rows{dst, dst_stride, channels}; + workspace->process(rect, src_rows, dst_rows, channels, + FixedBorderType::REPLICATE, filter); + + return KLEIDICV_OK; +} + +} // namespace KLEIDICV_TARGET_NAMESPACE + +#endif // KLEIDICV_SEPARABLE_FILTER_2D_SC_H diff --git a/kleidicv/src/filters/separable_filter_2d_sme2.cpp b/kleidicv/src/filters/separable_filter_2d_sme2.cpp index eb117f6d3..fc0857178 100644 --- a/kleidicv/src/filters/separable_filter_2d_sme2.cpp +++ b/kleidicv/src/filters/separable_filter_2d_sme2.cpp @@ -3,15 +3,20 @@ // SPDX-License-Identifier: Apache-2.0 #include "kleidicv/filters/separable_filter_2d.h" +#include "separable_filter_2d_sc.h" namespace kleidicv::sme2 { KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t -separable_filter_2d_u8(const uint8_t *, size_t, uint8_t *, size_t, size_t, - size_t, size_t, const uint8_t *, size_t, const uint8_t *, - size_t, kleidicv_border_type_t, - kleidicv_filter_context_t *) { - return KLEIDICV_ERROR_NOT_IMPLEMENTED; +separable_filter_2d_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, size_t height, + size_t channels, const uint8_t *kernel_x, + size_t kernel_width, const uint8_t *kernel_y, + size_t kernel_height, kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context) { + return separable_filter_2d_u8_sc( + src, src_stride, dst, dst_stride, width, height, channels, kernel_x, + kernel_width, kernel_y, kernel_height, border_type, context); } } // namespace kleidicv::sme2 diff --git a/kleidicv/src/filters/separable_filter_2d_sve2.cpp b/kleidicv/src/filters/separable_filter_2d_sve2.cpp index dbc31cdd5..0de532c1c 100644 --- a/kleidicv/src/filters/separable_filter_2d_sve2.cpp +++ b/kleidicv/src/filters/separable_filter_2d_sve2.cpp @@ -3,17 +3,19 @@ // SPDX-License-Identifier: Apache-2.0 #include "kleidicv/filters/separable_filter_2d.h" +#include "separable_filter_2d_sc.h" namespace kleidicv::sve2 { KLEIDICV_TARGET_FN_ATTRS -kleidicv_error_t separable_filter_2d_u8(const uint8_t *, size_t, uint8_t *, - size_t, size_t, size_t, size_t, - const uint8_t *, size_t, - const uint8_t *, size_t, - kleidicv_border_type_t, - kleidicv_filter_context_t *) { - return KLEIDICV_ERROR_NOT_IMPLEMENTED; +kleidicv_error_t separable_filter_2d_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, const uint8_t *kernel_x, + size_t kernel_width, const uint8_t *kernel_y, size_t kernel_height, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { + return separable_filter_2d_u8_sc( + src, src_stride, dst, dst_stride, width, height, channels, kernel_x, + kernel_width, kernel_y, kernel_height, border_type, context); } } // namespace kleidicv::sve2 -- GitLab From 11c9441269ef9cc61341407588d6882d5fae1812 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Thu, 11 Jul 2024 16:38:49 +0200 Subject: [PATCH 5/5] Add benchmarks for initial Separable Filter 2D --- benchmark/benchmark.cpp | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 59825c41d..157b007a9 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -242,13 +242,48 @@ static void resize_linear_8x8_f32(benchmark::State& state) { } BENCHMARK(resize_linear_8x8_f32); +template +static void separable_filter_2d(benchmark::State& state) { + kleidicv_filter_context_t* context; + kleidicv_error_t err = kleidicv_filter_context_create( + &context, Channels, KernelSize, KernelSize, image_width, image_height); + if (err != KLEIDICV_OK) { + state.SkipWithError( + "Could not initialize SeparableFilter2D filter context."); + return; + } + + std::vector kernel(KernelSize, 2); + + bench_functor(state, [context, kernel]() { + (void)kleidicv_separable_filter_2d_u8( + get_source_buffer_a(), image_width * Channels * sizeof(T), + get_destination_buffer(), + image_width * Channels * sizeof(T), image_width, image_height, Channels, + kernel.data(), KernelSize, kernel.data(), KernelSize, + KLEIDICV_BORDER_TYPE_REPLICATE, context); + }); + + (void)kleidicv_filter_context_release(context); +} + +static void separable_filter_2d_u8_5x5_1ch(benchmark::State& state) { + separable_filter_2d(state); +} +BENCHMARK(separable_filter_2d_u8_5x5_1ch); + +static void separable_filter_2d_u8_5x5_3ch(benchmark::State& state) { + separable_filter_2d(state); +} +BENCHMARK(separable_filter_2d_u8_5x5_3ch); + template static void gaussian_blur(benchmark::State& state) { kleidicv_filter_context_t* context; kleidicv_error_t err = kleidicv_filter_context_create( &context, Channels, KernelSize, KernelSize, image_width, image_height); if (err != KLEIDICV_OK) { - state.SkipWithError("Could not initialize Gaussian blur filter."); + state.SkipWithError("Could not initialize Gaussian blur filter context."); return; } -- GitLab