From 2da98e2982b6dc91219d96fdb5e33684a7ffd828 Mon Sep 17 00:00:00 2001 From: Michael Platings Date: Tue, 13 Aug 2024 11:44:57 +0000 Subject: [PATCH 1/3] Multithread non-binomial Gaussian blur --- adapters/opencv/kleidicv_hal.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp index bb8914f4f..a9f8472e6 100644 --- a/adapters/opencv/kleidicv_hal.cpp +++ b/adapters/opencv/kleidicv_hal.cpp @@ -533,11 +533,12 @@ int gaussian_blur(const uchar *src_data, size_t src_step, uchar *dst_data, return convert_error(create_err); } - kleidicv_error_t blur_err = kleidicv_gaussian_blur_u8( + auto mt = get_multithreading(); + kleidicv_error_t blur_err = kleidicv_thread_gaussian_blur_u8( reinterpret_cast(src_data), src_step, reinterpret_cast(dst_data), dst_step, width, height, cn, kernel_width, kernel_height, sigma_x, sigma_y, kleidicv_border_type, - context); + context, mt); kleidicv_error_t release_err = kleidicv_filter_context_release(context); -- GitLab From 35c32f5bafe437d62c1793b32fdb6ce7a9e441eb Mon Sep 17 00:00:00 2001 From: Michael Platings Date: Tue, 13 Aug 2024 11:05:31 +0000 Subject: [PATCH 2/3] Multithread separable filter --- adapters/opencv/kleidicv_hal.cpp | 6 ++- .../kleidicv/filters/separable_filter_2d.h | 42 ++++++++++----- .../src/filters/separable_filter_2d_api.cpp | 23 ++++++-- .../src/filters/separable_filter_2d_neon.cpp | 11 ++-- kleidicv/src/filters/separable_filter_2d_sc.h | 10 ++-- .../src/filters/separable_filter_2d_sme2.cpp | 20 +++---- .../src/filters/separable_filter_2d_sve2.cpp | 15 +++--- .../include/kleidicv_thread/kleidicv_thread.h | 7 +++ kleidicv_thread/src/kleidicv_thread.cpp | 52 +++++++++++++++---- test/api/test_thread.cpp | 27 ++++++++++ 10 files changed, 160 insertions(+), 53 deletions(-) diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp index a9f8472e6..18176f226 100644 --- a/adapters/opencv/kleidicv_hal.cpp +++ 
b/adapters/opencv/kleidicv_hal.cpp @@ -423,12 +423,14 @@ int separable_filter_2d_operation(cvhalFilter2D *context, uchar *src_data, params->cached_max_image_height = height_sz; } - kleidicv_error_t filter_err = kleidicv_separable_filter_2d_u8( + auto mt = get_multithreading(); + + kleidicv_error_t filter_err = kleidicv_thread_separable_filter_2d_u8( reinterpret_cast(src_data), src_step, reinterpret_cast(dst_data), dst_step, static_cast(width), static_cast(height), params->channels, params->kernel_x, params->kernel_width, params->kernel_y, - params->kernel_height, params->border_type, filter_context); + params->kernel_height, params->border_type, filter_context, mt); return convert_error(filter_err); } diff --git a/kleidicv/include/kleidicv/filters/separable_filter_2d.h b/kleidicv/include/kleidicv/filters/separable_filter_2d.h index 514d88a5a..867b612ac 100644 --- a/kleidicv/include/kleidicv/filters/separable_filter_2d.h +++ b/kleidicv/include/kleidicv/filters/separable_filter_2d.h @@ -6,37 +6,55 @@ #define KLEIDICV_FILTERS_SEPARABLE_FILTER_2D_H #include "kleidicv/config.h" +#include "kleidicv/kleidicv.h" #include "kleidicv/types.h" +extern "C" { +// For internal use only. See instead kleidicv_separable_filter_2d_u8. +// Filter a horizontal stripe across an image. The stripe is defined by the +// range [y_begin, y_end). 
+KLEIDICV_API_DECLARATION(kleidicv_separable_filter_2d_stripe_u8, + const uint8_t *src, size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, size_t height, + size_t y_begin, size_t y_end, size_t channels, + const uint8_t *kernel_x, size_t kernel_width, + const uint8_t *kernel_y, size_t kernel_height, + kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context); +} + namespace kleidicv { namespace neon { -kleidicv_error_t separable_filter_2d_u8( +kleidicv_error_t separable_filter_2d_stripe_u8( const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, - size_t width, size_t height, size_t channels, const uint8_t *kernel_x, - size_t kernel_width, const uint8_t *kernel_y, size_t kernel_height, - kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, + const uint8_t *kernel_x, size_t kernel_width, const uint8_t *kernel_y, + size_t kernel_height, kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context); } // namespace neon namespace sve2 { -kleidicv_error_t separable_filter_2d_u8( +kleidicv_error_t separable_filter_2d_stripe_u8( const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, - size_t width, size_t height, size_t channels, const uint8_t *kernel_x, - size_t kernel_width, const uint8_t *kernel_y, size_t kernel_height, - kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, + const uint8_t *kernel_x, size_t kernel_width, const uint8_t *kernel_y, + size_t kernel_height, kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context); } // namespace sve2 namespace sme2 { -kleidicv_error_t separable_filter_2d_u8( +kleidicv_error_t separable_filter_2d_stripe_u8( const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, - size_t width, size_t height, size_t channels, const uint8_t 
*kernel_x, - size_t kernel_width, const uint8_t *kernel_y, size_t kernel_height, - kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, + const uint8_t *kernel_x, size_t kernel_width, const uint8_t *kernel_y, + size_t kernel_height, kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context); } // namespace sme2 diff --git a/kleidicv/src/filters/separable_filter_2d_api.cpp b/kleidicv/src/filters/separable_filter_2d_api.cpp index 5e6a222a2..028a66f01 100644 --- a/kleidicv/src/filters/separable_filter_2d_api.cpp +++ b/kleidicv/src/filters/separable_filter_2d_api.cpp @@ -59,6 +59,23 @@ kleidicv_error_t kleidicv_filter_context_release( } // extern "C" KLEIDICV_MULTIVERSION_C_API( - kleidicv_separable_filter_2d_u8, &kleidicv::neon::separable_filter_2d_u8, - KLEIDICV_SVE2_IMPL_IF(kleidicv::sve2::separable_filter_2d_u8), - &kleidicv::sme2::separable_filter_2d_u8); + kleidicv_separable_filter_2d_stripe_u8, + &kleidicv::neon::separable_filter_2d_stripe_u8, + KLEIDICV_SVE2_IMPL_IF(kleidicv::sve2::separable_filter_2d_stripe_u8), + &kleidicv::sme2::separable_filter_2d_stripe_u8); + +namespace kleidicv { +static kleidicv_error_t separable_filter_2d_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, const uint8_t *kernel_x, + size_t kernel_width, const uint8_t *kernel_y, size_t kernel_height, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { + return kleidicv_separable_filter_2d_stripe_u8( + src, src_stride, dst, dst_stride, width, height, 0, height, channels, + kernel_x, kernel_width, kernel_y, kernel_height, border_type, context); +} +} // namespace kleidicv + +KLEIDICV_MULTIVERSION_C_API(kleidicv_separable_filter_2d_u8, + &kleidicv::separable_filter_2d_u8, nullptr, + nullptr); diff --git a/kleidicv/src/filters/separable_filter_2d_neon.cpp 
b/kleidicv/src/filters/separable_filter_2d_neon.cpp index c9a5834c4..2a29a9d68 100644 --- a/kleidicv/src/filters/separable_filter_2d_neon.cpp +++ b/kleidicv/src/filters/separable_filter_2d_neon.cpp @@ -146,11 +146,12 @@ static kleidicv_error_t separable_filter_2d_checks( } KLEIDICV_TARGET_FN_ATTRS -kleidicv_error_t separable_filter_2d_u8( +kleidicv_error_t separable_filter_2d_stripe_u8( const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, - size_t width, size_t height, size_t channels, const uint8_t *kernel_x, - size_t kernel_width, const uint8_t *kernel_y, size_t kernel_height, - kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, + const uint8_t *kernel_x, size_t kernel_width, const uint8_t *kernel_y, + size_t kernel_height, kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context) { auto *workspace = reinterpret_cast(context); kleidicv_error_t checks_result = separable_filter_2d_checks( src, src_stride, dst, dst_stride, width, height, channels, kernel_x, @@ -176,7 +177,7 @@ kleidicv_error_t separable_filter_2d_u8( Rows src_rows{src, src_stride, channels}; Rows dst_rows{dst, dst_stride, channels}; - workspace->process(rect, 0, rect.height(), src_rows, dst_rows, channels, + workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels, *fixed_border_type, filter); return KLEIDICV_OK; diff --git a/kleidicv/src/filters/separable_filter_2d_sc.h b/kleidicv/src/filters/separable_filter_2d_sc.h index e03e460d9..8330c66c2 100644 --- a/kleidicv/src/filters/separable_filter_2d_sc.h +++ b/kleidicv/src/filters/separable_filter_2d_sc.h @@ -174,11 +174,11 @@ static kleidicv_error_t separable_filter_2d_checks( return KLEIDICV_OK; } -static kleidicv_error_t separable_filter_2d_u8_sc( +static kleidicv_error_t separable_filter_2d_stripe_u8_sc( const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, - size_t width, size_t 
height, size_t channels, const uint8_t *kernel_x, - size_t kernel_width, const uint8_t *kernel_y, size_t kernel_height, - kleidicv_border_type_t border_type, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, + const uint8_t *kernel_x, size_t kernel_width, const uint8_t *kernel_y, + size_t kernel_height, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) KLEIDICV_STREAMING_COMPATIBLE { auto *workspace = reinterpret_cast(context); kleidicv_error_t checks_result = separable_filter_2d_checks( @@ -220,7 +220,7 @@ static kleidicv_error_t separable_filter_2d_u8_sc( Rows src_rows{src, src_stride, channels}; Rows dst_rows{dst, dst_stride, channels}; - workspace->process(rect, 0, rect.height(), src_rows, dst_rows, channels, + workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels, *fixed_border_type, filter); return KLEIDICV_OK; diff --git a/kleidicv/src/filters/separable_filter_2d_sme2.cpp b/kleidicv/src/filters/separable_filter_2d_sme2.cpp index fc0857178..ef3e24570 100644 --- a/kleidicv/src/filters/separable_filter_2d_sme2.cpp +++ b/kleidicv/src/filters/separable_filter_2d_sme2.cpp @@ -8,15 +8,17 @@ namespace kleidicv::sme2 { KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t -separable_filter_2d_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, - size_t dst_stride, size_t width, size_t height, - size_t channels, const uint8_t *kernel_x, - size_t kernel_width, const uint8_t *kernel_y, - size_t kernel_height, kleidicv_border_type_t border_type, - kleidicv_filter_context_t *context) { - return separable_filter_2d_u8_sc( - src, src_stride, dst, dst_stride, width, height, channels, kernel_x, - kernel_width, kernel_y, kernel_height, border_type, context); +separable_filter_2d_stripe_u8(const uint8_t *src, size_t src_stride, + uint8_t *dst, size_t dst_stride, size_t width, + size_t height, size_t y_begin, size_t y_end, + size_t channels, const uint8_t *kernel_x, + size_t kernel_width, const 
uint8_t *kernel_y, + size_t kernel_height, + kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context) { + return separable_filter_2d_stripe_u8_sc( + src, src_stride, dst, dst_stride, width, height, y_begin, y_end, channels, + kernel_x, kernel_width, kernel_y, kernel_height, border_type, context); } } // namespace kleidicv::sme2 diff --git a/kleidicv/src/filters/separable_filter_2d_sve2.cpp b/kleidicv/src/filters/separable_filter_2d_sve2.cpp index 0de532c1c..0ce142234 100644 --- a/kleidicv/src/filters/separable_filter_2d_sve2.cpp +++ b/kleidicv/src/filters/separable_filter_2d_sve2.cpp @@ -8,14 +8,15 @@ namespace kleidicv::sve2 { KLEIDICV_TARGET_FN_ATTRS -kleidicv_error_t separable_filter_2d_u8( +kleidicv_error_t separable_filter_2d_stripe_u8( const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, - size_t width, size_t height, size_t channels, const uint8_t *kernel_x, - size_t kernel_width, const uint8_t *kernel_y, size_t kernel_height, - kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { - return separable_filter_2d_u8_sc( - src, src_stride, dst, dst_stride, width, height, channels, kernel_x, - kernel_width, kernel_y, kernel_height, border_type, context); + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, + const uint8_t *kernel_x, size_t kernel_width, const uint8_t *kernel_y, + size_t kernel_height, kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context) { + return separable_filter_2d_stripe_u8_sc( + src, src_stride, dst, dst_stride, width, height, y_begin, y_end, channels, + kernel_x, kernel_width, kernel_y, kernel_height, border_type, context); } } // namespace kleidicv::sve2 diff --git a/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h b/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h index 2af9555cb..da87a6be4 100644 --- a/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h +++ 
b/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h @@ -245,6 +245,13 @@ kleidicv_error_t kleidicv_thread_gaussian_blur_u8( kleidicv_border_type_t border_type, kleidicv_filter_context_t *context, kleidicv_thread_multithreading); +kleidicv_error_t kleidicv_thread_separable_filter_2d_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, const uint8_t *kernel_x, + size_t kernel_width, const uint8_t *kernel_y, size_t kernel_height, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context, + kleidicv_thread_multithreading); + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/kleidicv_thread/src/kleidicv_thread.cpp b/kleidicv_thread/src/kleidicv_thread.cpp index 47ae4b9b6..4a9731da4 100644 --- a/kleidicv_thread/src/kleidicv_thread.cpp +++ b/kleidicv_thread/src/kleidicv_thread.cpp @@ -10,6 +10,7 @@ #include #include "kleidicv/filters/gaussian_blur.h" +#include "kleidicv/filters/separable_filter_2d.h" #include "kleidicv/kleidicv.h" typedef std::function FunctionCallback; @@ -366,12 +367,12 @@ kleidicv_error_t parallel_min_max_loc(FunctionType min_max_loc_func, DEFINE_KLEIDICV_THREAD_MIN_MAX_LOC(u8, uint8_t); -kleidicv_error_t kleidicv_thread_gaussian_blur_u8( - const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, - size_t width, size_t height, size_t channels, size_t kernel_width, - size_t kernel_height, float sigma_x, float sigma_y, - kleidicv_border_type_t border_type, kleidicv_filter_context_t *context, - kleidicv_thread_multithreading mt) { +template +kleidicv_error_t kleidicv_thread_filter(F filter, size_t width, size_t height, + size_t channels, size_t kernel_width, + size_t kernel_height, + kleidicv_filter_context_t *context, + kleidicv_thread_multithreading mt) { FunctionCallback callback = [=](unsigned y_begin, unsigned y_end) { // The context contains a buffer that can only fit a single row, so can't be // shared between threads. 
Since we don't know how many threads there are, @@ -392,10 +393,7 @@ kleidicv_error_t kleidicv_thread_gaussian_blur_u8( // GCOVR_EXCL_STOP } - kleidicv_error_t result = kleidicv_gaussian_blur_stripe_u8( - src, src_stride, dst, dst_stride, width, height, y_begin, y_end, - channels, kernel_width, kernel_height, sigma_x, sigma_y, border_type, - thread_context); + kleidicv_error_t result = filter(y_begin, y_end, thread_context); if (create_context) { kleidicv_error_t context_release_result = @@ -409,3 +407,37 @@ kleidicv_error_t kleidicv_thread_gaussian_blur_u8( return mt.parallel(kleidicv_thread_std_function_callback, &callback, mt.parallel_data, height); } + +kleidicv_error_t kleidicv_thread_gaussian_blur_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, size_t kernel_width, + size_t kernel_height, float sigma_x, float sigma_y, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context, + kleidicv_thread_multithreading mt) { + auto callback = [=](size_t y_begin, size_t y_end, + kleidicv_filter_context_t *thread_context) { + return kleidicv_gaussian_blur_stripe_u8( + src, src_stride, dst, dst_stride, width, height, y_begin, y_end, + channels, kernel_width, kernel_height, sigma_x, sigma_y, border_type, + thread_context); + }; + return kleidicv_thread_filter(callback, width, height, channels, kernel_width, + kernel_height, context, mt); +} + +kleidicv_error_t kleidicv_thread_separable_filter_2d_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, const uint8_t *kernel_x, + size_t kernel_width, const uint8_t *kernel_y, size_t kernel_height, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context, + kleidicv_thread_multithreading mt) { + auto callback = [=](size_t y_begin, size_t y_end, + kleidicv_filter_context_t *thread_context) { + return kleidicv_separable_filter_2d_stripe_u8( + src, 
src_stride, dst, dst_stride, width, height, y_begin, y_end, + channels, kernel_x, kernel_width, kernel_y, kernel_height, border_type, + thread_context); + }; + return kleidicv_thread_filter(callback, width, height, channels, kernel_width, + kernel_height, context, mt); +} \ No newline at end of file diff --git a/test/api/test_thread.cpp b/test/api/test_thread.cpp index 8ecd82e15..56bdb6703 100644 --- a/test/api/test_thread.cpp +++ b/test/api/test_thread.cpp @@ -166,6 +166,33 @@ TEST_P(Thread, gaussian_blur_u8) { ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); } +TEST_P(Thread, separable_filter_2d_u8) { + unsigned width = 0, height = 0, thread_count = 0; + std::tie(width, height, thread_count) = GetParam(); + (void)thread_count; + size_t channels = 1; + const size_t kernel_width = 5; + const size_t kernel_height = kernel_width; + + test::Array2D kernel_x{kernel_width, 1}; + kernel_x.set(0, 0, {9, 9, 9, 9, 9}); + test::Array2D kernel_y{kernel_height, 1}; + kernel_y.set(0, 0, {5, 6, 7, 8, 9}); + + kleidicv_border_type_t border_type = KLEIDICV_BORDER_TYPE_REPLICATE; + kleidicv_filter_context_t *context = nullptr; + ASSERT_EQ(KLEIDICV_OK, + kleidicv_filter_context_create(&context, channels, kernel_width, + kernel_height, width, height)); + check_unary_op( + kleidicv_separable_filter_2d_u8, kleidicv_thread_separable_filter_2d_u8, + channels /*src_channels*/, channels /*dst_channels*/, + /*remaining arguments passed to separable_filter_2d_u8 functions*/ + channels, kernel_x.data(), kernel_width, kernel_y.data(), kernel_height, + border_type, context); + ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); +} + INSTANTIATE_TEST_SUITE_P(, Thread, testing::Values(P{1, 1, 1}, P{1, 2, 1}, P{1, 2, 2}, P{2, 1, 2}, P{2, 2, 1}, P{1, 3, 2}, -- GitLab From 8d73864009eba13faef0633281f6aa1f19e61ab4 Mon Sep 17 00:00:00 2001 From: Michael Platings Date: Tue, 13 Aug 2024 11:44:30 +0000 Subject: [PATCH 3/3] Multithread Sobel --- 
adapters/opencv/kleidicv_hal.cpp | 10 ++-- kleidicv/include/kleidicv/filters/sobel.h | 59 +++++++++++-------- kleidicv/src/filters/sobel_api.cpp | 30 ++++++++-- kleidicv/src/filters/sobel_neon.cpp | 20 +++---- kleidicv/src/filters/sobel_sc.h | 12 ++-- kleidicv/src/filters/sobel_sme2.cpp | 24 ++++---- kleidicv/src/filters/sobel_sve2.cpp | 20 ++++--- .../include/kleidicv_thread/kleidicv_thread.h | 10 ++++ kleidicv_thread/src/kleidicv_thread.cpp | 29 ++++++++- test/api/test_thread.cpp | 24 ++++++++ 10 files changed, 171 insertions(+), 67 deletions(-) diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp index 18176f226..860186eb0 100644 --- a/adapters/opencv/kleidicv_hal.cpp +++ b/adapters/opencv/kleidicv_hal.cpp @@ -784,16 +784,18 @@ int sobel(const uchar *src_data, size_t src_step, uchar *dst_data, return CV_HAL_ERROR_NOT_IMPLEMENTED; } + auto mt = get_multithreading(); + if (dx == 1 && dy == 0) { - return convert_error(kleidicv_sobel_3x3_horizontal_s16_u8( + return convert_error(kleidicv_thread_sobel_3x3_horizontal_s16_u8( src_data, src_step, reinterpret_cast(dst_data), dst_step, - width, height, cn)); + width, height, cn, mt)); } if (dx == 0 && dy == 1) { - return convert_error(kleidicv_sobel_3x3_vertical_s16_u8( + return convert_error(kleidicv_thread_sobel_3x3_vertical_s16_u8( src_data, src_step, reinterpret_cast(dst_data), dst_step, - width, height, cn)); + width, height, cn, mt)); } return CV_HAL_ERROR_NOT_IMPLEMENTED; diff --git a/kleidicv/include/kleidicv/filters/sobel.h b/kleidicv/include/kleidicv/filters/sobel.h index a2c567ae0..243b620a7 100644 --- a/kleidicv/include/kleidicv/filters/sobel.h +++ b/kleidicv/include/kleidicv/filters/sobel.h @@ -7,39 +7,50 @@ #include "kleidicv/kleidicv.h" +extern "C" { +// For internal use only. See instead kleidicv_sobel_3x3_horizontal_s16_u8. +// Filter a horizontal stripe across an image. The stripe is defined by the +// range [y_begin, y_end). 
+KLEIDICV_API_DECLARATION(kleidicv_sobel_3x3_horizontal_stripe_s16_u8, + const uint8_t *src, size_t src_stride, int16_t *dst, + size_t dst_stride, size_t width, size_t height, + size_t y_begin, size_t y_end, size_t channels); +// For internal use only. See instead kleidicv_sobel_3x3_vertical_s16_u8. +// Filter a horizontal stripe across an image. The stripe is defined by the +// range [y_begin, y_end). +KLEIDICV_API_DECLARATION(kleidicv_sobel_3x3_vertical_stripe_s16_u8, + const uint8_t *src, size_t src_stride, int16_t *dst, + size_t dst_stride, size_t width, size_t height, + size_t y_begin, size_t y_end, size_t channels); +} + namespace kleidicv { namespace neon { -kleidicv_error_t sobel_3x3_horizontal_s16_u8(const uint8_t *src, - size_t src_stride, int16_t *dst, - size_t dst_stride, size_t width, - size_t height, size_t channels); -kleidicv_error_t sobel_3x3_vertical_s16_u8(const uint8_t *src, - size_t src_stride, int16_t *dst, - size_t dst_stride, size_t width, - size_t height, size_t channels); +kleidicv_error_t sobel_3x3_horizontal_stripe_s16_u8( + const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels); +kleidicv_error_t sobel_3x3_vertical_stripe_s16_u8( + const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels); } // namespace neon namespace sve2 { -kleidicv_error_t sobel_3x3_horizontal_s16_u8(const uint8_t *src, - size_t src_stride, int16_t *dst, - size_t dst_stride, size_t width, - size_t height, size_t channels); -kleidicv_error_t sobel_3x3_vertical_s16_u8(const uint8_t *src, - size_t src_stride, int16_t *dst, - size_t dst_stride, size_t width, - size_t height, size_t channels); +kleidicv_error_t sobel_3x3_horizontal_stripe_s16_u8( + const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t 
channels); +kleidicv_error_t sobel_3x3_vertical_stripe_s16_u8( + const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels); } // namespace sve2 namespace sme2 { -kleidicv_error_t sobel_3x3_horizontal_s16_u8(const uint8_t *src, - size_t src_stride, int16_t *dst, - size_t dst_stride, size_t width, - size_t height, size_t channels); -kleidicv_error_t sobel_3x3_vertical_s16_u8(const uint8_t *src, - size_t src_stride, int16_t *dst, - size_t dst_stride, size_t width, - size_t height, size_t channels); +kleidicv_error_t sobel_3x3_horizontal_stripe_s16_u8( + const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels); +kleidicv_error_t sobel_3x3_vertical_stripe_s16_u8( + const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels); } // namespace sme2 } // namespace kleidicv diff --git a/kleidicv/src/filters/sobel_api.cpp b/kleidicv/src/filters/sobel_api.cpp index f1460027b..02f4724b0 100644 --- a/kleidicv/src/filters/sobel_api.cpp +++ b/kleidicv/src/filters/sobel_api.cpp @@ -12,7 +12,29 @@ KLEIDICV_SVE2_IMPL_IF(&kleidicv::sve2::partialname), \ &kleidicv::sme2::partialname) -KLEIDICV_DEFINE_C_API(kleidicv_sobel_3x3_horizontal_s16_u8, - sobel_3x3_horizontal_s16_u8); -KLEIDICV_DEFINE_C_API(kleidicv_sobel_3x3_vertical_s16_u8, - sobel_3x3_vertical_s16_u8); +KLEIDICV_DEFINE_C_API(kleidicv_sobel_3x3_horizontal_stripe_s16_u8, + sobel_3x3_horizontal_stripe_s16_u8); +KLEIDICV_DEFINE_C_API(kleidicv_sobel_3x3_vertical_stripe_s16_u8, + sobel_3x3_vertical_stripe_s16_u8); + +namespace kleidicv { +static kleidicv_error_t sobel_3x3_horizontal_s16_u8( + const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels) { + return 
kleidicv_sobel_3x3_horizontal_stripe_s16_u8( + src, src_stride, dst, dst_stride, width, height, 0, height, channels); +} +static kleidicv_error_t sobel_3x3_vertical_s16_u8( + const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels) { + return kleidicv_sobel_3x3_vertical_stripe_s16_u8( + src, src_stride, dst, dst_stride, width, height, 0, height, channels); +} +} // namespace kleidicv + +KLEIDICV_MULTIVERSION_C_API(kleidicv_sobel_3x3_horizontal_s16_u8, + &kleidicv::sobel_3x3_horizontal_s16_u8, nullptr, + nullptr); +KLEIDICV_MULTIVERSION_C_API(kleidicv_sobel_3x3_vertical_s16_u8, + &kleidicv::sobel_3x3_vertical_s16_u8, nullptr, + nullptr); diff --git a/kleidicv/src/filters/sobel_neon.cpp b/kleidicv/src/filters/sobel_neon.cpp index 2286dd488..330d091fe 100644 --- a/kleidicv/src/filters/sobel_neon.cpp +++ b/kleidicv/src/filters/sobel_neon.cpp @@ -128,10 +128,10 @@ class VerticalSobel3x3 { }; // end of class VerticalSobel3x3 KLEIDICV_TARGET_FN_ATTRS -kleidicv_error_t sobel_3x3_horizontal_s16_u8(const uint8_t *src, - size_t src_stride, int16_t *dst, - size_t dst_stride, size_t width, - size_t height, size_t channels) { +kleidicv_error_t sobel_3x3_horizontal_stripe_s16_u8( + const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, size_t y_end, + size_t channels) { CHECK_POINTER_AND_STRIDE(src, src_stride, height); CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); CHECK_IMAGE_SIZE(width, height); @@ -158,16 +158,16 @@ kleidicv_error_t sobel_3x3_horizontal_s16_u8(const uint8_t *src, HorizontalSobel3x3 horizontal_sobel; SeparableFilter3x3> filter{horizontal_sobel}; - workspace->process(rect, 0, rect.height(), src_rows, dst_rows, channels, + workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels, FixedBorderType::REPLICATE, filter); return KLEIDICV_OK; } KLEIDICV_TARGET_FN_ATTRS -kleidicv_error_t sobel_3x3_vertical_s16_u8(const uint8_t 
*src, - size_t src_stride, int16_t *dst, - size_t dst_stride, size_t width, - size_t height, size_t channels) { +kleidicv_error_t sobel_3x3_vertical_stripe_s16_u8( + const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, size_t y_end, + size_t channels) { CHECK_POINTER_AND_STRIDE(src, src_stride, height); CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); CHECK_IMAGE_SIZE(width, height); @@ -194,7 +194,7 @@ kleidicv_error_t sobel_3x3_vertical_s16_u8(const uint8_t *src, VerticalSobel3x3 vertical_sobel; SeparableFilter3x3> filter{vertical_sobel}; - workspace->process(rect, 0, rect.height(), src_rows, dst_rows, channels, + workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels, FixedBorderType::REPLICATE, filter); return KLEIDICV_OK; } diff --git a/kleidicv/src/filters/sobel_sc.h b/kleidicv/src/filters/sobel_sc.h index 2409a3671..c2e71c270 100644 --- a/kleidicv/src/filters/sobel_sc.h +++ b/kleidicv/src/filters/sobel_sc.h @@ -119,9 +119,9 @@ class VerticalSobel3x3 { }; // end of class VerticalSobel3x3 KLEIDICV_TARGET_FN_ATTRS -static kleidicv_error_t sobel_3x3_horizontal_s16_u8_sc( +static kleidicv_error_t sobel_3x3_horizontal_stripe_s16_u8_sc( const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, - size_t width, size_t height, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels) KLEIDICV_STREAMING_COMPATIBLE { CHECK_POINTER_AND_STRIDE(src, src_stride, height); CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); @@ -149,15 +149,15 @@ static kleidicv_error_t sobel_3x3_horizontal_s16_u8_sc( HorizontalSobel3x3 horizontal_sobel; SeparableFilter3x3> filter{horizontal_sobel}; - workspace->process(rect, 0, rect.height(), src_rows, dst_rows, channels, + workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels, FixedBorderType::REPLICATE, filter); return KLEIDICV_OK; } KLEIDICV_TARGET_FN_ATTRS -static kleidicv_error_t 
sobel_3x3_vertical_s16_u8_sc( +static kleidicv_error_t sobel_3x3_vertical_stripe_s16_u8_sc( const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, - size_t width, size_t height, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels) KLEIDICV_STREAMING_COMPATIBLE { CHECK_POINTER_AND_STRIDE(src, src_stride, height); CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); @@ -185,7 +185,7 @@ static kleidicv_error_t sobel_3x3_vertical_s16_u8_sc( VerticalSobel3x3 vertical_sobel; SeparableFilter3x3> filter{vertical_sobel}; - workspace->process(rect, 0, rect.height(), src_rows, dst_rows, channels, + workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels, FixedBorderType::REPLICATE, filter); return KLEIDICV_OK; } diff --git a/kleidicv/src/filters/sobel_sme2.cpp b/kleidicv/src/filters/sobel_sme2.cpp index 0183c9315..f44af14ee 100644 --- a/kleidicv/src/filters/sobel_sme2.cpp +++ b/kleidicv/src/filters/sobel_sme2.cpp @@ -7,19 +7,23 @@ namespace kleidicv::sme2 { KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t -sobel_3x3_horizontal_s16_u8(const uint8_t *src, size_t src_stride, int16_t *dst, - size_t dst_stride, size_t width, size_t height, - size_t channels) { - return sobel_3x3_horizontal_s16_u8_sc(src, src_stride, dst, dst_stride, width, - height, channels); +sobel_3x3_horizontal_stripe_s16_u8(const uint8_t *src, size_t src_stride, + int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, + size_t y_end, size_t channels) { + return sobel_3x3_horizontal_stripe_s16_u8_sc(src, src_stride, dst, dst_stride, + width, height, y_begin, y_end, + channels); } KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t -sobel_3x3_vertical_s16_u8(const uint8_t *src, size_t src_stride, int16_t *dst, - size_t dst_stride, size_t width, size_t height, - size_t channels) { - return sobel_3x3_vertical_s16_u8_sc(src, src_stride, dst, dst_stride, width, - height, channels); 
+sobel_3x3_vertical_stripe_s16_u8(const uint8_t *src, size_t src_stride, + int16_t *dst, size_t dst_stride, size_t width, + size_t height, size_t y_begin, size_t y_end, + size_t channels) { + return sobel_3x3_vertical_stripe_s16_u8_sc(src, src_stride, dst, dst_stride, + width, height, y_begin, y_end, + channels); } } // namespace kleidicv::sme2 diff --git a/kleidicv/src/filters/sobel_sve2.cpp b/kleidicv/src/filters/sobel_sve2.cpp index 189e75325..f57250ab2 100644 --- a/kleidicv/src/filters/sobel_sve2.cpp +++ b/kleidicv/src/filters/sobel_sve2.cpp @@ -6,18 +6,22 @@ namespace kleidicv::sve2 { -KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t sobel_3x3_horizontal_s16_u8( +KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t sobel_3x3_horizontal_stripe_s16_u8( const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, - size_t width, size_t height, size_t channels) { - return sobel_3x3_horizontal_s16_u8_sc(src, src_stride, dst, dst_stride, width, - height, channels); + size_t width, size_t height, size_t y_begin, size_t y_end, + size_t channels) { + return sobel_3x3_horizontal_stripe_s16_u8_sc(src, src_stride, dst, dst_stride, + width, height, y_begin, y_end, + channels); } -KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t sobel_3x3_vertical_s16_u8( +KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t sobel_3x3_vertical_stripe_s16_u8( const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, - size_t width, size_t height, size_t channels) { - return sobel_3x3_vertical_s16_u8_sc(src, src_stride, dst, dst_stride, width, - height, channels); + size_t width, size_t height, size_t y_begin, size_t y_end, + size_t channels) { + return sobel_3x3_vertical_stripe_s16_u8_sc(src, src_stride, dst, dst_stride, + width, height, y_begin, y_end, + channels); } } // namespace kleidicv::sve2 diff --git a/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h b/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h index da87a6be4..ab45ef6e1 100644 --- 
a/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h +++ b/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h @@ -252,6 +252,16 @@ kleidicv_error_t kleidicv_thread_separable_filter_2d_u8( kleidicv_border_type_t border_type, kleidicv_filter_context_t *context, kleidicv_thread_multithreading); +kleidicv_error_t kleidicv_thread_sobel_3x3_horizontal_s16_u8( + const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, + kleidicv_thread_multithreading); + +kleidicv_error_t kleidicv_thread_sobel_3x3_vertical_s16_u8( + const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, + kleidicv_thread_multithreading); + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/kleidicv_thread/src/kleidicv_thread.cpp b/kleidicv_thread/src/kleidicv_thread.cpp index 4a9731da4..6d8506ee1 100644 --- a/kleidicv_thread/src/kleidicv_thread.cpp +++ b/kleidicv_thread/src/kleidicv_thread.cpp @@ -11,6 +11,7 @@ #include "kleidicv/filters/gaussian_blur.h" #include "kleidicv/filters/separable_filter_2d.h" +#include "kleidicv/filters/sobel.h" #include "kleidicv/kleidicv.h" typedef std::function FunctionCallback; @@ -440,4 +441,30 @@ kleidicv_error_t kleidicv_thread_separable_filter_2d_u8( }; return kleidicv_thread_filter(callback, width, height, channels, kernel_width, kernel_height, context, mt); -} \ No newline at end of file +} + +kleidicv_error_t kleidicv_thread_sobel_3x3_horizontal_s16_u8( + const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, + kleidicv_thread_multithreading mt) { + FunctionCallback callback = [=](unsigned y_begin, unsigned y_end) { + return kleidicv_sobel_3x3_horizontal_stripe_s16_u8( + src, src_stride, dst, dst_stride, width, height, y_begin, y_end, + channels); + }; + return mt.parallel(kleidicv_thread_std_function_callback, &callback, + mt.parallel_data, 
height); +} + +kleidicv_error_t kleidicv_thread_sobel_3x3_vertical_s16_u8( + const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, + kleidicv_thread_multithreading mt) { + FunctionCallback callback = [=](unsigned y_begin, unsigned y_end) { + return kleidicv_sobel_3x3_vertical_stripe_s16_u8(src, src_stride, dst, + dst_stride, width, height, + y_begin, y_end, channels); + }; + return mt.parallel(kleidicv_thread_std_function_callback, &callback, + mt.parallel_data, height); +} diff --git a/test/api/test_thread.cpp b/test/api/test_thread.cpp index 56bdb6703..bf456eb25 100644 --- a/test/api/test_thread.cpp +++ b/test/api/test_thread.cpp @@ -193,6 +193,30 @@ TEST_P(Thread, separable_filter_2d_u8) { ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); } +TEST_P(Thread, SobelHorizontal1Channel) { + check_unary_op(kleidicv_sobel_3x3_horizontal_s16_u8, + kleidicv_thread_sobel_3x3_horizontal_s16_u8, + 1, 1, 1); +} + +TEST_P(Thread, SobelHorizontal3Channels) { + check_unary_op(kleidicv_sobel_3x3_horizontal_s16_u8, + kleidicv_thread_sobel_3x3_horizontal_s16_u8, + 3, 3, 3); +} + +TEST_P(Thread, SobelVertical1Channel) { + check_unary_op(kleidicv_sobel_3x3_vertical_s16_u8, + kleidicv_thread_sobel_3x3_vertical_s16_u8, 1, + 1, 1); +} + +TEST_P(Thread, SobelVertical3Channels) { + check_unary_op(kleidicv_sobel_3x3_vertical_s16_u8, + kleidicv_thread_sobel_3x3_vertical_s16_u8, 3, + 3, 3); +} + INSTANTIATE_TEST_SUITE_P(, Thread, testing::Values(P{1, 1, 1}, P{1, 2, 1}, P{1, 2, 2}, P{2, 1, 2}, P{2, 2, 1}, P{1, 3, 2}, -- GitLab