From b62a3bb9a9624fc59a09880e9ea7fba37329f848 Mon Sep 17 00:00:00 2001 From: Michael Platings Date: Mon, 12 Aug 2024 15:16:11 +0000 Subject: [PATCH] Multithreaded Gaussian blur --- adapters/opencv/kleidicv_hal.cpp | 5 +- .../include/kleidicv/filters/gaussian_blur.h | 50 +++++++++++-------- .../include/kleidicv/workspace/separable.h | 4 +- kleidicv/src/filters/gaussian_blur_api.cpp | 21 ++++++-- kleidicv/src/filters/gaussian_blur_neon.cpp | 45 +++++++++-------- kleidicv/src/filters/gaussian_blur_sc.h | 41 +++++++-------- kleidicv/src/filters/gaussian_blur_sme2.cpp | 19 +++---- kleidicv/src/filters/gaussian_blur_sve2.cpp | 18 +++---- .../src/filters/separable_filter_2d_neon.cpp | 4 +- kleidicv/src/filters/separable_filter_2d_sc.h | 4 +- kleidicv/src/filters/sobel_neon.cpp | 4 +- kleidicv/src/filters/sobel_sc.h | 4 +- .../include/kleidicv_thread/kleidicv_thread.h | 7 +++ kleidicv_thread/src/kleidicv_thread.cpp | 45 +++++++++++++++++ test/api/test_thread.cpp | 28 +++++++++-- 15 files changed, 199 insertions(+), 100 deletions(-) diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp index 26abcdb2b..bb8914f4f 100644 --- a/adapters/opencv/kleidicv_hal.cpp +++ b/adapters/opencv/kleidicv_hal.cpp @@ -487,10 +487,11 @@ int gaussian_blur_binomial(const uchar *src_data, size_t src_step, return convert_error(create_err); } - kleidicv_error_t blur_err = kleidicv_gaussian_blur_u8( + auto mt = get_multithreading(); + kleidicv_error_t blur_err = kleidicv_thread_gaussian_blur_u8( reinterpret_cast(src_data), src_step, reinterpret_cast(dst_data), dst_step, width, height, cn, - kernel_size, kernel_size, 0.0, 0.0, kleidicv_border_type, context); + kernel_size, kernel_size, 0.0, 0.0, kleidicv_border_type, context, mt); kleidicv_error_t release_err = kleidicv_filter_context_release(context); diff --git a/kleidicv/include/kleidicv/filters/gaussian_blur.h b/kleidicv/include/kleidicv/filters/gaussian_blur.h index eee58e7d0..debfd01bc 100644 --- 
a/kleidicv/include/kleidicv/filters/gaussian_blur.h +++ b/kleidicv/include/kleidicv/filters/gaussian_blur.h @@ -6,43 +6,51 @@ #define KLEIDICV_FILTERS_GAUSSIAN_BLUR_H #include "kleidicv/config.h" +#include "kleidicv/kleidicv.h" #include "kleidicv/types.h" +extern "C" { +// For internal use only. See instead kleidicv_gaussian_blur_u8. +// Blur a horizontal stripe across an image. The stripe is defined by the +// range [y_begin, y_end). +KLEIDICV_API_DECLARATION(kleidicv_gaussian_blur_stripe_u8, const uint8_t *src, + size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, + size_t y_end, size_t channels, size_t kernel_width, + size_t kernel_height, float sigma_x, float sigma_y, + kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context); +} + namespace kleidicv { namespace neon { -kleidicv_error_t gaussian_blur_u8(const uint8_t *src, size_t src_stride, - uint8_t *dst, size_t dst_stride, size_t width, - size_t height, size_t channels, - size_t kernel_width, size_t kernel_height, - float sigma_x, float sigma_y, - kleidicv_border_type_t border_type, - kleidicv_filter_context_t *context); +kleidicv_error_t gaussian_blur_stripe_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, + size_t kernel_width, size_t kernel_height, float sigma_x, float sigma_y, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); } // namespace neon namespace sve2 { -kleidicv_error_t gaussian_blur_u8(const uint8_t *src, size_t src_stride, - uint8_t *dst, size_t dst_stride, size_t width, - size_t height, size_t channels, - size_t kernel_width, size_t kernel_height, - float sigma_x, float sigma_y, - kleidicv_border_type_t border_type, - kleidicv_filter_context_t *context); +kleidicv_error_t gaussian_blur_stripe_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, 
size_t y_begin, size_t y_end, size_t channels, + size_t kernel_width, size_t kernel_height, float sigma_x, float sigma_y, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); } // namespace sve2 namespace sme2 { -kleidicv_error_t gaussian_blur_u8(const uint8_t *src, size_t src_stride, - uint8_t *dst, size_t dst_stride, size_t width, - size_t height, size_t channels, - size_t kernel_width, size_t kernel_height, - float sigma_x, float sigma_y, - kleidicv_border_type_t border_type, - kleidicv_filter_context_t *context); +kleidicv_error_t gaussian_blur_stripe_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, + size_t kernel_width, size_t kernel_height, float sigma_x, float sigma_y, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); } // namespace sme2 diff --git a/kleidicv/include/kleidicv/workspace/separable.h b/kleidicv/include/kleidicv/workspace/separable.h index 6a501686e..899bccf69 100644 --- a/kleidicv/include/kleidicv/workspace/separable.h +++ b/kleidicv/include/kleidicv/workspace/separable.h @@ -122,7 +122,7 @@ class SeparableFilterWorkspace final { // Processes rows vertically first along the full width template - void process(Rectangle rect, + void process(Rectangle rect, size_t y_begin, size_t y_end, Rows src_rows, Rows dst_rows, size_t channels, typename FilterType::BorderType border_type, @@ -139,7 +139,7 @@ class SeparableFilterWorkspace final { buffer_rows_stride_, channels}; // Vertical processing loop. - for (size_t vertical_index = 0; vertical_index < rect.height(); + for (size_t vertical_index = y_begin; vertical_index < y_end; ++vertical_index) { // Recalculate vertical border offsets. 
auto offsets = vertical_border.offsets_with_border(vertical_index); diff --git a/kleidicv/src/filters/gaussian_blur_api.cpp b/kleidicv/src/filters/gaussian_blur_api.cpp index 161cf666d..67d47d4fa 100644 --- a/kleidicv/src/filters/gaussian_blur_api.cpp +++ b/kleidicv/src/filters/gaussian_blur_api.cpp @@ -7,6 +7,21 @@ #include "kleidicv/kleidicv.h" KLEIDICV_MULTIVERSION_C_API( - kleidicv_gaussian_blur_u8, &kleidicv::neon::gaussian_blur_u8, - KLEIDICV_SVE2_IMPL_IF(kleidicv::sve2::gaussian_blur_u8), - &kleidicv::sme2::gaussian_blur_u8); + kleidicv_gaussian_blur_stripe_u8, &kleidicv::neon::gaussian_blur_stripe_u8, + KLEIDICV_SVE2_IMPL_IF(kleidicv::sve2::gaussian_blur_stripe_u8), + &kleidicv::sme2::gaussian_blur_stripe_u8); + +namespace kleidicv { +static kleidicv_error_t gaussian_blur_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, size_t kernel_width, + size_t kernel_height, float sigma_x, float sigma_y, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { + return kleidicv_gaussian_blur_stripe_u8( + src, src_stride, dst, dst_stride, width, height, 0, height, channels, + kernel_width, kernel_height, sigma_x, sigma_y, border_type, context); +} +} // namespace kleidicv + +KLEIDICV_MULTIVERSION_C_API(kleidicv_gaussian_blur_u8, + &kleidicv::gaussian_blur_u8, nullptr, nullptr); diff --git a/kleidicv/src/filters/gaussian_blur_neon.cpp b/kleidicv/src/filters/gaussian_blur_neon.cpp index 1fe35b290..f45a253ce 100644 --- a/kleidicv/src/filters/gaussian_blur_neon.cpp +++ b/kleidicv/src/filters/gaussian_blur_neon.cpp @@ -634,8 +634,9 @@ class GaussianBlur { template static kleidicv_error_t gaussian_blur_fixed_kernel_size( const ScalarType *src, size_t src_stride, ScalarType *dst, - size_t dst_stride, Rectangle &rect, size_t channels, float sigma, - FixedBorderType border_type, SeparableFilterWorkspace *workspace) { + size_t dst_stride, Rectangle &rect, size_t y_begin, size_t y_end, 
+ size_t channels, float sigma, FixedBorderType border_type, + SeparableFilterWorkspace *workspace) { using GaussianBlurFilter = GaussianBlur; GaussianBlurFilter blur{sigma}; @@ -643,7 +644,8 @@ static kleidicv_error_t gaussian_blur_fixed_kernel_size( Rows src_rows{src, src_stride, channels}; Rows dst_rows{dst, dst_stride, channels}; - workspace->process(rect, src_rows, dst_rows, channels, border_type, filter); + workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels, + border_type, filter); return KLEIDICV_OK; } @@ -652,26 +654,27 @@ template static kleidicv_error_t gaussian_blur(size_t kernel_size, const ScalarType *src, size_t src_stride, ScalarType *dst, size_t dst_stride, Rectangle &rect, + size_t y_begin, size_t y_end, size_t channels, float sigma, FixedBorderType border_type, SeparableFilterWorkspace *workspace) { switch (kernel_size) { case 3: return gaussian_blur_fixed_kernel_size<3, IsBinomial>( - src, src_stride, dst, dst_stride, rect, channels, sigma, border_type, - workspace); + src, src_stride, dst, dst_stride, rect, y_begin, y_end, channels, + sigma, border_type, workspace); case 5: return gaussian_blur_fixed_kernel_size<5, IsBinomial>( - src, src_stride, dst, dst_stride, rect, channels, sigma, border_type, - workspace); + src, src_stride, dst, dst_stride, rect, y_begin, y_end, channels, + sigma, border_type, workspace); case 7: return gaussian_blur_fixed_kernel_size<7, IsBinomial>( - src, src_stride, dst, dst_stride, rect, channels, sigma, border_type, - workspace); + src, src_stride, dst, dst_stride, rect, y_begin, y_end, channels, + sigma, border_type, workspace); case 15: return gaussian_blur_fixed_kernel_size<15, IsBinomial>( - src, src_stride, dst, dst_stride, rect, channels, sigma, border_type, - workspace); + src, src_stride, dst, dst_stride, rect, y_begin, y_end, channels, + sigma, border_type, workspace); default: return KLEIDICV_ERROR_NOT_IMPLEMENTED; } @@ -718,13 +721,11 @@ static kleidicv_error_t gaussian_blur_checks( } 
KLEIDICV_TARGET_FN_ATTRS -kleidicv_error_t gaussian_blur_u8(const uint8_t *src, size_t src_stride, - uint8_t *dst, size_t dst_stride, size_t width, - size_t height, size_t channels, - size_t kernel_width, size_t kernel_height, - float sigma_x, float sigma_y, - kleidicv_border_type_t border_type, - kleidicv_filter_context_t *context) { +kleidicv_error_t gaussian_blur_stripe_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, + size_t kernel_width, size_t kernel_height, float sigma_x, float sigma_y, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { auto *workspace = reinterpret_cast(context); kleidicv_error_t checks_result = gaussian_blur_checks( src, src_stride, dst, dst_stride, width, height, channels, kernel_width, @@ -743,13 +744,13 @@ kleidicv_error_t gaussian_blur_u8(const uint8_t *src, size_t src_stride, if (sigma_x == 0.0) { return gaussian_blur(kernel_width, src, src_stride, dst, dst_stride, - rect, channels, sigma_x, *fixed_border_type, - workspace); + rect, y_begin, y_end, channels, sigma_x, + *fixed_border_type, workspace); } return gaussian_blur(kernel_width, src, src_stride, dst, dst_stride, - rect, channels, sigma_x, *fixed_border_type, - workspace); + rect, y_begin, y_end, channels, sigma_x, + *fixed_border_type, workspace); } } // namespace kleidicv::neon diff --git a/kleidicv/src/filters/gaussian_blur_sc.h b/kleidicv/src/filters/gaussian_blur_sc.h index 3a3b2682e..65c7e5299 100644 --- a/kleidicv/src/filters/gaussian_blur_sc.h +++ b/kleidicv/src/filters/gaussian_blur_sc.h @@ -813,8 +813,8 @@ class GaussianBlur final template static kleidicv_error_t gaussian_blur_fixed_kernel_size( const ScalarType *src, size_t src_stride, ScalarType *dst, - size_t dst_stride, Rectangle &rect, size_t channels, float sigma, - FixedBorderType border_type, + size_t dst_stride, Rectangle &rect, size_t y_begin, size_t y_end, + size_t channels, 
float sigma, FixedBorderType border_type, SeparableFilterWorkspace *workspace) KLEIDICV_STREAMING_COMPATIBLE { using GaussianBlurFilter = GaussianBlur; @@ -823,7 +823,8 @@ static kleidicv_error_t gaussian_blur_fixed_kernel_size( Rows src_rows{src, src_stride, channels}; Rows dst_rows{dst, dst_stride, channels}; - workspace->process(rect, src_rows, dst_rows, channels, border_type, filter); + workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels, + border_type, filter); return KLEIDICV_OK; } @@ -831,26 +832,26 @@ static kleidicv_error_t gaussian_blur_fixed_kernel_size( template static kleidicv_error_t gaussian_blur( size_t kernel_size, const ScalarType *src, size_t src_stride, - ScalarType *dst, size_t dst_stride, Rectangle &rect, size_t channels, - float sigma, FixedBorderType border_type, + ScalarType *dst, size_t dst_stride, Rectangle &rect, size_t y_begin, + size_t y_end, size_t channels, float sigma, FixedBorderType border_type, SeparableFilterWorkspace *workspace) KLEIDICV_STREAMING_COMPATIBLE { switch (kernel_size) { case 3: return gaussian_blur_fixed_kernel_size<3, IsBinomial>( - src, src_stride, dst, dst_stride, rect, channels, sigma, border_type, - workspace); + src, src_stride, dst, dst_stride, rect, y_begin, y_end, channels, + sigma, border_type, workspace); case 5: return gaussian_blur_fixed_kernel_size<5, IsBinomial>( - src, src_stride, dst, dst_stride, rect, channels, sigma, border_type, - workspace); + src, src_stride, dst, dst_stride, rect, y_begin, y_end, channels, + sigma, border_type, workspace); case 7: return gaussian_blur_fixed_kernel_size<7, IsBinomial>( - src, src_stride, dst, dst_stride, rect, channels, sigma, border_type, - workspace); + src, src_stride, dst, dst_stride, rect, y_begin, y_end, channels, + sigma, border_type, workspace); case 15: return gaussian_blur_fixed_kernel_size<15, IsBinomial>( - src, src_stride, dst, dst_stride, rect, channels, sigma, border_type, - workspace); + src, src_stride, dst, dst_stride, rect, 
y_begin, y_end, channels, + sigma, border_type, workspace); default: return KLEIDICV_ERROR_NOT_IMPLEMENTED; } @@ -896,10 +897,10 @@ static kleidicv_error_t gaussian_blur_checks( return KLEIDICV_OK; } -static kleidicv_error_t gaussian_blur_u8_sc( +static kleidicv_error_t gaussian_blur_stripe_u8_sc( const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, - size_t width, size_t height, size_t channels, size_t kernel_width, - size_t kernel_height, float sigma_x, float sigma_y, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, + size_t kernel_width, size_t kernel_height, float sigma_x, float sigma_y, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) KLEIDICV_STREAMING_COMPATIBLE { auto *workspace = reinterpret_cast(context); @@ -920,13 +921,13 @@ static kleidicv_error_t gaussian_blur_u8_sc( if (sigma_x == 0.0) { return gaussian_blur(kernel_width, src, src_stride, dst, dst_stride, - rect, channels, sigma_x, *fixed_border_type, - workspace); + rect, y_begin, y_end, channels, sigma_x, + *fixed_border_type, workspace); } return gaussian_blur(kernel_width, src, src_stride, dst, dst_stride, - rect, channels, sigma_x, *fixed_border_type, - workspace); + rect, y_begin, y_end, channels, sigma_x, + *fixed_border_type, workspace); } } // namespace KLEIDICV_TARGET_NAMESPACE diff --git a/kleidicv/src/filters/gaussian_blur_sme2.cpp b/kleidicv/src/filters/gaussian_blur_sme2.cpp index 64290a5f2..f82698737 100644 --- a/kleidicv/src/filters/gaussian_blur_sme2.cpp +++ b/kleidicv/src/filters/gaussian_blur_sme2.cpp @@ -8,15 +8,16 @@ namespace kleidicv::sme2 { KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t -gaussian_blur_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, - size_t dst_stride, size_t width, size_t height, - size_t channels, size_t kernel_width, size_t kernel_height, - float sigma_x, float sigma_y, - kleidicv_border_type_t border_type, - kleidicv_filter_context_t *context) { - return 
gaussian_blur_u8_sc(src, src_stride, dst, dst_stride, width, height, - channels, kernel_width, kernel_height, sigma_x, - sigma_y, border_type, context); +gaussian_blur_stripe_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, size_t height, + size_t y_begin, size_t y_end, size_t channels, + size_t kernel_width, size_t kernel_height, + float sigma_x, float sigma_y, + kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context) { + return gaussian_blur_stripe_u8_sc( + src, src_stride, dst, dst_stride, width, height, y_begin, y_end, channels, + kernel_width, kernel_height, sigma_x, sigma_y, border_type, context); } } // namespace kleidicv::sme2 diff --git a/kleidicv/src/filters/gaussian_blur_sve2.cpp b/kleidicv/src/filters/gaussian_blur_sve2.cpp index 7c329e3ef..04e50f8a6 100644 --- a/kleidicv/src/filters/gaussian_blur_sve2.cpp +++ b/kleidicv/src/filters/gaussian_blur_sve2.cpp @@ -8,16 +8,14 @@ namespace kleidicv::sve2 { KLEIDICV_TARGET_FN_ATTRS -kleidicv_error_t gaussian_blur_u8(const uint8_t *src, size_t src_stride, - uint8_t *dst, size_t dst_stride, size_t width, - size_t height, size_t channels, - size_t kernel_width, size_t kernel_height, - float sigma_x, float sigma_y, - kleidicv_border_type_t border_type, - kleidicv_filter_context_t *context) { - return gaussian_blur_u8_sc(src, src_stride, dst, dst_stride, width, height, - channels, kernel_width, kernel_height, sigma_x, - sigma_y, border_type, context); +kleidicv_error_t gaussian_blur_stripe_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, + size_t kernel_width, size_t kernel_height, float sigma_x, float sigma_y, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { + return gaussian_blur_stripe_u8_sc( + src, src_stride, dst, dst_stride, width, height, y_begin, y_end, channels, + kernel_width, kernel_height, sigma_x, sigma_y, 
border_type, context); } } // namespace kleidicv::sve2 diff --git a/kleidicv/src/filters/separable_filter_2d_neon.cpp b/kleidicv/src/filters/separable_filter_2d_neon.cpp index eefcd58b3..c9a5834c4 100644 --- a/kleidicv/src/filters/separable_filter_2d_neon.cpp +++ b/kleidicv/src/filters/separable_filter_2d_neon.cpp @@ -176,8 +176,8 @@ kleidicv_error_t separable_filter_2d_u8( Rows src_rows{src, src_stride, channels}; Rows dst_rows{dst, dst_stride, channels}; - workspace->process(rect, src_rows, dst_rows, channels, *fixed_border_type, - filter); + workspace->process(rect, 0, rect.height(), src_rows, dst_rows, channels, + *fixed_border_type, filter); return KLEIDICV_OK; } diff --git a/kleidicv/src/filters/separable_filter_2d_sc.h b/kleidicv/src/filters/separable_filter_2d_sc.h index 1cf91c2ba..e03e460d9 100644 --- a/kleidicv/src/filters/separable_filter_2d_sc.h +++ b/kleidicv/src/filters/separable_filter_2d_sc.h @@ -220,8 +220,8 @@ static kleidicv_error_t separable_filter_2d_u8_sc( Rows src_rows{src, src_stride, channels}; Rows dst_rows{dst, dst_stride, channels}; - workspace->process(rect, src_rows, dst_rows, channels, *fixed_border_type, - filter); + workspace->process(rect, 0, rect.height(), src_rows, dst_rows, channels, + *fixed_border_type, filter); return KLEIDICV_OK; } diff --git a/kleidicv/src/filters/sobel_neon.cpp b/kleidicv/src/filters/sobel_neon.cpp index 09e108575..2286dd488 100644 --- a/kleidicv/src/filters/sobel_neon.cpp +++ b/kleidicv/src/filters/sobel_neon.cpp @@ -158,7 +158,7 @@ kleidicv_error_t sobel_3x3_horizontal_s16_u8(const uint8_t *src, HorizontalSobel3x3 horizontal_sobel; SeparableFilter3x3> filter{horizontal_sobel}; - workspace->process(rect, src_rows, dst_rows, channels, + workspace->process(rect, 0, rect.height(), src_rows, dst_rows, channels, FixedBorderType::REPLICATE, filter); return KLEIDICV_OK; } @@ -194,7 +194,7 @@ kleidicv_error_t sobel_3x3_vertical_s16_u8(const uint8_t *src, VerticalSobel3x3 vertical_sobel; SeparableFilter3x3> 
filter{vertical_sobel}; - workspace->process(rect, src_rows, dst_rows, channels, + workspace->process(rect, 0, rect.height(), src_rows, dst_rows, channels, FixedBorderType::REPLICATE, filter); return KLEIDICV_OK; } diff --git a/kleidicv/src/filters/sobel_sc.h b/kleidicv/src/filters/sobel_sc.h index 783cd55ad..2409a3671 100644 --- a/kleidicv/src/filters/sobel_sc.h +++ b/kleidicv/src/filters/sobel_sc.h @@ -149,7 +149,7 @@ static kleidicv_error_t sobel_3x3_horizontal_s16_u8_sc( HorizontalSobel3x3 horizontal_sobel; SeparableFilter3x3> filter{horizontal_sobel}; - workspace->process(rect, src_rows, dst_rows, channels, + workspace->process(rect, 0, rect.height(), src_rows, dst_rows, channels, FixedBorderType::REPLICATE, filter); return KLEIDICV_OK; } @@ -185,7 +185,7 @@ static kleidicv_error_t sobel_3x3_vertical_s16_u8_sc( VerticalSobel3x3 vertical_sobel; SeparableFilter3x3> filter{vertical_sobel}; - workspace->process(rect, src_rows, dst_rows, channels, + workspace->process(rect, 0, rect.height(), src_rows, dst_rows, channels, FixedBorderType::REPLICATE, filter); return KLEIDICV_OK; } diff --git a/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h b/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h index ae0c04e0b..2af9555cb 100644 --- a/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h +++ b/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h @@ -238,6 +238,13 @@ kleidicv_error_t kleidicv_thread_saturating_add_abs_with_threshold_s16( size_t src_b_stride, int16_t *dst, size_t dst_stride, size_t width, size_t height, int16_t threshold, kleidicv_thread_multithreading); +kleidicv_error_t kleidicv_thread_gaussian_blur_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, size_t kernel_width, + size_t kernel_height, float sigma_x, float sigma_y, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context, + kleidicv_thread_multithreading); + #ifdef __cplusplus } 
// extern "C" #endif // __cplusplus diff --git a/kleidicv_thread/src/kleidicv_thread.cpp b/kleidicv_thread/src/kleidicv_thread.cpp index 70107a509..47ae4b9b6 100644 --- a/kleidicv_thread/src/kleidicv_thread.cpp +++ b/kleidicv_thread/src/kleidicv_thread.cpp @@ -9,6 +9,7 @@ #include #include +#include "kleidicv/filters/gaussian_blur.h" #include "kleidicv/kleidicv.h" typedef std::function FunctionCallback; @@ -364,3 +365,47 @@ kleidicv_error_t parallel_min_max_loc(FunctionType min_max_loc_func, } DEFINE_KLEIDICV_THREAD_MIN_MAX_LOC(u8, uint8_t); + +kleidicv_error_t kleidicv_thread_gaussian_blur_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, size_t kernel_width, + size_t kernel_height, float sigma_x, float sigma_y, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context, + kleidicv_thread_multithreading mt) { + FunctionCallback callback = [=](unsigned y_begin, unsigned y_end) { + // The context contains a buffer that can only fit a single row, so can't be + // shared between threads. Since we don't know how many threads there are, + // create and destroy a context every time this callback is called. Only use + // the context argument for the first thread. + bool create_context = 0 != y_begin; + kleidicv_filter_context_t *thread_context = context; + if (create_context) { + kleidicv_error_t context_create_result = kleidicv_filter_context_create( + &thread_context, channels, kernel_width, kernel_height, width, + height); + // Excluded from coverage because it's impractical to test this. + // MockMallocToFail can't be used because malloc is used in thread setup. 
+ // GCOVR_EXCL_START + if (KLEIDICV_OK != context_create_result) { + return context_create_result; + } + // GCOVR_EXCL_STOP + } + + kleidicv_error_t result = kleidicv_gaussian_blur_stripe_u8( + src, src_stride, dst, dst_stride, width, height, y_begin, y_end, + channels, kernel_width, kernel_height, sigma_x, sigma_y, border_type, + thread_context); + + if (create_context) { + kleidicv_error_t context_release_result = + kleidicv_filter_context_release(thread_context); + if (KLEIDICV_OK == result) { + result = context_release_result; + } + } + return result; + }; + return mt.parallel(kleidicv_thread_std_function_callback, &callback, + mt.parallel_data, height); +} diff --git a/test/api/test_thread.cpp b/test/api/test_thread.cpp index 69d5534d4..8ecd82e15 100644 --- a/test/api/test_thread.cpp +++ b/test/api/test_thread.cpp @@ -40,9 +40,10 @@ class Thread : public testing::TestWithParam

{ src.data(), src.stride(), dst_multi.data(), dst_multi.stride(), width, height, args..., get_multithreading_fake(thread_count)); - EXPECT_EQ(KLEIDICV_OK, single_result); - EXPECT_EQ(KLEIDICV_OK, multi_result); - EXPECT_EQ_ARRAY2D(dst_multi, dst_single); + EXPECT_EQ(single_result, multi_result); + if (KLEIDICV_OK == single_result) { + EXPECT_EQ_ARRAY2D(dst_multi, dst_single); + } } template ( + kleidicv_gaussian_blur_u8, kleidicv_thread_gaussian_blur_u8, + channels /*src_channels*/, channels /*dst_channels*/, + /*remaining arguments passed to gaussian_blur_u8 functions*/ channels, + kernel_width, kernel_height, sigma_x, sigma_y, border_type, context); + ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); +} + INSTANTIATE_TEST_SUITE_P(, Thread, testing::Values(P{1, 1, 1}, P{1, 2, 1}, P{1, 2, 2}, P{2, 1, 2}, P{2, 2, 1}, P{1, 3, 2}, -- GitLab