diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp index bb8914f4f5886918553bc28714353458858667ba..860186eb092cc26363b95755aeb6296be7335107 100644 --- a/adapters/opencv/kleidicv_hal.cpp +++ b/adapters/opencv/kleidicv_hal.cpp @@ -423,12 +423,14 @@ int separable_filter_2d_operation(cvhalFilter2D *context, uchar *src_data, params->cached_max_image_height = height_sz; } - kleidicv_error_t filter_err = kleidicv_separable_filter_2d_u8( + auto mt = get_multithreading(); + + kleidicv_error_t filter_err = kleidicv_thread_separable_filter_2d_u8( reinterpret_cast(src_data), src_step, reinterpret_cast(dst_data), dst_step, static_cast(width), static_cast(height), params->channels, params->kernel_x, params->kernel_width, params->kernel_y, - params->kernel_height, params->border_type, filter_context); + params->kernel_height, params->border_type, filter_context, mt); return convert_error(filter_err); } @@ -533,11 +535,12 @@ int gaussian_blur(const uchar *src_data, size_t src_step, uchar *dst_data, return convert_error(create_err); } - kleidicv_error_t blur_err = kleidicv_gaussian_blur_u8( + auto mt = get_multithreading(); + kleidicv_error_t blur_err = kleidicv_thread_gaussian_blur_u8( reinterpret_cast(src_data), src_step, reinterpret_cast(dst_data), dst_step, width, height, cn, kernel_width, kernel_height, sigma_x, sigma_y, kleidicv_border_type, - context); + context, mt); kleidicv_error_t release_err = kleidicv_filter_context_release(context); @@ -781,16 +784,18 @@ int sobel(const uchar *src_data, size_t src_step, uchar *dst_data, return CV_HAL_ERROR_NOT_IMPLEMENTED; } + auto mt = get_multithreading(); + if (dx == 1 && dy == 0) { - return convert_error(kleidicv_sobel_3x3_horizontal_s16_u8( + return convert_error(kleidicv_thread_sobel_3x3_horizontal_s16_u8( src_data, src_step, reinterpret_cast(dst_data), dst_step, - width, height, cn)); + width, height, cn, mt)); } if (dx == 0 && dy == 1) { - return convert_error(kleidicv_sobel_3x3_vertical_s16_u8( + 
return convert_error(kleidicv_thread_sobel_3x3_vertical_s16_u8( src_data, src_step, reinterpret_cast(dst_data), dst_step, - width, height, cn)); + width, height, cn, mt)); } return CV_HAL_ERROR_NOT_IMPLEMENTED; diff --git a/kleidicv/include/kleidicv/filters/separable_filter_2d.h b/kleidicv/include/kleidicv/filters/separable_filter_2d.h index 514d88a5af566e9df5c8f0e510305b5ba6445a57..867b612ace05b2a96770f3c51d060c82dceb0d09 100644 --- a/kleidicv/include/kleidicv/filters/separable_filter_2d.h +++ b/kleidicv/include/kleidicv/filters/separable_filter_2d.h @@ -6,37 +6,55 @@ #define KLEIDICV_FILTERS_SEPARABLE_FILTER_2D_H #include "kleidicv/config.h" +#include "kleidicv/kleidicv.h" #include "kleidicv/types.h" +extern "C" { +// For internal use only. See instead kleidicv_separable_filter_2d_u8. +// Filter a horizontal stripe across an image. The stripe is defined by the +// range [y_begin, y_end). +KLEIDICV_API_DECLARATION(kleidicv_separable_filter_2d_stripe_u8, + const uint8_t *src, size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, size_t height, + size_t y_begin, size_t y_end, size_t channels, + const uint8_t *kernel_x, size_t kernel_width, + const uint8_t *kernel_y, size_t kernel_height, + kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context); +} + namespace kleidicv { namespace neon { -kleidicv_error_t separable_filter_2d_u8( +kleidicv_error_t separable_filter_2d_stripe_u8( const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, - size_t width, size_t height, size_t channels, const uint8_t *kernel_x, - size_t kernel_width, const uint8_t *kernel_y, size_t kernel_height, - kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, + const uint8_t *kernel_x, size_t kernel_width, const uint8_t *kernel_y, + size_t kernel_height, kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context); } // namespace neon 
namespace sve2 { -kleidicv_error_t separable_filter_2d_u8( +kleidicv_error_t separable_filter_2d_stripe_u8( const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, - size_t width, size_t height, size_t channels, const uint8_t *kernel_x, - size_t kernel_width, const uint8_t *kernel_y, size_t kernel_height, - kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, + const uint8_t *kernel_x, size_t kernel_width, const uint8_t *kernel_y, + size_t kernel_height, kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context); } // namespace sve2 namespace sme2 { -kleidicv_error_t separable_filter_2d_u8( +kleidicv_error_t separable_filter_2d_stripe_u8( const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, - size_t width, size_t height, size_t channels, const uint8_t *kernel_x, - size_t kernel_width, const uint8_t *kernel_y, size_t kernel_height, - kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, + const uint8_t *kernel_x, size_t kernel_width, const uint8_t *kernel_y, + size_t kernel_height, kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context); } // namespace sme2 diff --git a/kleidicv/include/kleidicv/filters/sobel.h b/kleidicv/include/kleidicv/filters/sobel.h index a2c567ae0f916a0cd31a3ce0ee468fcfb1979490..243b620a7cf0b9f4ea87339cd2e2f95cd0c14639 100644 --- a/kleidicv/include/kleidicv/filters/sobel.h +++ b/kleidicv/include/kleidicv/filters/sobel.h @@ -7,39 +7,50 @@ #include "kleidicv/kleidicv.h" +extern "C" { +// For internal use only. See instead kleidicv_sobel_3x3_horizontal_s16_u8. +// Filter a horizontal stripe across an image. The stripe is defined by the +// range [y_begin, y_end). 
+KLEIDICV_API_DECLARATION(kleidicv_sobel_3x3_horizontal_stripe_s16_u8, + const uint8_t *src, size_t src_stride, int16_t *dst, + size_t dst_stride, size_t width, size_t height, + size_t y_begin, size_t y_end, size_t channels); +// For internal use only. See instead kleidicv_sobel_3x3_vertical_s16_u8. +// Filter a horizontal stripe across an image. The stripe is defined by the +// range [y_begin, y_end). +KLEIDICV_API_DECLARATION(kleidicv_sobel_3x3_vertical_stripe_s16_u8, + const uint8_t *src, size_t src_stride, int16_t *dst, + size_t dst_stride, size_t width, size_t height, + size_t y_begin, size_t y_end, size_t channels); +} + namespace kleidicv { namespace neon { -kleidicv_error_t sobel_3x3_horizontal_s16_u8(const uint8_t *src, - size_t src_stride, int16_t *dst, - size_t dst_stride, size_t width, - size_t height, size_t channels); -kleidicv_error_t sobel_3x3_vertical_s16_u8(const uint8_t *src, - size_t src_stride, int16_t *dst, - size_t dst_stride, size_t width, - size_t height, size_t channels); +kleidicv_error_t sobel_3x3_horizontal_stripe_s16_u8( + const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels); +kleidicv_error_t sobel_3x3_vertical_stripe_s16_u8( + const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels); } // namespace neon namespace sve2 { -kleidicv_error_t sobel_3x3_horizontal_s16_u8(const uint8_t *src, - size_t src_stride, int16_t *dst, - size_t dst_stride, size_t width, - size_t height, size_t channels); -kleidicv_error_t sobel_3x3_vertical_s16_u8(const uint8_t *src, - size_t src_stride, int16_t *dst, - size_t dst_stride, size_t width, - size_t height, size_t channels); +kleidicv_error_t sobel_3x3_horizontal_stripe_s16_u8( + const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t 
channels); +kleidicv_error_t sobel_3x3_vertical_stripe_s16_u8( + const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels); } // namespace sve2 namespace sme2 { -kleidicv_error_t sobel_3x3_horizontal_s16_u8(const uint8_t *src, - size_t src_stride, int16_t *dst, - size_t dst_stride, size_t width, - size_t height, size_t channels); -kleidicv_error_t sobel_3x3_vertical_s16_u8(const uint8_t *src, - size_t src_stride, int16_t *dst, - size_t dst_stride, size_t width, - size_t height, size_t channels); +kleidicv_error_t sobel_3x3_horizontal_stripe_s16_u8( + const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels); +kleidicv_error_t sobel_3x3_vertical_stripe_s16_u8( + const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels); } // namespace sme2 } // namespace kleidicv diff --git a/kleidicv/src/filters/separable_filter_2d_api.cpp b/kleidicv/src/filters/separable_filter_2d_api.cpp index 5e6a222a2d04bf15dfb0352e13f6dbd75f6b786a..028a66f01f9ee27e8ef228fe8c9e7c9bf23bc020 100644 --- a/kleidicv/src/filters/separable_filter_2d_api.cpp +++ b/kleidicv/src/filters/separable_filter_2d_api.cpp @@ -59,6 +59,23 @@ kleidicv_error_t kleidicv_filter_context_release( } // extern "C" KLEIDICV_MULTIVERSION_C_API( - kleidicv_separable_filter_2d_u8, &kleidicv::neon::separable_filter_2d_u8, - KLEIDICV_SVE2_IMPL_IF(kleidicv::sve2::separable_filter_2d_u8), - &kleidicv::sme2::separable_filter_2d_u8); + kleidicv_separable_filter_2d_stripe_u8, + &kleidicv::neon::separable_filter_2d_stripe_u8, + KLEIDICV_SVE2_IMPL_IF(kleidicv::sve2::separable_filter_2d_stripe_u8), + &kleidicv::sme2::separable_filter_2d_stripe_u8); + +namespace kleidicv { +static kleidicv_error_t separable_filter_2d_u8( + const uint8_t *src, size_t src_stride, 
uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, const uint8_t *kernel_x, + size_t kernel_width, const uint8_t *kernel_y, size_t kernel_height, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { + return kleidicv_separable_filter_2d_stripe_u8( + src, src_stride, dst, dst_stride, width, height, 0, height, channels, + kernel_x, kernel_width, kernel_y, kernel_height, border_type, context); +} +} // namespace kleidicv + +KLEIDICV_MULTIVERSION_C_API(kleidicv_separable_filter_2d_u8, + &kleidicv::separable_filter_2d_u8, nullptr, + nullptr); diff --git a/kleidicv/src/filters/separable_filter_2d_neon.cpp b/kleidicv/src/filters/separable_filter_2d_neon.cpp index c9a5834c470d3190f1af6d9a5ad155e86b7f786d..2a29a9d683b953357c092daf419fa1e0c3fda7e7 100644 --- a/kleidicv/src/filters/separable_filter_2d_neon.cpp +++ b/kleidicv/src/filters/separable_filter_2d_neon.cpp @@ -146,11 +146,12 @@ static kleidicv_error_t separable_filter_2d_checks( } KLEIDICV_TARGET_FN_ATTRS -kleidicv_error_t separable_filter_2d_u8( +kleidicv_error_t separable_filter_2d_stripe_u8( const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, - size_t width, size_t height, size_t channels, const uint8_t *kernel_x, - size_t kernel_width, const uint8_t *kernel_y, size_t kernel_height, - kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, + const uint8_t *kernel_x, size_t kernel_width, const uint8_t *kernel_y, + size_t kernel_height, kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context) { auto *workspace = reinterpret_cast(context); kleidicv_error_t checks_result = separable_filter_2d_checks( src, src_stride, dst, dst_stride, width, height, channels, kernel_x, @@ -176,7 +177,7 @@ kleidicv_error_t separable_filter_2d_u8( Rows src_rows{src, src_stride, channels}; Rows dst_rows{dst, dst_stride, channels}; - 
workspace->process(rect, 0, rect.height(), src_rows, dst_rows, channels, + workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels, *fixed_border_type, filter); return KLEIDICV_OK; diff --git a/kleidicv/src/filters/separable_filter_2d_sc.h b/kleidicv/src/filters/separable_filter_2d_sc.h index e03e460d9ec9e988344baf388ad985b47e4ae5c2..8330c66c2deca08952197cf4e42753cf42998c5c 100644 --- a/kleidicv/src/filters/separable_filter_2d_sc.h +++ b/kleidicv/src/filters/separable_filter_2d_sc.h @@ -174,11 +174,11 @@ static kleidicv_error_t separable_filter_2d_checks( return KLEIDICV_OK; } -static kleidicv_error_t separable_filter_2d_u8_sc( +static kleidicv_error_t separable_filter_2d_stripe_u8_sc( const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, - size_t width, size_t height, size_t channels, const uint8_t *kernel_x, - size_t kernel_width, const uint8_t *kernel_y, size_t kernel_height, - kleidicv_border_type_t border_type, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, + const uint8_t *kernel_x, size_t kernel_width, const uint8_t *kernel_y, + size_t kernel_height, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) KLEIDICV_STREAMING_COMPATIBLE { auto *workspace = reinterpret_cast(context); kleidicv_error_t checks_result = separable_filter_2d_checks( @@ -220,7 +220,7 @@ static kleidicv_error_t separable_filter_2d_u8_sc( Rows src_rows{src, src_stride, channels}; Rows dst_rows{dst, dst_stride, channels}; - workspace->process(rect, 0, rect.height(), src_rows, dst_rows, channels, + workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels, *fixed_border_type, filter); return KLEIDICV_OK; diff --git a/kleidicv/src/filters/separable_filter_2d_sme2.cpp b/kleidicv/src/filters/separable_filter_2d_sme2.cpp index fc0857178e82f97db852bbc219a7fa9e893ee382..ef3e245701b69d8b1f592304c9c26cfcb2fc15b6 100644 --- a/kleidicv/src/filters/separable_filter_2d_sme2.cpp +++ 
b/kleidicv/src/filters/separable_filter_2d_sme2.cpp @@ -8,15 +8,17 @@ namespace kleidicv::sme2 { KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t -separable_filter_2d_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, - size_t dst_stride, size_t width, size_t height, - size_t channels, const uint8_t *kernel_x, - size_t kernel_width, const uint8_t *kernel_y, - size_t kernel_height, kleidicv_border_type_t border_type, - kleidicv_filter_context_t *context) { - return separable_filter_2d_u8_sc( - src, src_stride, dst, dst_stride, width, height, channels, kernel_x, - kernel_width, kernel_y, kernel_height, border_type, context); +separable_filter_2d_stripe_u8(const uint8_t *src, size_t src_stride, + uint8_t *dst, size_t dst_stride, size_t width, + size_t height, size_t y_begin, size_t y_end, + size_t channels, const uint8_t *kernel_x, + size_t kernel_width, const uint8_t *kernel_y, + size_t kernel_height, + kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context) { + return separable_filter_2d_stripe_u8_sc( + src, src_stride, dst, dst_stride, width, height, y_begin, y_end, channels, + kernel_x, kernel_width, kernel_y, kernel_height, border_type, context); } } // namespace kleidicv::sme2 diff --git a/kleidicv/src/filters/separable_filter_2d_sve2.cpp b/kleidicv/src/filters/separable_filter_2d_sve2.cpp index 0de532c1c698782bb0756f26b7ed35a5214f4a4a..0ce142234c3d67c23ed936ba625df27e87bf35f6 100644 --- a/kleidicv/src/filters/separable_filter_2d_sve2.cpp +++ b/kleidicv/src/filters/separable_filter_2d_sve2.cpp @@ -8,14 +8,15 @@ namespace kleidicv::sve2 { KLEIDICV_TARGET_FN_ATTRS -kleidicv_error_t separable_filter_2d_u8( +kleidicv_error_t separable_filter_2d_stripe_u8( const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, - size_t width, size_t height, size_t channels, const uint8_t *kernel_x, - size_t kernel_width, const uint8_t *kernel_y, size_t kernel_height, - kleidicv_border_type_t border_type, 
kleidicv_filter_context_t *context) { - return separable_filter_2d_u8_sc( - src, src_stride, dst, dst_stride, width, height, channels, kernel_x, - kernel_width, kernel_y, kernel_height, border_type, context); + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, + const uint8_t *kernel_x, size_t kernel_width, const uint8_t *kernel_y, + size_t kernel_height, kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context) { + return separable_filter_2d_stripe_u8_sc( + src, src_stride, dst, dst_stride, width, height, y_begin, y_end, channels, + kernel_x, kernel_width, kernel_y, kernel_height, border_type, context); } } // namespace kleidicv::sve2 diff --git a/kleidicv/src/filters/sobel_api.cpp b/kleidicv/src/filters/sobel_api.cpp index f1460027ba3fc01fac55655e4166febaedb86100..02f4724b01b56519ed0a92bb229345a4007b841c 100644 --- a/kleidicv/src/filters/sobel_api.cpp +++ b/kleidicv/src/filters/sobel_api.cpp @@ -12,7 +12,29 @@ KLEIDICV_SVE2_IMPL_IF(&kleidicv::sve2::partialname), \ &kleidicv::sme2::partialname) -KLEIDICV_DEFINE_C_API(kleidicv_sobel_3x3_horizontal_s16_u8, - sobel_3x3_horizontal_s16_u8); -KLEIDICV_DEFINE_C_API(kleidicv_sobel_3x3_vertical_s16_u8, - sobel_3x3_vertical_s16_u8); +KLEIDICV_DEFINE_C_API(kleidicv_sobel_3x3_horizontal_stripe_s16_u8, + sobel_3x3_horizontal_stripe_s16_u8); +KLEIDICV_DEFINE_C_API(kleidicv_sobel_3x3_vertical_stripe_s16_u8, + sobel_3x3_vertical_stripe_s16_u8); + +namespace kleidicv { +static kleidicv_error_t sobel_3x3_horizontal_s16_u8( + const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels) { + return kleidicv_sobel_3x3_horizontal_stripe_s16_u8( + src, src_stride, dst, dst_stride, width, height, 0, height, channels); +} +static kleidicv_error_t sobel_3x3_vertical_s16_u8( + const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels) { + return 
kleidicv_sobel_3x3_vertical_stripe_s16_u8( + src, src_stride, dst, dst_stride, width, height, 0, height, channels); +} +} // namespace kleidicv + +KLEIDICV_MULTIVERSION_C_API(kleidicv_sobel_3x3_horizontal_s16_u8, + &kleidicv::sobel_3x3_horizontal_s16_u8, nullptr, + nullptr); +KLEIDICV_MULTIVERSION_C_API(kleidicv_sobel_3x3_vertical_s16_u8, + &kleidicv::sobel_3x3_vertical_s16_u8, nullptr, + nullptr); diff --git a/kleidicv/src/filters/sobel_neon.cpp b/kleidicv/src/filters/sobel_neon.cpp index 2286dd488c14a4fd1a0efbd5eb7280c1e6b72bcd..330d091fe754b01a4d0c6ea4b77f3c7aeac37683 100644 --- a/kleidicv/src/filters/sobel_neon.cpp +++ b/kleidicv/src/filters/sobel_neon.cpp @@ -128,10 +128,10 @@ class VerticalSobel3x3 { }; // end of class VerticalSobel3x3 KLEIDICV_TARGET_FN_ATTRS -kleidicv_error_t sobel_3x3_horizontal_s16_u8(const uint8_t *src, - size_t src_stride, int16_t *dst, - size_t dst_stride, size_t width, - size_t height, size_t channels) { +kleidicv_error_t sobel_3x3_horizontal_stripe_s16_u8( + const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, size_t y_end, + size_t channels) { CHECK_POINTER_AND_STRIDE(src, src_stride, height); CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); CHECK_IMAGE_SIZE(width, height); @@ -158,16 +158,16 @@ kleidicv_error_t sobel_3x3_horizontal_s16_u8(const uint8_t *src, HorizontalSobel3x3 horizontal_sobel; SeparableFilter3x3> filter{horizontal_sobel}; - workspace->process(rect, 0, rect.height(), src_rows, dst_rows, channels, + workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels, FixedBorderType::REPLICATE, filter); return KLEIDICV_OK; } KLEIDICV_TARGET_FN_ATTRS -kleidicv_error_t sobel_3x3_vertical_s16_u8(const uint8_t *src, - size_t src_stride, int16_t *dst, - size_t dst_stride, size_t width, - size_t height, size_t channels) { +kleidicv_error_t sobel_3x3_vertical_stripe_s16_u8( + const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, + 
size_t width, size_t height, size_t y_begin, size_t y_end, + size_t channels) { CHECK_POINTER_AND_STRIDE(src, src_stride, height); CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); CHECK_IMAGE_SIZE(width, height); @@ -194,7 +194,7 @@ kleidicv_error_t sobel_3x3_vertical_s16_u8(const uint8_t *src, VerticalSobel3x3 vertical_sobel; SeparableFilter3x3> filter{vertical_sobel}; - workspace->process(rect, 0, rect.height(), src_rows, dst_rows, channels, + workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels, FixedBorderType::REPLICATE, filter); return KLEIDICV_OK; } diff --git a/kleidicv/src/filters/sobel_sc.h b/kleidicv/src/filters/sobel_sc.h index 2409a36710acaa3b302b9f17caf19997934ef8ff..c2e71c270738b0aa5050e61cc1f0d5909a5a1699 100644 --- a/kleidicv/src/filters/sobel_sc.h +++ b/kleidicv/src/filters/sobel_sc.h @@ -119,9 +119,9 @@ class VerticalSobel3x3 { }; // end of class VerticalSobel3x3 KLEIDICV_TARGET_FN_ATTRS -static kleidicv_error_t sobel_3x3_horizontal_s16_u8_sc( +static kleidicv_error_t sobel_3x3_horizontal_stripe_s16_u8_sc( const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, - size_t width, size_t height, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels) KLEIDICV_STREAMING_COMPATIBLE { CHECK_POINTER_AND_STRIDE(src, src_stride, height); CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); @@ -149,15 +149,15 @@ static kleidicv_error_t sobel_3x3_horizontal_s16_u8_sc( HorizontalSobel3x3 horizontal_sobel; SeparableFilter3x3> filter{horizontal_sobel}; - workspace->process(rect, 0, rect.height(), src_rows, dst_rows, channels, + workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels, FixedBorderType::REPLICATE, filter); return KLEIDICV_OK; } KLEIDICV_TARGET_FN_ATTRS -static kleidicv_error_t sobel_3x3_vertical_s16_u8_sc( +static kleidicv_error_t sobel_3x3_vertical_stripe_s16_u8_sc( const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, - size_t width, size_t height, + size_t 
width, size_t height, size_t y_begin, size_t y_end, size_t channels) KLEIDICV_STREAMING_COMPATIBLE { CHECK_POINTER_AND_STRIDE(src, src_stride, height); CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); @@ -185,7 +185,7 @@ static kleidicv_error_t sobel_3x3_vertical_s16_u8_sc( VerticalSobel3x3 vertical_sobel; SeparableFilter3x3> filter{vertical_sobel}; - workspace->process(rect, 0, rect.height(), src_rows, dst_rows, channels, + workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels, FixedBorderType::REPLICATE, filter); return KLEIDICV_OK; } diff --git a/kleidicv/src/filters/sobel_sme2.cpp b/kleidicv/src/filters/sobel_sme2.cpp index 0183c9315613580fd8a77aa0b5ae5564defb708b..f44af14ee7f124428e6008d44e1518c28160ff25 100644 --- a/kleidicv/src/filters/sobel_sme2.cpp +++ b/kleidicv/src/filters/sobel_sme2.cpp @@ -7,19 +7,23 @@ namespace kleidicv::sme2 { KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t -sobel_3x3_horizontal_s16_u8(const uint8_t *src, size_t src_stride, int16_t *dst, - size_t dst_stride, size_t width, size_t height, - size_t channels) { - return sobel_3x3_horizontal_s16_u8_sc(src, src_stride, dst, dst_stride, width, - height, channels); +sobel_3x3_horizontal_stripe_s16_u8(const uint8_t *src, size_t src_stride, + int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, + size_t y_end, size_t channels) { + return sobel_3x3_horizontal_stripe_s16_u8_sc(src, src_stride, dst, dst_stride, + width, height, y_begin, y_end, + channels); } KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t -sobel_3x3_vertical_s16_u8(const uint8_t *src, size_t src_stride, int16_t *dst, - size_t dst_stride, size_t width, size_t height, - size_t channels) { - return sobel_3x3_vertical_s16_u8_sc(src, src_stride, dst, dst_stride, width, - height, channels); +sobel_3x3_vertical_stripe_s16_u8(const uint8_t *src, size_t src_stride, + int16_t *dst, size_t dst_stride, size_t width, + size_t height, size_t y_begin, size_t 
y_end, + size_t channels) { + return sobel_3x3_vertical_stripe_s16_u8_sc(src, src_stride, dst, dst_stride, + width, height, y_begin, y_end, + channels); } } // namespace kleidicv::sme2 diff --git a/kleidicv/src/filters/sobel_sve2.cpp b/kleidicv/src/filters/sobel_sve2.cpp index 189e7532513cffadaa6b8d3582e65afb08170df9..f57250ab2f320077e3ad6a6e68b9bd010b3bf2db 100644 --- a/kleidicv/src/filters/sobel_sve2.cpp +++ b/kleidicv/src/filters/sobel_sve2.cpp @@ -6,18 +6,22 @@ namespace kleidicv::sve2 { -KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t sobel_3x3_horizontal_s16_u8( +KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t sobel_3x3_horizontal_stripe_s16_u8( const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, - size_t width, size_t height, size_t channels) { - return sobel_3x3_horizontal_s16_u8_sc(src, src_stride, dst, dst_stride, width, - height, channels); + size_t width, size_t height, size_t y_begin, size_t y_end, + size_t channels) { + return sobel_3x3_horizontal_stripe_s16_u8_sc(src, src_stride, dst, dst_stride, + width, height, y_begin, y_end, + channels); } -KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t sobel_3x3_vertical_s16_u8( +KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t sobel_3x3_vertical_stripe_s16_u8( const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, - size_t width, size_t height, size_t channels) { - return sobel_3x3_vertical_s16_u8_sc(src, src_stride, dst, dst_stride, width, - height, channels); + size_t width, size_t height, size_t y_begin, size_t y_end, + size_t channels) { + return sobel_3x3_vertical_stripe_s16_u8_sc(src, src_stride, dst, dst_stride, + width, height, y_begin, y_end, + channels); } } // namespace kleidicv::sve2 diff --git a/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h b/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h index 2af9555cb320f1648d6db80e2f8d92ee2562ea7f..ab45ef6e14da7a0dbdbab4a081522a7482b3a1f4 100644 --- a/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h +++ 
b/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h @@ -245,6 +245,23 @@ kleidicv_error_t kleidicv_thread_gaussian_blur_u8( kleidicv_border_type_t border_type, kleidicv_filter_context_t *context, kleidicv_thread_multithreading); +kleidicv_error_t kleidicv_thread_separable_filter_2d_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, const uint8_t *kernel_x, + size_t kernel_width, const uint8_t *kernel_y, size_t kernel_height, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context, + kleidicv_thread_multithreading); + +kleidicv_error_t kleidicv_thread_sobel_3x3_horizontal_s16_u8( + const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, + kleidicv_thread_multithreading); + +kleidicv_error_t kleidicv_thread_sobel_3x3_vertical_s16_u8( + const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, + kleidicv_thread_multithreading); + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/kleidicv_thread/src/kleidicv_thread.cpp b/kleidicv_thread/src/kleidicv_thread.cpp index 47ae4b9b6409f84a333b88a5f330d23be0360841..6d8506ee1bcbcad5605fc20df937a3f9e4294c1a 100644 --- a/kleidicv_thread/src/kleidicv_thread.cpp +++ b/kleidicv_thread/src/kleidicv_thread.cpp @@ -10,6 +10,8 @@ #include #include "kleidicv/filters/gaussian_blur.h" +#include "kleidicv/filters/separable_filter_2d.h" +#include "kleidicv/filters/sobel.h" #include "kleidicv/kleidicv.h" typedef std::function FunctionCallback; @@ -366,12 +368,12 @@ kleidicv_error_t parallel_min_max_loc(FunctionType min_max_loc_func, DEFINE_KLEIDICV_THREAD_MIN_MAX_LOC(u8, uint8_t); -kleidicv_error_t kleidicv_thread_gaussian_blur_u8( - const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, - size_t width, size_t height, size_t channels, size_t kernel_width, - size_t kernel_height, float 
sigma_x, float sigma_y, - kleidicv_border_type_t border_type, kleidicv_filter_context_t *context, - kleidicv_thread_multithreading mt) { +template +kleidicv_error_t kleidicv_thread_filter(F filter, size_t width, size_t height, + size_t channels, size_t kernel_width, + size_t kernel_height, + kleidicv_filter_context_t *context, + kleidicv_thread_multithreading mt) { FunctionCallback callback = [=](unsigned y_begin, unsigned y_end) { // The context contains a buffer that can only fit a single row, so can't be // shared between threads. Since we don't know how many threads there are, @@ -392,10 +394,7 @@ kleidicv_error_t kleidicv_thread_gaussian_blur_u8( // GCOVR_EXCL_STOP } - kleidicv_error_t result = kleidicv_gaussian_blur_stripe_u8( - src, src_stride, dst, dst_stride, width, height, y_begin, y_end, - channels, kernel_width, kernel_height, sigma_x, sigma_y, border_type, - thread_context); + kleidicv_error_t result = filter(y_begin, y_end, thread_context); if (create_context) { kleidicv_error_t context_release_result = @@ -409,3 +408,63 @@ kleidicv_error_t kleidicv_thread_gaussian_blur_u8( return mt.parallel(kleidicv_thread_std_function_callback, &callback, mt.parallel_data, height); } + +kleidicv_error_t kleidicv_thread_gaussian_blur_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, size_t kernel_width, + size_t kernel_height, float sigma_x, float sigma_y, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context, + kleidicv_thread_multithreading mt) { + auto callback = [=](size_t y_begin, size_t y_end, + kleidicv_filter_context_t *thread_context) { + return kleidicv_gaussian_blur_stripe_u8( + src, src_stride, dst, dst_stride, width, height, y_begin, y_end, + channels, kernel_width, kernel_height, sigma_x, sigma_y, border_type, + thread_context); + }; + return kleidicv_thread_filter(callback, width, height, channels, kernel_width, + kernel_height, context, mt); +} + 
+kleidicv_error_t kleidicv_thread_separable_filter_2d_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, const uint8_t *kernel_x, + size_t kernel_width, const uint8_t *kernel_y, size_t kernel_height, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context, + kleidicv_thread_multithreading mt) { + auto callback = [=](size_t y_begin, size_t y_end, + kleidicv_filter_context_t *thread_context) { + return kleidicv_separable_filter_2d_stripe_u8( + src, src_stride, dst, dst_stride, width, height, y_begin, y_end, + channels, kernel_x, kernel_width, kernel_y, kernel_height, border_type, + thread_context); + }; + return kleidicv_thread_filter(callback, width, height, channels, kernel_width, + kernel_height, context, mt); +} + +kleidicv_error_t kleidicv_thread_sobel_3x3_horizontal_s16_u8( + const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, + kleidicv_thread_multithreading mt) { + FunctionCallback callback = [=](unsigned y_begin, unsigned y_end) { + return kleidicv_sobel_3x3_horizontal_stripe_s16_u8( + src, src_stride, dst, dst_stride, width, height, y_begin, y_end, + channels); + }; + return mt.parallel(kleidicv_thread_std_function_callback, &callback, + mt.parallel_data, height); +} + +kleidicv_error_t kleidicv_thread_sobel_3x3_vertical_s16_u8( + const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, + kleidicv_thread_multithreading mt) { + FunctionCallback callback = [=](unsigned y_begin, unsigned y_end) { + return kleidicv_sobel_3x3_vertical_stripe_s16_u8(src, src_stride, dst, + dst_stride, width, height, + y_begin, y_end, channels); + }; + return mt.parallel(kleidicv_thread_std_function_callback, &callback, + mt.parallel_data, height); +} diff --git a/test/api/test_thread.cpp b/test/api/test_thread.cpp index 
8ecd82e152db7b3e9920dce05355a3c1e45f59e3..bf456eb25d6aa5f21727b49cd0c888b7b0332933 100644 --- a/test/api/test_thread.cpp +++ b/test/api/test_thread.cpp @@ -166,6 +166,57 @@ TEST_P(Thread, gaussian_blur_u8) { ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); } +TEST_P(Thread, separable_filter_2d_u8) { + unsigned width = 0, height = 0, thread_count = 0; + std::tie(width, height, thread_count) = GetParam(); + (void)thread_count; + size_t channels = 1; + const size_t kernel_width = 5; + const size_t kernel_height = kernel_width; + + test::Array2D kernel_x{kernel_width, 1}; + kernel_x.set(0, 0, {9, 9, 9, 9, 9}); + test::Array2D kernel_y{kernel_height, 1}; + kernel_y.set(0, 0, {5, 6, 7, 8, 9}); + + kleidicv_border_type_t border_type = KLEIDICV_BORDER_TYPE_REPLICATE; + kleidicv_filter_context_t *context = nullptr; + ASSERT_EQ(KLEIDICV_OK, + kleidicv_filter_context_create(&context, channels, kernel_width, + kernel_height, width, height)); + check_unary_op( + kleidicv_separable_filter_2d_u8, kleidicv_thread_separable_filter_2d_u8, + channels /*src_channels*/, channels /*dst_channels*/, + /*remaining arguments passed to separable_filter_2d_u8 functions*/ + channels, kernel_x.data(), kernel_width, kernel_y.data(), kernel_height, + border_type, context); + ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); +} + +TEST_P(Thread, SobelHorizontal1Channel) { + check_unary_op(kleidicv_sobel_3x3_horizontal_s16_u8, + kleidicv_thread_sobel_3x3_horizontal_s16_u8, + 1, 1, 1); +} + +TEST_P(Thread, SobelHorizontal3Channels) { + check_unary_op(kleidicv_sobel_3x3_horizontal_s16_u8, + kleidicv_thread_sobel_3x3_horizontal_s16_u8, + 3, 3, 3); +} + +TEST_P(Thread, SobelVertical1Channel) { + check_unary_op(kleidicv_sobel_3x3_vertical_s16_u8, + kleidicv_thread_sobel_3x3_vertical_s16_u8, 1, + 1, 1); +} + +TEST_P(Thread, SobelVertical3Channels) { + check_unary_op(kleidicv_sobel_3x3_vertical_s16_u8, + kleidicv_thread_sobel_3x3_vertical_s16_u8, 3, + 3, 3); +} + 
INSTANTIATE_TEST_SUITE_P(, Thread, testing::Values(P{1, 1, 1}, P{1, 2, 1}, P{1, 2, 2}, P{2, 1, 2}, P{2, 2, 1}, P{1, 3, 2},