From 8c37b9d98ad1caa46768e6853df164d6ba1c9732 Mon Sep 17 00:00:00 2001 From: Noureldin Abdelfattah Date: Mon, 30 Jun 2025 13:04:34 +0100 Subject: [PATCH] Add median 9x9 to 15x15 implementation NEON version --- CHANGELOG.md | 1 + benchmark/benchmark.cpp | 12 +- conformity/opencv/test_median_blur.cpp | 68 +-- doc/functionality.md | 2 + doc/opencv.md | 7 +- .../include/kleidicv/filters/median_blur.h | 169 ++++--- kleidicv/include/kleidicv/kleidicv.h | 13 +- kleidicv/include/kleidicv/neon_intrinsics.h | 17 + kleidicv/src/filters/median_blur_api.cpp | 82 ++-- .../filters/median_blur_small_hist_neon.cpp | 436 ++++++++++++++++++ ...p => median_blur_sorting_network_neon.cpp} | 58 +-- ..._sc.h => median_blur_sorting_network_sc.h} | 42 +- ...p => median_blur_sorting_network_sme2.cpp} | 29 +- ...p => median_blur_sorting_network_sve2.cpp} | 21 +- kleidicv_thread/src/kleidicv_thread.cpp | 17 +- scripts/benchmark/benchmarks.txt | 10 +- test/api/test_median_blur.cpp | 78 +++- test/api/test_thread.cpp | 15 +- 18 files changed, 840 insertions(+), 237 deletions(-) create mode 100644 kleidicv/src/filters/median_blur_small_hist_neon.cpp rename kleidicv/src/filters/{median_blur_neon.cpp => median_blur_sorting_network_neon.cpp} (83%) rename kleidicv/src/filters/{median_blur_sc.h => median_blur_sorting_network_sc.h} (84%) rename kleidicv/src/filters/{median_blur_sme2.cpp => median_blur_sorting_network_sme2.cpp} (52%) rename kleidicv/src/filters/{median_blur_sve2.cpp => median_blur_sorting_network_sve2.cpp} (68%) diff --git a/CHANGELOG.md b/CHANGELOG.md index c552a742b..935e95a48 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ This changelog aims to follow the guiding principles of ### Added - Median Blur for 3x3 kernels. +- Median Blur for 9x9, 11x11, 13x13 and 15x15 kernels, Neon backend only. ### Changed - Performance of Gaussian Blur is greatly improved in return for some accuracy. 
diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index bf6658fe8..4d35a0111 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -612,11 +612,19 @@ static void median_blur(benchmark::State& state, Function func) { BENCHMARK_TEMPLATE2_CAPTURE(median_blur, uint8_t, 1, , kleidicv_median_blur_u8) ->Arg(3) ->Arg(5) - ->Arg(7); + ->Arg(7) + ->Arg(9) + ->Arg(11) + ->Arg(13) + ->Arg(15); BENCHMARK_TEMPLATE2_CAPTURE(median_blur, uint8_t, 4, , kleidicv_median_blur_u8) ->Arg(3) ->Arg(5) - ->Arg(7); + ->Arg(7) + ->Arg(9) + ->Arg(11) + ->Arg(13) + ->Arg(15); BENCHMARK_TEMPLATE2_CAPTURE(median_blur, int8_t, 1, , kleidicv_median_blur_s8) ->Arg(3) diff --git a/conformity/opencv/test_median_blur.cpp b/conformity/opencv/test_median_blur.cpp index a963d79e1..85b78bc29 100644 --- a/conformity/opencv/test_median_blur.cpp +++ b/conformity/opencv/test_median_blur.cpp @@ -43,35 +43,45 @@ bool test_median_blur(int index, RecreatedMessageQueue& request_queue, std::vector& median_blur_tests_get() { // clang-format off static std::vector tests = { - TEST("Median 3x3, 1 channel (U8)", (test_median_blur<3, uint8_t, 1>), exec_median_blur<3>), - TEST("Median 3x3, 3 channel (U8)", (test_median_blur<3, uint8_t, 3>), exec_median_blur<3>), - TEST("Median 3x3, 4 channel (U8)", (test_median_blur<3, uint8_t, 4>), exec_median_blur<3>), - TEST("Median 3x3, 1 channel (U16)", (test_median_blur<3, uint16_t, 1>), exec_median_blur<3>), - TEST("Median 3x3, 3 channel (U16)", (test_median_blur<3, uint16_t, 3>), exec_median_blur<3>), - TEST("Median 3x3, 4 channel (U16)", (test_median_blur<3, uint16_t, 4>), exec_median_blur<3>), - TEST("Median 3x3, 1 channel (S16)", (test_median_blur<3, int16_t, 1>), exec_median_blur<3>), - TEST("Median 3x3, 3 channel (S16)", (test_median_blur<3, int16_t, 3>), exec_median_blur<3>), - TEST("Median 3x3, 4 channel (S16)", (test_median_blur<3, int16_t, 4>), exec_median_blur<3>), - TEST("Median 3x3, 1 channel (F32)", (test_median_blur<3, float, 1>), 
exec_median_blur<3>), - TEST("Median 3x3, 3 channel (F32)", (test_median_blur<3, float, 3>), exec_median_blur<3>), - TEST("Median 3x3, 4 channel (F32)", (test_median_blur<3, float, 4>), exec_median_blur<3>), - - TEST("Median 5x5, 1 channel (U8)", (test_median_blur<5, uint8_t, 1>), exec_median_blur<5>), - TEST("Median 5x5, 3 channel (U8)", (test_median_blur<5, uint8_t, 3>), exec_median_blur<5>), - TEST("Median 5x5, 4 channel (U8)", (test_median_blur<5, uint8_t, 4>), exec_median_blur<5>), - TEST("Median 5x5, 1 channel (U16)", (test_median_blur<5, uint16_t, 1>), exec_median_blur<5>), - TEST("Median 5x5, 3 channel (U16)", (test_median_blur<5, uint16_t, 3>), exec_median_blur<5>), - TEST("Median 5x5, 4 channel (U16)", (test_median_blur<5, uint16_t, 4>), exec_median_blur<5>), - TEST("Median 5x5, 1 channel (S16)", (test_median_blur<5, int16_t, 1>), exec_median_blur<5>), - TEST("Median 5x5, 3 channel (S16)", (test_median_blur<5, int16_t, 3>), exec_median_blur<5>), - TEST("Median 5x5, 4 channel (S16)", (test_median_blur<5, int16_t, 4>), exec_median_blur<5>), - TEST("Median 5x5, 1 channel (F32)", (test_median_blur<5, float, 1>), exec_median_blur<5>), - TEST("Median 5x5, 3 channel (F32)", (test_median_blur<5, float, 3>), exec_median_blur<5>), - TEST("Median 5x5, 4 channel (F32)", (test_median_blur<5, float, 4>), exec_median_blur<5>), - - TEST("Median 7x7, 1 channel (U8)", (test_median_blur<7, uint8_t, 1>), exec_median_blur<7>), - TEST("Median 7x7, 3 channel (U8)", (test_median_blur<7, uint8_t, 3>), exec_median_blur<7>), - TEST("Median 7x7, 4 channel (U8)", (test_median_blur<7, uint8_t, 4>), exec_median_blur<7>), + TEST("Median 3x3, 1 channel (U8)", (test_median_blur<3, uint8_t, 1>), exec_median_blur<3>), + TEST("Median 3x3, 3 channel (U8)", (test_median_blur<3, uint8_t, 3>), exec_median_blur<3>), + TEST("Median 3x3, 4 channel (U8)", (test_median_blur<3, uint8_t, 4>), exec_median_blur<3>), + TEST("Median 3x3, 1 channel (U16)", (test_median_blur<3, uint16_t, 1>), 
exec_median_blur<3>), + TEST("Median 3x3, 3 channel (U16)", (test_median_blur<3, uint16_t, 3>), exec_median_blur<3>), + TEST("Median 3x3, 4 channel (U16)", (test_median_blur<3, uint16_t, 4>), exec_median_blur<3>), + TEST("Median 3x3, 1 channel (S16)", (test_median_blur<3, int16_t, 1>), exec_median_blur<3>), + TEST("Median 3x3, 3 channel (S16)", (test_median_blur<3, int16_t, 3>), exec_median_blur<3>), + TEST("Median 3x3, 4 channel (S16)", (test_median_blur<3, int16_t, 4>), exec_median_blur<3>), + TEST("Median 3x3, 1 channel (F32)", (test_median_blur<3, float, 1>), exec_median_blur<3>), + TEST("Median 3x3, 3 channel (F32)", (test_median_blur<3, float, 3>), exec_median_blur<3>), + TEST("Median 3x3, 4 channel (F32)", (test_median_blur<3, float, 4>), exec_median_blur<3>), + TEST("Median 5x5, 1 channel (U8)", (test_median_blur<5, uint8_t, 1>), exec_median_blur<5>), + TEST("Median 5x5, 3 channel (U8)", (test_median_blur<5, uint8_t, 3>), exec_median_blur<5>), + TEST("Median 5x5, 4 channel (U8)", (test_median_blur<5, uint8_t, 4>), exec_median_blur<5>), + TEST("Median 5x5, 1 channel (U16)", (test_median_blur<5, uint16_t, 1>), exec_median_blur<5>), + TEST("Median 5x5, 3 channel (U16)", (test_median_blur<5, uint16_t, 3>), exec_median_blur<5>), + TEST("Median 5x5, 4 channel (U16)", (test_median_blur<5, uint16_t, 4>), exec_median_blur<5>), + TEST("Median 5x5, 1 channel (S16)", (test_median_blur<5, int16_t, 1>), exec_median_blur<5>), + TEST("Median 5x5, 3 channel (S16)", (test_median_blur<5, int16_t, 3>), exec_median_blur<5>), + TEST("Median 5x5, 4 channel (S16)", (test_median_blur<5, int16_t, 4>), exec_median_blur<5>), + TEST("Median 5x5, 1 channel (F32)", (test_median_blur<5, float, 1>), exec_median_blur<5>), + TEST("Median 5x5, 3 channel (F32)", (test_median_blur<5, float, 3>), exec_median_blur<5>), + TEST("Median 5x5, 4 channel (F32)", (test_median_blur<5, float, 4>), exec_median_blur<5>), + TEST("Median 7x7, 1 channel (U8)", (test_median_blur<7, uint8_t, 1>), 
exec_median_blur<7>), + TEST("Median 7x7, 3 channel (U8)", (test_median_blur<7, uint8_t, 3>), exec_median_blur<7>), + TEST("Median 7x7, 4 channel (U8)", (test_median_blur<7, uint8_t, 4>), exec_median_blur<7>), + TEST("Median 9x9, 1 channel (U8)", (test_median_blur<9, uint8_t, 1>), exec_median_blur<9>), + TEST("Median 9x9, 3 channel (U8)", (test_median_blur<9, uint8_t, 3>), exec_median_blur<9>), + TEST("Median 9x9, 4 channel (U8)", (test_median_blur<9, uint8_t, 4>), exec_median_blur<9>), + TEST("Median 11x11, 1 channel (U8)", (test_median_blur<11, uint8_t, 1>), exec_median_blur<11>), + TEST("Median 11x11, 3 channel (U8)", (test_median_blur<11, uint8_t, 3>), exec_median_blur<11>), + TEST("Median 11x11, 4 channel (U8)", (test_median_blur<11, uint8_t, 4>), exec_median_blur<11>), + TEST("Median 13x13, 1 channel (U8)", (test_median_blur<13, uint8_t, 1>), exec_median_blur<13>), + TEST("Median 13x13, 3 channel (U8)", (test_median_blur<13, uint8_t, 3>), exec_median_blur<13>), + TEST("Median 13x13, 4 channel (U8)", (test_median_blur<13, uint8_t, 4>), exec_median_blur<13>), + TEST("Median 15x15, 1 channel (U8)", (test_median_blur<15, uint8_t, 1>), exec_median_blur<15>), + TEST("Median 15x15, 3 channel (U8)", (test_median_blur<15, uint8_t, 3>), exec_median_blur<15>), + TEST("Median 15x15, 4 channel (U8)", (test_median_blur<15, uint8_t, 4>), exec_median_blur<15>), }; // clang-format on return tests; diff --git a/doc/functionality.md b/doc/functionality.md index 67c859361..255fb9beb 100644 --- a/doc/functionality.md +++ b/doc/functionality.md @@ -80,6 +80,8 @@ See `doc/opencv.md` for details of the functionality available in OpenCV. 
| Separable Filter 2D (5x5) | | x | x | x | | | | | Gaussian Blur (3x3, 5x5, 7x7, 15x15, 21x21) | | x | | | | | | | Median Blur (3x3, 5x5, 7x7) | x | x | x | x | x | x | x | +| Median Blur (9x9, 11x11, 13x13, 15x15) | | x | | | | | | + ## Resize to quarter | | u8 | diff --git a/doc/opencv.md b/doc/opencv.md index 5644dd241..5175804c6 100644 --- a/doc/opencv.md +++ b/doc/opencv.md @@ -165,8 +165,11 @@ Notes on parameters: Applies median filter to a given image. Notes on parameters: -* `src.cols`,`src.rows` - image width and height must be greater than or equal to `ksize - 1` (i.e. `>= 4` for 5x5). -* `ksize` - only values 3, 5 and 7 are supported (i.e. 3x3, 5×5 and 7×7 kernels). +* `src.cols`,`src.rows` - image width and height must be greater than or equal to `ksize - 1` (e.g., `>= 4` for 5x5, `>= 14` for 15x15). +* `ksize` + - for `CV_8U`, supported kernel sizes are 3x3, 5×5, 7×7, 9×9, 11×11, 13×13, and 15×15. + - For other types, only 3x3, 5×5 and 7×7 are supported. + ### [`cv::transpose()`](https://docs.opencv.org/4.10.0/d2/de8/group__core__array.html#ga46630ed6c0ea6254a35f447289bd7404) Transposes a matrix. diff --git a/kleidicv/include/kleidicv/filters/median_blur.h b/kleidicv/include/kleidicv/filters/median_blur.h index 2ee23d829..16a30f7ac 100644 --- a/kleidicv/include/kleidicv/filters/median_blur.h +++ b/kleidicv/include/kleidicv/filters/median_blur.h @@ -14,78 +14,122 @@ extern "C" { -// For internal use only. See instead kleidicv_median_blur_stripe_s8. -// find a median across an image. The stripe is defined by the -// range (y_begin, y_end]. -KLEIDICV_API_DECLARATION(kleidicv_median_blur_stripe_s8, const int8_t *src, - size_t src_stride, int8_t *dst, size_t dst_stride, - size_t width, size_t height, size_t y_begin, - size_t y_end, size_t channels, size_t kernel_width, - size_t kernel_height, +// For internal use only. See instead kleidicv_median_blur_s8. +// Find a median across an image. +// The stripe is defined by the range [y_begin, y_end).
+KLEIDICV_API_DECLARATION(kleidicv_median_blur_sorting_network_stripe_s8, + const int8_t *src, size_t src_stride, int8_t *dst, + size_t dst_stride, size_t width, size_t height, + size_t y_begin, size_t y_end, size_t channels, + size_t kernel_width, size_t kernel_height, kleidicv::FixedBorderType border_type); -// For internal use only. See instead kleidicv_median_blur_stripe_u8. -// find a median across an image. The stripe is defined by the -// range (y_begin, y_end]. -KLEIDICV_API_DECLARATION(kleidicv_median_blur_stripe_u8, const uint8_t *src, - size_t src_stride, uint8_t *dst, size_t dst_stride, - size_t width, size_t height, size_t y_begin, - size_t y_end, size_t channels, size_t kernel_width, - size_t kernel_height, +// For internal use only. See instead kleidicv_median_blur_u8. +// Find a median across an image. +// The stripe is defined by the range [y_begin, y_end). +KLEIDICV_API_DECLARATION(kleidicv_median_blur_sorting_network_stripe_u8, + const uint8_t *src, size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, size_t height, + size_t y_begin, size_t y_end, size_t channels, + size_t kernel_width, size_t kernel_height, kleidicv::FixedBorderType border_type); -// For internal use only. See instead kleidicv_median_blur_stripe_s16. -// Filter a horizontal stripe across an image. The stripe is defined by the -// range (y_begin, y_end]. -KLEIDICV_API_DECLARATION(kleidicv_median_blur_stripe_s16, const int16_t *src, - size_t src_stride, int16_t *dst, size_t dst_stride, - size_t width, size_t height, size_t y_begin, - size_t y_end, size_t channels, size_t kernel_width, - size_t kernel_height, +// For internal use only. See instead kleidicv_median_blur_s16. +// Find a median across an image. +// The stripe is defined by the range [y_begin, y_end).
+KLEIDICV_API_DECLARATION(kleidicv_median_blur_sorting_network_stripe_s16, + const int16_t *src, size_t src_stride, int16_t *dst, + size_t dst_stride, size_t width, size_t height, + size_t y_begin, size_t y_end, size_t channels, + size_t kernel_width, size_t kernel_height, kleidicv::FixedBorderType border_type); -// For internal use only. See instead kleidicv_median_blur_stripe_u16. -// Filter a horizontal stripe across an image. The stripe is defined by the -// range (y_begin, y_end]. -KLEIDICV_API_DECLARATION(kleidicv_median_blur_stripe_u16, const uint16_t *src, - size_t src_stride, uint16_t *dst, size_t dst_stride, - size_t width, size_t height, size_t y_begin, - size_t y_end, size_t channels, size_t kernel_width, - size_t kernel_height, +// For internal use only. See instead kleidicv_median_blur_u16. +// Find a median across an image. +// The stripe is defined by the range [y_begin, y_end). +KLEIDICV_API_DECLARATION(kleidicv_median_blur_sorting_network_stripe_u16, + const uint16_t *src, size_t src_stride, uint16_t *dst, + size_t dst_stride, size_t width, size_t height, + size_t y_begin, size_t y_end, size_t channels, + size_t kernel_width, size_t kernel_height, kleidicv::FixedBorderType border_type); -// For internal use only. See instead kleidicv_median_blur_stripe_s32. -// Filter a horizontal stripe across an image. The stripe is defined by the -// range (y_begin, y_end]. -KLEIDICV_API_DECLARATION(kleidicv_median_blur_stripe_s32, const int32_t *src, - size_t src_stride, int32_t *dst, size_t dst_stride, - size_t width, size_t height, size_t y_begin, - size_t y_end, size_t channels, size_t kernel_width, - size_t kernel_height, +// For internal use only. See instead kleidicv_median_blur_s32. +// Find a median across an image. +// The stripe is defined by the range [y_begin, y_end).
+KLEIDICV_API_DECLARATION(kleidicv_median_blur_sorting_network_stripe_s32, + const int32_t *src, size_t src_stride, int32_t *dst, + size_t dst_stride, size_t width, size_t height, + size_t y_begin, size_t y_end, size_t channels, + size_t kernel_width, size_t kernel_height, kleidicv::FixedBorderType border_type); -// For internal use only. See instead kleidicv_median_blur_stripe_u32. -// Filter a horizontal stripe across an image. The stripe is defined by the -// range (y_begin, y_end]. -KLEIDICV_API_DECLARATION(kleidicv_median_blur_stripe_u32, const uint32_t *src, - size_t src_stride, uint32_t *dst, size_t dst_stride, - size_t width, size_t height, size_t y_begin, - size_t y_end, size_t channels, size_t kernel_width, - size_t kernel_height, +// For internal use only. See instead kleidicv_median_blur_u32. +// Find a median across an image. +// The stripe is defined by the range [y_begin, y_end). +KLEIDICV_API_DECLARATION(kleidicv_median_blur_sorting_network_stripe_u32, + const uint32_t *src, size_t src_stride, uint32_t *dst, + size_t dst_stride, size_t width, size_t height, + size_t y_begin, size_t y_end, size_t channels, + size_t kernel_width, size_t kernel_height, kleidicv::FixedBorderType border_type); -// For internal use only. See instead kleidicv_median_blur_stripe_f32. -// Filter a horizontal stripe across an image. The stripe is defined by the -// range (y_begin, y_end]. -KLEIDICV_API_DECLARATION(kleidicv_median_blur_stripe_f32, const float *src, - size_t src_stride, float *dst, size_t dst_stride, - size_t width, size_t height, size_t y_begin, - size_t y_end, size_t channels, size_t kernel_width, - size_t kernel_height, +// For internal use only. See instead kleidicv_median_blur_f32. +// Find a median across an image. +// The stripe is defined by the range [y_begin, y_end).
+KLEIDICV_API_DECLARATION(kleidicv_median_blur_sorting_network_stripe_f32, + const float *src, size_t src_stride, float *dst, + size_t dst_stride, size_t width, size_t height, + size_t y_begin, size_t y_end, size_t channels, + size_t kernel_width, size_t kernel_height, + kleidicv::FixedBorderType border_type); + +// For internal use only. See instead kleidicv_median_blur_u8. +// Find a median across an image. +// The stripe is defined by the range [y_begin, y_end). +KLEIDICV_API_DECLARATION(kleidicv_median_blur_small_hist_stripe_u8, + const uint8_t *src, size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, size_t height, + size_t y_begin, size_t y_end, size_t channels, + size_t kernel_width, size_t kernel_height, kleidicv::FixedBorderType border_type); } namespace kleidicv { + +namespace neon { +template +kleidicv_error_t median_blur_sorting_network_stripe( + const T *src, size_t src_stride, T *dst, size_t dst_stride, size_t width, + size_t height, size_t y_begin, size_t y_end, size_t channels, + size_t kernel_width, size_t kernel_height, FixedBorderType border_type); + +kleidicv_error_t median_blur_small_hist_stripe_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, + size_t kernel_width, size_t kernel_height, FixedBorderType border_type); +} // namespace neon + +namespace sve2 { + +template +kleidicv_error_t median_blur_sorting_network_stripe( + const T *src, size_t src_stride, T *dst, size_t dst_stride, size_t width, + size_t height, size_t y_begin, size_t y_end, size_t channels, + size_t kernel_width, size_t kernel_height, FixedBorderType border_type); + +} // namespace sve2 + +namespace sme2 { + +template +kleidicv_error_t median_blur_sorting_network_stripe( + const T *src, size_t src_stride, T *dst, size_t dst_stride, size_t width, + size_t height, size_t y_begin, size_t y_end, size_t channels, + size_t kernel_width, size_t kernel_height,
FixedBorderType border_type); + +} // namespace sme2 + template inline kleidicv_error_t check_ptrs_strides_imagesizes(const T *src, size_t src_stride, T *dst, @@ -97,6 +141,13 @@ inline kleidicv_error_t check_ptrs_strides_imagesizes(const T *src, CHECK_IMAGE_SIZE(width, height); return KLEIDICV_OK; } + +// Returns true for square kernels whose side is odd and within [3, 15]. +// NOTE(review): this check is independent of the element type, although the +// public documentation limits sizes above 7 to uint8_t. Confirm that the +// stripe implementations for the other types reject 9, 11, 13 and 15. +inline bool is_kernel_size_supported(size_t kernel_width, + size_t kernel_height) { + return (kernel_width == kernel_height) && (kernel_width >= 3) && + (kernel_width <= 15) && ((kernel_width % 2) != 0); +} + template inline std::pair median_blur_is_implemented( const T *src, size_t src_stride, T *dst, size_t dst_stride, size_t width, @@ -109,10 +160,10 @@ inline std::pair median_blur_is_implemented( } auto fixed_border_type = kleidicv::get_fixed_border_type(border_type); + if ((src != dst) && (channels <= KLEIDICV_MAXIMUM_CHANNEL_COUNT) && - (kernel_width == kernel_height) && (height >= kernel_height - 1) && - (width >= kernel_width - 1) && - ((kernel_width == 3) || (kernel_width == 5) || (kernel_width == 7)) && + (height >= kernel_height - 1) && (width >= kernel_width - 1) && + is_kernel_size_supported(kernel_width, kernel_height) && fixed_border_type.has_value()) { return std::make_pair(KLEIDICV_OK, *fixed_border_type); } diff --git a/kleidicv/include/kleidicv/kleidicv.h b/kleidicv/include/kleidicv/kleidicv.h index eb27f7efd..0b56a5a20 100644 --- a/kleidicv/include/kleidicv/kleidicv.h +++ b/kleidicv/include/kleidicv/kleidicv.h @@ -2056,11 +2056,14 @@ kleidicv_error_t kleidicv_warp_perspective_u8( /// equal to `kernel_height - 1`. /// @param channels Number of channels in the data. Must not be more than /// @ref KLEIDICV_MAXIMUM_CHANNEL_COUNT. -/// @param kernel_width Width of the Median kernel. Must be 3 or 5 or 7 and -/// equal to `kernel_height`. -/// @param kernel_height Height of the Median kernel. Must be 3 or 5 or 7 and -/// equal to `kernel_width`. -/// +/// @param kernel_width Width of the Median kernel. Must be odd and equal to +/// `kernel_height`.
+/// For `uint8_t`, values 3 to 15 are supported. For other +/// types, only 3, 5 and 7. +/// @param kernel_height Height of the Median kernel. Must be odd and equal to +/// `kernel_width`. +/// For `uint8_t`, values 3 to 15 are supported. For other +/// types, only 3, 5 and 7. /// @param border_type Way of handling the border. The supported border types /// are: \n /// - @ref KLEIDICV_BORDER_TYPE_REPLICATE \n diff --git a/kleidicv/include/kleidicv/neon_intrinsics.h b/kleidicv/include/kleidicv/neon_intrinsics.h index 77a11c20d..37242e7ab 100644 --- a/kleidicv/include/kleidicv/neon_intrinsics.h +++ b/kleidicv/include/kleidicv/neon_intrinsics.h @@ -54,10 +54,12 @@ namespace kleidicv::neon { // Alphabetical order NEON_BINARY_OP_Q_B8_B16_B32_B64(vaddq); +NEON_BINARY_OP_Q_B8_B16_B32_B64(vceqq); NEON_BINARY_OP_Q_B8_B16_B32_B64(vcleq); NEON_BINARY_OP_Q_B8_B16_B32_B64(vcgeq); NEON_BINARY_OP_Q_B8_B16_B32_B64(vqaddq); NEON_BINARY_OP_Q_B8_B16_B32_B64(vqsubq); +NEON_BINARY_OP_Q_B8_B16_B32_B64(vsubq); NEON_BINARY_OP_Q_B8_B16_B32_B64(vtrn1q); NEON_BINARY_OP_Q_B8_B16_B32_B64(vtrn2q); NEON_BINARY_OP_Q_B8_B16_B32_B64(vuzp1q); @@ -251,6 +253,21 @@ template static inline uint16x4_t vrshrn_n(uint32x4_t vec) { return vrsh template static inline int32x2_t vrshrn_n(int64x2_t vec) { return vrshrn_n_s64(vec, n); } template static inline uint32x2_t vrshrn_n(uint64x2_t vec) { return vrshrn_n_u64(vec, n); } +// ----------------------------------------------------------------------------- +// vshrq_n* +// ----------------------------------------------------------------------------- + +template static inline int8x16_t vshrq_n(int8x16_t vec) { return vshrq_n_s8(vec, n); } +template static inline uint8x16_t vshrq_n(uint8x16_t vec) { return vshrq_n_u8(vec, n); } +template static inline int16x8_t vshrq_n(int16x8_t vec) { return vshrq_n_s16(vec, n); } +template static inline uint16x8_t vshrq_n(uint16x8_t vec) { return vshrq_n_u16(vec, n); } +template static inline int32x4_t vshrq_n(int32x4_t vec) { 
return vshrq_n_s32(vec, n); } +template static inline uint32x4_t vshrq_n(uint32x4_t vec) { return vshrq_n_u32(vec, n); } +template static inline int64x2_t vshrq_n(int64x2_t vec) { return vshrq_n_s64(vec, n); } +template static inline uint64x2_t vshrq_n(uint64x2_t vec) { return vshrq_n_u64(vec, n); } + +// ----------------------------------------------------------------------------- + // ----------------------------------------------------------------------------- // vshll_n* // ----------------------------------------------------------------------------- diff --git a/kleidicv/src/filters/median_blur_api.cpp b/kleidicv/src/filters/median_blur_api.cpp index a95007df7..5c4b79c22 100644 --- a/kleidicv/src/filters/median_blur_api.cpp +++ b/kleidicv/src/filters/median_blur_api.cpp @@ -5,59 +5,33 @@ #include "kleidicv/dispatch.h" #include "kleidicv/filters/median_blur.h" #include "kleidicv/kleidicv.h" -namespace kleidicv { -namespace neon { - -template -kleidicv_error_t median_blur_stripe(const T *src, size_t src_stride, T *dst, - size_t dst_stride, size_t width, - size_t height, size_t y_begin, size_t y_end, - size_t channels, size_t kernel_width, - size_t kernel_height, - FixedBorderType border_type); - -} // namespace neon +#define KLEIDICV_DEFINE_C_API(name, type) \ + KLEIDICV_MULTIVERSION_C_API( \ + name, &kleidicv::neon::median_blur_sorting_network_stripe, \ + KLEIDICV_SVE2_IMPL_IF( \ + kleidicv::sve2::median_blur_sorting_network_stripe), \ + &kleidicv::sme2::median_blur_sorting_network_stripe) -namespace sve2 { +KLEIDICV_DEFINE_C_API(kleidicv_median_blur_sorting_network_stripe_s8, int8_t); -template -kleidicv_error_t median_blur_stripe(const T *src, size_t src_stride, T *dst, - size_t dst_stride, size_t width, - size_t height, size_t y_begin, size_t y_end, - size_t channels, size_t kernel_width, - size_t kernel_height, - FixedBorderType border_type); +KLEIDICV_DEFINE_C_API(kleidicv_median_blur_sorting_network_stripe_u8, uint8_t); -} // namespace sve2 
+KLEIDICV_DEFINE_C_API(kleidicv_median_blur_sorting_network_stripe_u16, + uint16_t); -namespace sme2 { +KLEIDICV_DEFINE_C_API(kleidicv_median_blur_sorting_network_stripe_s16, int16_t); -template -kleidicv_error_t median_blur_stripe(const T *src, size_t src_stride, T *dst, - size_t dst_stride, size_t width, - size_t height, size_t y_begin, size_t y_end, - size_t channels, size_t kernel_width, - size_t kernel_height, - FixedBorderType border_type); +KLEIDICV_DEFINE_C_API(kleidicv_median_blur_sorting_network_stripe_u32, + uint32_t); -} // namespace sme2 +KLEIDICV_DEFINE_C_API(kleidicv_median_blur_sorting_network_stripe_s32, int32_t); -} // namespace kleidicv +KLEIDICV_DEFINE_C_API(kleidicv_median_blur_sorting_network_stripe_f32, float); -#define KLEIDICV_DEFINE_C_API(name, type) \ - KLEIDICV_MULTIVERSION_C_API( \ - name, &kleidicv::neon::median_blur_stripe, \ - KLEIDICV_SVE2_IMPL_IF(kleidicv::sve2::median_blur_stripe), \ - &kleidicv::sme2::median_blur_stripe) - -KLEIDICV_DEFINE_C_API(kleidicv_median_blur_stripe_s8, int8_t); -KLEIDICV_DEFINE_C_API(kleidicv_median_blur_stripe_u8, uint8_t); -KLEIDICV_DEFINE_C_API(kleidicv_median_blur_stripe_u16, uint16_t); -KLEIDICV_DEFINE_C_API(kleidicv_median_blur_stripe_s16, int16_t); -KLEIDICV_DEFINE_C_API(kleidicv_median_blur_stripe_u32, uint32_t); -KLEIDICV_DEFINE_C_API(kleidicv_median_blur_stripe_s32, int32_t); -KLEIDICV_DEFINE_C_API(kleidicv_median_blur_stripe_f32, float); +KLEIDICV_MULTIVERSION_C_API(kleidicv_median_blur_small_hist_stripe_u8, + &kleidicv::neon::median_blur_small_hist_stripe_u8, + nullptr, nullptr); extern "C" { @@ -76,7 +50,7 @@ kleidicv_error_t kleidicv_median_blur_s8(const int8_t *src, size_t src_stride, return checks_result; } - return kleidicv_median_blur_stripe_s8( + return kleidicv_median_blur_sorting_network_stripe_s8( src, src_stride, dst, dst_stride, width, height, 0, height, channels, kernel_width, kernel_height, fixed_border_type); } @@ -96,7 +70,13 @@ kleidicv_error_t kleidicv_median_blur_u8(const 
uint8_t *src, size_t src_stride, return checks_result; } - return kleidicv_median_blur_stripe_u8( + if (kernel_width > 7) { + return kleidicv_median_blur_small_hist_stripe_u8( + src, src_stride, dst, dst_stride, width, height, 0, height, channels, + kernel_width, kernel_height, fixed_border_type); + } + + return kleidicv_median_blur_sorting_network_stripe_u8( src, src_stride, dst, dst_stride, width, height, 0, height, channels, kernel_width, kernel_height, fixed_border_type); } @@ -116,7 +96,7 @@ kleidicv_error_t kleidicv_median_blur_s16(const int16_t *src, size_t src_stride, return checks_result; } - return kleidicv_median_blur_stripe_s16( + return kleidicv_median_blur_sorting_network_stripe_s16( src, src_stride, dst, dst_stride, width, height, 0, height, channels, kernel_width, kernel_height, fixed_border_type); } @@ -134,7 +114,7 @@ kleidicv_error_t kleidicv_median_blur_u16( return checks_result; } - return kleidicv_median_blur_stripe_u16( + return kleidicv_median_blur_sorting_network_stripe_u16( src, src_stride, dst, dst_stride, width, height, 0, height, channels, kernel_width, kernel_height, fixed_border_type); } @@ -154,7 +134,7 @@ kleidicv_error_t kleidicv_median_blur_s32(const int32_t *src, size_t src_stride, return checks_result; } - return kleidicv_median_blur_stripe_s32( + return kleidicv_median_blur_sorting_network_stripe_s32( src, src_stride, dst, dst_stride, width, height, 0, height, channels, kernel_width, kernel_height, fixed_border_type); } @@ -172,7 +152,7 @@ kleidicv_error_t kleidicv_median_blur_u32( return checks_result; } - return kleidicv_median_blur_stripe_u32( + return kleidicv_median_blur_sorting_network_stripe_u32( src, src_stride, dst, dst_stride, width, height, 0, height, channels, kernel_width, kernel_height, fixed_border_type); } @@ -192,7 +172,7 @@ kleidicv_error_t kleidicv_median_blur_f32(const float *src, size_t src_stride, return checks_result; } - return kleidicv_median_blur_stripe_f32( + return 
kleidicv_median_blur_sorting_network_stripe_f32( src, src_stride, dst, dst_stride, width, height, 0, height, channels, kernel_width, kernel_height, fixed_border_type); } diff --git a/kleidicv/src/filters/median_blur_small_hist_neon.cpp b/kleidicv/src/filters/median_blur_small_hist_neon.cpp new file mode 100644 index 000000000..3c14f6eb0 --- /dev/null +++ b/kleidicv/src/filters/median_blur_small_hist_neon.cpp @@ -0,0 +1,436 @@ +// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "kleidicv/ctypes.h" +#include "kleidicv/filters/median_blur.h" +#include "kleidicv/kleidicv.h" +#include "kleidicv/neon.h" + +namespace kleidicv::neon { + +// Maps a logical row or column index onto a valid index in [0, limit) +// according to border_type. 'index' may have wrapped around below zero in +// size_t arithmetic; it is reinterpreted as a signed int before mapping. +// In-range indices are returned unchanged. +// REPLICATE clamps to the nearest edge, REFLECT mirrors including the edge +// element, REVERSE mirrors excluding it, and WRAP continues from the +// opposite edge. +// NOTE(review): the mirror/wrap formulas apply a single fold only, so this +// presumably relies on the caller's width/height >= ksize - 1 check. +static ptrdiff_t get_physical_index(size_t index, size_t limit, + FixedBorderType border_type) { + int result = 0; + int signed_index = static_cast(index); + int signed_limit = static_cast(limit); + + if (signed_index >= 0 && signed_index < signed_limit) { + return static_cast(index); + } + switch (border_type) { + case FixedBorderType::REPLICATE: { + result = std::clamp(signed_index, 0, signed_limit - 1); + break; + } + case FixedBorderType::REFLECT: { + if (signed_index < 0) { + result = -signed_index - 1; + } else { + result = 2 * signed_limit - signed_index - 1; + } + break; + } + + case FixedBorderType::WRAP: { + result = (signed_index + signed_limit) % signed_limit; + break; + } + + case FixedBorderType::REVERSE: { + if (signed_index < 0) { + result = std::min(-signed_index, signed_limit - 1); + } else { + result = 2 * signed_limit - signed_index - 2; + } + break; + } + } + + return static_cast(result); +} + +// B. Weiss, "Fast Median and Bilateral Filtering," in *ACM SIGGRAPH 2006 +// Papers*, ACM, New York, NY, USA, pp. 519–526, 2006.
+// The paper is currently available at: +// http://mesh.brown.edu/engn1610/refs/Weiss-siggraph2006.pdf +class MedianBlurSmallHist { + public: + MedianBlurSmallHist() : fine{}, coarse{} {} + + void process_pixels_with_horizontal_borders( + Rectangle image_dimensions, Point starting_coordinates, + Point ending_coordinates, Rows src_rows, + Rows dst_rows, size_t ksize, FixedBorderType border_type) { + const size_t KMargin = ksize / 2; + + for (size_t w = starting_coordinates.x(); w < ending_coordinates.x(); w++) { + for (ptrdiff_t ch = 0; ch < static_cast(src_rows.channels()); + ch++) { + scalar_clear_histogram(); + + // We initialize with ksize rows to allow merging of + // histogram increment and decrement operations in the main loop. + // This extra initial load enables a single update phase and avoids + // splitting the logic into separate steps. + for (size_t r = 0; r < ksize; r++) { + for (size_t c = 0; c < ksize; c++) { + const ptrdiff_t valid_h = + get_physical_index(starting_coordinates.y() + r - KMargin, + image_dimensions.height(), border_type); + const ptrdiff_t valid_w = get_physical_index( + w + c - KMargin, image_dimensions.width(), border_type); + + uint8_t pixel = src_rows.at(valid_h, valid_w)[ch]; + + scalar_initialize_histogram(pixel); + } + } + + const uint8_t median_value = scalar_find_median(ksize); + + dst_rows.at(static_cast(starting_coordinates.y()), + static_cast(w))[ch] = median_value; + + for (size_t h = starting_coordinates.y() + 1; + h < ending_coordinates.y(); h++) { + const ptrdiff_t valid_new_h = get_physical_index( + h + KMargin, image_dimensions.height(), border_type); + + const ptrdiff_t valid_old_h = get_physical_index( + h - KMargin - 1, image_dimensions.height(), border_type); + + for (size_t c = 0; c < ksize; c++) { + const ptrdiff_t valid_w = get_physical_index( + w + c - KMargin, image_dimensions.width(), border_type); + + uint8_t incoming_pixel = src_rows.at(valid_new_h, valid_w)[ch]; + + uint8_t outgoing_pixel = 
src_rows.at(valid_old_h, valid_w)[ch]; + + scalar_update_histogram(incoming_pixel, outgoing_pixel); + } + + const uint8_t median_value = scalar_find_median(ksize); + + dst_rows.at(static_cast(h), + static_cast(w))[ch] = median_value; + } + } + } + } + + void process_pixels_without_horizontal_borders( + Rectangle image_dimensions, Point starting_coordinates, + Point ending_coordinates, Rows src_rows, + Rows dst_rows, size_t ksize, FixedBorderType border_type) { + const size_t step = sizeof(uint8x16_t); + const size_t KMargin_w = (ksize / 2) * src_rows.channels(); + const size_t KMargin_h = (ksize / 2); + + for (size_t w = starting_coordinates.x(); w < ending_coordinates.x(); + w += step) { + vector_clear_histogram(); + + // We initialize with ksize rows to allow merging of + // histogram increment and decrement operations in the main loop. + // This extra initial load enables a single update phase and avoids + // splitting the logic into separate steps. + for (size_t r = 0; r < ksize; r++) { + const ptrdiff_t vertical_index = + get_physical_index(starting_coordinates.y() + r - KMargin_h, + image_dimensions.height(), border_type); + + for (size_t c = 0; c < ksize; c++) { + const size_t horizontal_index = + w + c * src_rows.channels() - KMargin_w; + + uint8x16_t pixel = vld1q_u8( + &src_rows[vertical_index * src_rows.stride() + horizontal_index]); + + vector_initialize_histogram(pixel); + } + } + + const uint8x16_t median_value = vector_find_median(ksize); + + vst1q_u8(&dst_rows[starting_coordinates.y() * dst_rows.stride() + w], + median_value); + + for (size_t h = starting_coordinates.y() + 1; h < ending_coordinates.y(); + ++h) { + const ptrdiff_t vertical_index_new = get_physical_index( + h + KMargin_h, image_dimensions.height(), border_type); + + const ptrdiff_t vertical_index_old = get_physical_index( + h - KMargin_h - 1, image_dimensions.height(), border_type); + + for (size_t c = 0; c < ksize; c++) { + size_t horizontal_index = w + c * src_rows.channels() - 
KMargin_w; + + uint8x16_t incoming_pixels = + vld1q_u8(&src_rows[vertical_index_new * src_rows.stride() + + horizontal_index]); + + uint8x16_t outgoing_pixels = + vld1q_u8(&src_rows[vertical_index_old * src_rows.stride() + + horizontal_index]); + + vector_update_histogram(incoming_pixels, outgoing_pixels); + } + + const uint8x16_t median_value = vector_find_median(ksize); + + vst1q_u8(&dst_rows[h * dst_rows.stride() + w], median_value); + } + } + } + + private: + // The 'fine' and 'coarse' histograms are shared between both scalar and + // vector operations. Their buffer sizes are allocated based on the + // vectorized case to ensure compatibility and avoid reallocation. + // In case of vectorized execution, 'fine' and 'coarse' are actually + // implemented as 16 interleaved histograms, one per vector lane. + uint8_t fine[4096]; + uint8_t coarse[256]; + + // In scalar_clear_histogram, we only clear the relevant portions of the + // 'fine' and 'coarse' buffers that are actually used during computation. This + // avoids unnecessary memory operations. + void scalar_clear_histogram() { + memset(fine, 0, sizeof(fine[0]) * 256); + memset(coarse, 0, sizeof(coarse[0]) * 16); + } + + void vector_clear_histogram() { + memset(fine, 0, sizeof(uint8_t) * 4096); + memset(coarse, 0, sizeof(uint8_t) * 256); + } + + // Before the main vertical loop over 'height', the histogram must be + // initialized for each new 'width'. This is done using either + // scalar_initialize_histogram or vector_initialize_histogram depending on the + // processing mode. These functions preload the histogram using rows from the + // source image to enable efficient sliding window updates during vertical + // traversal. 
+ void scalar_initialize_histogram(uint8_t incoming_pixel) { + fine[incoming_pixel]++; + coarse[incoming_pixel >> 4]++; + } + + void vector_initialize_histogram(uint8x16_t& incoming_pixels) { + KLEIDICV_FORCE_LOOP_UNROLL + for (int i = 0; i < 16; i++) { + fine[incoming_pixels[i] * 16 + i]++; + } + + incoming_pixels = vshrq_n_u8(incoming_pixels, 4); + + uint8x16_t* vec_coarse = reinterpret_cast(coarse); + vec_coarse[0] = vsubq(vec_coarse[0], vceqzq_u8(incoming_pixels)); + KLEIDICV_FORCE_LOOP_UNROLL + for (int i = 1; i < 16; i++) { + uint8x16_t index = vdupq_n_u8(i); + vec_coarse[i] = vsubq(vec_coarse[i], vceqq(incoming_pixels, index)); + } + } + + // During vertical traversal (the main 'height' loop), each sliding window + // iteration introduces a new incoming row and removes an outgoing one. The + // histogram must be updated accordingly by subtracting the contributions of + // the outgoing row and adding those of the incoming row. + // In many cases, incoming and outgoing pixels may be equal, so we perform a + // conditional check to avoid unnecessary updates. + // Both increment and decrement operations are handled inside the same + // function (scalar_update_histogram / vector_update_histogram) for + // efficiency. 
+ void scalar_update_histogram(uint8_t& incoming_pixel, + uint8_t& outgoing_pixel) { + if (incoming_pixel != outgoing_pixel) { + fine[incoming_pixel]++; + coarse[incoming_pixel >> 4]++; + fine[outgoing_pixel]--; + coarse[outgoing_pixel >> 4]--; + } + } + + void vector_update_histogram(uint8x16_t& incoming_pixels, + uint8x16_t& outgoing_pixels) { + KLEIDICV_FORCE_LOOP_UNROLL + for (int i = 0; i < 16; i++) { + fine[incoming_pixels[i] * 16 + i]++; + fine[outgoing_pixels[i] * 16 + i]--; + } + + uint8x16_t* vec_coarse = reinterpret_cast(coarse); + incoming_pixels = vshrq_n<4>(incoming_pixels); + outgoing_pixels = vshrq_n<4>(outgoing_pixels); + + uint8x16_t delta = + vsubq(vceqzq_u8(outgoing_pixels), vceqzq_u8(incoming_pixels)); + vec_coarse[0] = vaddq(vec_coarse[0], delta); + + KLEIDICV_FORCE_LOOP_UNROLL + for (int i = 1; i < 16; i++) { + uint8x16_t index = vdupq_n_u8(i); + delta = + vsubq(vceqq(outgoing_pixels, index), vceqq(incoming_pixels, index)); + vec_coarse[i] = vaddq(vec_coarse[i], delta); + } + } + + // To find the median efficiently, we first scan the coarse histogram to + // identify the segment (coarse bin) where the median value lies. This helps + // narrow down the search range in the fine histogram. 
Once the correct coarse + // bin is located, we scan the corresponding segment in the fine histogram + // until the cumulative distribution function (CDF) reaches the target CDF. + uint8_t scalar_find_median(size_t ksize) { + // The target median index in a sorted window + const uint8_t target_cdf = (ksize * ksize) / 2; + + // Variables for histogram scanning + uint8_t cumulative_sum = 0; + int fine_index = 0; + int coarse_index = 0; + + // Phase 1: Coarse histogram scan to find the correct bin range + while (true) { + if ((cumulative_sum + coarse[coarse_index]) > target_cdf) { + fine_index = coarse_index * 16; + break; + } + cumulative_sum += coarse[coarse_index]; + coarse_index++; + } + + // Phase 2: Fine histogram scan to locate the exact median value + while (true) { + cumulative_sum += fine[fine_index]; + if (cumulative_sum > target_cdf) { + break; + } + fine_index++; + } + + return fine_index; + } + + uint8x16_t vector_find_median(size_t ksize) { + // Calculate the target median index based on kernel size + const uint8x16_t target_cdf = vdupq_n_u8((ksize * ksize) / 2); + + // Cumulative sum vector used for tracking the running histogram total + uint8x16_t cumulative_sum = vdupq_n_u8(0); + + // Coarse histogram pointer (used to narrow the search) + uint8x16_t* coarse_histogram = reinterpret_cast(coarse); + + // Coarse pass: Locate the coarse histogram bin range likely containing the + // median value. This step identifies the starting fine histogram index for + // each lane, based on cumulative counts. It does not find the actual median + // yet. 
+ int coarse_index = 0; + int fine_index = 0; + + while (true) { + uint8x16_t cumulative_sum_next = + vaddq(cumulative_sum, coarse_histogram[coarse_index]); + uint8x16_t coarse_threshold_exceeded = + vcgtq_u8(cumulative_sum_next, target_cdf); + + if (any_lane_set(coarse_threshold_exceeded)) { + fine_index = coarse_index * 16; + break; + } + + cumulative_sum = cumulative_sum_next; + coarse_index++; + } + + // Fine pass: Scan the fine histogram to find the exact median per lane + uint8x16_t median_result = vdupq_n_u8(0); + uint8x16_t lane_found_mask = vdupq_n_u8(0); + uint8x16_t* fine_histogram = reinterpret_cast(fine); + + while (true) { + cumulative_sum = vaddq(cumulative_sum, fine_histogram[fine_index]); + + uint8x16_t still_searching_mask = vceqzq_u8(lane_found_mask); + median_result = + vbslq_u8(still_searching_mask, vdupq_n_u8(fine_index), median_result); + lane_found_mask = + vorrq_u8(lane_found_mask, vcgtq_u8(cumulative_sum, target_cdf)); + + if (all_lane_set(lane_found_mask)) { + break; + } + fine_index++; + } + + return median_result; + } + + bool all_lane_set(uint8x16_t& v_u8) { + uint32x4_t v_u32 = vreinterpretq_u32_u8(v_u8); + return vminvq_u32(v_u32) == 0xffffffff; + } + + bool any_lane_set(uint8x16_t v_u8) { + uint32x4_t v_u32 = vreinterpretq_u32_u8(v_u8); + return vmaxvq_u32(v_u32) != 0; + } +}; + +kleidicv_error_t median_blur_small_hist_stripe_u8( + const uint8_t* src, size_t src_stride, uint8_t* dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, + size_t kernel_width, size_t kernel_height, FixedBorderType border_type) { + Rectangle image_dimensions{width, height}; + Rows src_rows{src, src_stride, channels}; + Rows dst_rows{dst, dst_stride, channels}; + MedianBlurSmallHist median_filter; + const size_t KMargin = kernel_width / 2; + + // Process left border + size_t starting_width = 0; + const size_t processing_left_width = KMargin; + Point starting_left_coordinates{starting_width, y_begin}; + Point 
ending_left_coordinates{starting_width + processing_left_width, y_end}; + + median_filter.process_pixels_with_horizontal_borders( + image_dimensions, starting_left_coordinates, ending_left_coordinates, + src_rows, dst_rows, kernel_height, border_type); + + // Process center region + starting_width = processing_left_width; + // Compute the width of the center region that can be processed with NEON + // instructions. Subtract 2 * KMargin to exclude left and right borders, which + // are handled separately using scalar code due to varying border modes (e.g., + // REPLICATE, REFLECT, WRAP, REVERSE). Align the remaining width down to the + // nearest multiple of 16 to match NEON's 128-bit register width (16 bytes for + // uint8x16_t). + const size_t processing_center_width = ((width - 2 * KMargin) / 16) * 16; + Point starting_center_coordinates{starting_width * channels, y_begin}; + Point ending_center_coordinates{ + (processing_center_width + starting_width) * channels, y_end}; + + median_filter.process_pixels_without_horizontal_borders( + image_dimensions, starting_center_coordinates, ending_center_coordinates, + src_rows, dst_rows, kernel_height, border_type); + + // Process right border + starting_width = processing_left_width + processing_center_width; + const size_t processing_right_width = + width - processing_left_width - processing_center_width; + Point starting_right_coordinates{starting_width, y_begin}; + Point ending_right_coordinates{starting_width + processing_right_width, + y_end}; + + median_filter.process_pixels_with_horizontal_borders( + image_dimensions, starting_right_coordinates, ending_right_coordinates, + src_rows, dst_rows, kernel_height, border_type); + + return KLEIDICV_OK; +} +} // namespace kleidicv::neon diff --git a/kleidicv/src/filters/median_blur_neon.cpp b/kleidicv/src/filters/median_blur_sorting_network_neon.cpp similarity index 83% rename from kleidicv/src/filters/median_blur_neon.cpp rename to 
kleidicv/src/filters/median_blur_sorting_network_neon.cpp index bfbe7537d..e46e37c9a 100644 --- a/kleidicv/src/filters/median_blur_neon.cpp +++ b/kleidicv/src/filters/median_blur_sorting_network_neon.cpp @@ -19,7 +19,7 @@ namespace kleidicv::neon { // Primary template for Median Blur filters. template -class MedianBlur; +class MedianBlurSortingNetwork; template class VectorizedComparator { @@ -78,7 +78,7 @@ class ScalarComparator { // Template for Median Blur 3x3 filters. template -class MedianBlur { +class MedianBlurSortingNetwork { public: using SourceType = ScalarType; using DestinationType = SourceType; @@ -118,11 +118,11 @@ class MedianBlur { sorting_network3x3_dual_rows>( KernelWindow, output_vec0, output_vec1, ctx); } -}; // end of class MedianBlur +}; // end of class MedianBlurSortingNetwork // Template for Median Blur 5x5 filters. template -class MedianBlur { +class MedianBlurSortingNetwork { public: using SourceType = ScalarType; using DestinationType = SourceType; @@ -143,11 +143,11 @@ class MedianBlur { Monostate ctx; sorting_network5x5>(KernelWindow, dst, ctx); } -}; // end of class MedianBlur +}; // end of class MedianBlurSortingNetwork // Template for Median Blur 7x7 filters. 
template -class MedianBlur { +class MedianBlurSortingNetwork { public: using SourceType = ScalarType; using DestinationType = SourceType; @@ -168,45 +168,49 @@ class MedianBlur { Monostate ctx; sorting_network7x7>(KernelWindow, dst, ctx); } -}; // end of class MedianBlur +}; // end of class MedianBlurSortingNetwork template -kleidicv_error_t median_blur_stripe(const T* src, size_t src_stride, T* dst, - size_t dst_stride, size_t width, - size_t height, size_t y_begin, size_t y_end, - size_t channels, size_t kernel_width, - [[maybe_unused]] size_t kernel_height, - FixedBorderType border_type) { +kleidicv_error_t median_blur_sorting_network_stripe( + const T* src, size_t src_stride, T* dst, size_t dst_stride, size_t width, + size_t height, size_t y_begin, size_t y_end, size_t channels, + size_t kernel_width, [[maybe_unused]] size_t kernel_height, + FixedBorderType border_type) { Rectangle rect{width, height}; Rows src_rows{src, src_stride, channels}; Rows dst_rows{dst, dst_stride, channels}; if (kernel_width == 3) { - MedianBlur median_filter; - Filter2D3x3> filter{median_filter}; + MedianBlurSortingNetwork median_filter; + Filter2D3x3> filter{median_filter}; process_filter2d_by_dual_rows(rect, y_begin, y_end, src_rows, dst_rows, border_type, filter); - } else if (kernel_width == 5) { - MedianBlur median_filter; - Filter2D5x5> filter{median_filter}; + return KLEIDICV_OK; + } + if (kernel_width == 5) { + MedianBlurSortingNetwork median_filter; + Filter2D5x5> filter{median_filter}; process_filter2d(rect, y_begin, y_end, src_rows, dst_rows, border_type, filter); return KLEIDICV_OK; - } else { - MedianBlur median_filter; - Filter2D7x7> filter{median_filter}; + } + if (kernel_width == 7) { + MedianBlurSortingNetwork median_filter; + Filter2D7x7> filter{median_filter}; process_filter2d(rect, y_begin, y_end, src_rows, dst_rows, border_type, filter); + return KLEIDICV_OK; } - return KLEIDICV_OK; + return KLEIDICV_ERROR_NOT_IMPLEMENTED; } -#define 
KLEIDICV_INSTANTIATE_TEMPLATE(type) \ - template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t median_blur_stripe( \ - const type* src, size_t src_stride, type* dst, size_t dst_stride, \ - size_t width, size_t height, size_t y_begin, size_t y_end, \ - size_t channels, size_t kernel_width, size_t kernel_height, \ +#define KLEIDICV_INSTANTIATE_TEMPLATE(type) \ + template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t \ + median_blur_sorting_network_stripe( \ + const type* src, size_t src_stride, type* dst, size_t dst_stride, \ + size_t width, size_t height, size_t y_begin, size_t y_end, \ + size_t channels, size_t kernel_width, size_t kernel_height, \ FixedBorderType border_type) KLEIDICV_INSTANTIATE_TEMPLATE(int8_t); diff --git a/kleidicv/src/filters/median_blur_sc.h b/kleidicv/src/filters/median_blur_sorting_network_sc.h similarity index 84% rename from kleidicv/src/filters/median_blur_sc.h rename to kleidicv/src/filters/median_blur_sorting_network_sc.h index d16629e87..895b363c3 100644 --- a/kleidicv/src/filters/median_blur_sc.h +++ b/kleidicv/src/filters/median_blur_sorting_network_sc.h @@ -23,7 +23,7 @@ namespace KLEIDICV_TARGET_NAMESPACE { // Primary template for Median Blur filters. template -class MedianBlur; +class MedianBlurSortingNetwork; template class VectorComparator { @@ -61,7 +61,7 @@ class VectorComparator { // Template for Median Blur 3x3 filters. template -class MedianBlur { +class MedianBlurSortingNetwork { public: using SourceType = ScalarType; using DestinationType = SourceType; @@ -84,11 +84,11 @@ class MedianBlur { sorting_network3x3_dual_rows>( KernelWindow, output_vec_0, output_vec_1, pg); } -}; // end of class MedianBlur +}; // end of class MedianBlurSortingNetwork // Template for Median Blur 5x5 filters. 
template -class MedianBlur { +class MedianBlurSortingNetwork { public: using SourceType = ScalarType; using DestinationType = SourceType; @@ -97,7 +97,6 @@ class MedianBlur { using SourceVectorType = typename SourceVecTraits::VectorType; using DestinationVectorType = typename KLEIDICV_TARGET_NAMESPACE::VecTraits< DestinationType>::VectorType; - template void vector_path(svbool_t& pg, KernelWindowFunctor& KernelWindow, DestinationVectorType& output_vec) const @@ -105,11 +104,11 @@ class MedianBlur { sorting_network5x5>(KernelWindow, output_vec, pg); } -}; // end of class MedianBlur +}; // end of class MedianBlurSortingNetwork // Template for Median Blur 7x7 filters. template -class MedianBlur { +class MedianBlurSortingNetwork { public: using SourceType = ScalarType; using DestinationType = SourceType; @@ -126,36 +125,41 @@ class MedianBlur { sorting_network7x7>(KernelWindow, output_vec, pg); } -}; // end of class MedianBlur +}; // end of class MedianBlurSortingNetwork template -kleidicv_error_t median_blur_stripe_sc( +kleidicv_error_t median_blur_sorting_network_stripe_sc( const T* src, size_t src_stride, T* dst, size_t dst_stride, size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, - [[maybe_unused]] size_t kernel_width, [[maybe_unused]] size_t kernel_height, + size_t kernel_width, [[maybe_unused]] size_t kernel_height, FixedBorderType border_type) KLEIDICV_STREAMING_COMPATIBLE { Rectangle rect{width, height}; Rows src_rows{src, src_stride, channels}; Rows dst_rows{dst, dst_stride, channels}; + if (kernel_width == 3) { - MedianBlur median_filter; - Filter2D3x3> filter{median_filter}; + MedianBlurSortingNetwork median_filter; + Filter2D3x3> filter{median_filter}; process_filter2d_by_dual_rows(rect, y_begin, y_end, src_rows, dst_rows, border_type, filter); - } else if (kernel_width == 5) { - MedianBlur median_filter; - Filter2D5x5> filter{median_filter}; + return KLEIDICV_OK; + } + if (kernel_width == 5) { + 
MedianBlurSortingNetwork median_filter; + Filter2D5x5> filter{median_filter}; process_filter2d(rect, y_begin, y_end, src_rows, dst_rows, border_type, filter); return KLEIDICV_OK; - } else { - MedianBlur median_filter; - Filter2D7x7> filter{median_filter}; + } + if (kernel_width == 7) { + MedianBlurSortingNetwork median_filter; + Filter2D7x7> filter{median_filter}; process_filter2d(rect, y_begin, y_end, src_rows, dst_rows, border_type, filter); + return KLEIDICV_OK; } - return KLEIDICV_OK; + return KLEIDICV_ERROR_NOT_IMPLEMENTED; } } // namespace KLEIDICV_TARGET_NAMESPACE diff --git a/kleidicv/src/filters/median_blur_sme2.cpp b/kleidicv/src/filters/median_blur_sorting_network_sme2.cpp similarity index 52% rename from kleidicv/src/filters/median_blur_sme2.cpp rename to kleidicv/src/filters/median_blur_sorting_network_sme2.cpp index f946540b8..893e870aa 100644 --- a/kleidicv/src/filters/median_blur_sme2.cpp +++ b/kleidicv/src/filters/median_blur_sorting_network_sme2.cpp @@ -3,26 +3,29 @@ // SPDX-License-Identifier: Apache-2.0 #include "kleidicv/filters/median_blur.h" -#include "median_blur_sc.h" +#include "median_blur_sorting_network_sc.h" namespace kleidicv::sme2 { template KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t -median_blur_stripe(const T* src, size_t src_stride, T* dst, size_t dst_stride, - size_t width, size_t height, size_t y_begin, size_t y_end, - size_t channels, size_t kernel_width, size_t kernel_height, - FixedBorderType border_type) { - return median_blur_stripe_sc(src, src_stride, dst, dst_stride, width, height, - y_begin, y_end, channels, kernel_width, - kernel_height, border_type); +median_blur_sorting_network_stripe(const T* src, size_t src_stride, T* dst, + size_t dst_stride, size_t width, + size_t height, size_t y_begin, size_t y_end, + size_t channels, size_t kernel_width, + size_t kernel_height, + FixedBorderType border_type) { + return median_blur_sorting_network_stripe_sc( + src, src_stride, dst, dst_stride, width, 
height, y_begin, y_end, channels, + kernel_width, kernel_height, border_type); } -#define KLEIDICV_INSTANTIATE_TEMPLATE(type) \ - template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t median_blur_stripe( \ - const type* src, size_t src_stride, type* dst, size_t dst_stride, \ - size_t width, size_t height, size_t y_begin, size_t y_end, \ - size_t channels, size_t kernel_width, size_t kernel_height, \ +#define KLEIDICV_INSTANTIATE_TEMPLATE(type) \ + template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t \ + median_blur_sorting_network_stripe( \ + const type* src, size_t src_stride, type* dst, size_t dst_stride, \ + size_t width, size_t height, size_t y_begin, size_t y_end, \ + size_t channels, size_t kernel_width, size_t kernel_height, \ FixedBorderType border_type) KLEIDICV_INSTANTIATE_TEMPLATE(int8_t); diff --git a/kleidicv/src/filters/median_blur_sve2.cpp b/kleidicv/src/filters/median_blur_sorting_network_sve2.cpp similarity index 68% rename from kleidicv/src/filters/median_blur_sve2.cpp rename to kleidicv/src/filters/median_blur_sorting_network_sve2.cpp index 4e40ece94..0fe846654 100644 --- a/kleidicv/src/filters/median_blur_sve2.cpp +++ b/kleidicv/src/filters/median_blur_sorting_network_sve2.cpp @@ -3,25 +3,26 @@ // SPDX-License-Identifier: Apache-2.0 #include "kleidicv/filters/median_blur.h" -#include "median_blur_sc.h" +#include "median_blur_sorting_network_sc.h" namespace kleidicv::sve2 { template -KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t median_blur_stripe( +KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t median_blur_sorting_network_stripe( const T* src, size_t src_stride, T* dst, size_t dst_stride, size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, size_t kernel_width, size_t kernel_height, FixedBorderType border_type) { - return median_blur_stripe_sc(src, src_stride, dst, dst_stride, width, height, - y_begin, y_end, channels, kernel_width, - kernel_height, border_type); + return median_blur_sorting_network_stripe_sc( + src, src_stride, dst, 
dst_stride, width, height, y_begin, y_end, channels, + kernel_width, kernel_height, border_type); } -#define KLEIDICV_INSTANTIATE_TEMPLATE(type) \ - template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t median_blur_stripe( \ - const type* src, size_t src_stride, type* dst, size_t dst_stride, \ - size_t width, size_t height, size_t y_begin, size_t y_end, \ - size_t channels, size_t kernel_width, size_t kernel_height, \ +#define KLEIDICV_INSTANTIATE_TEMPLATE(type) \ + template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t \ + median_blur_sorting_network_stripe( \ + const type* src, size_t src_stride, type* dst, size_t dst_stride, \ + size_t width, size_t height, size_t y_begin, size_t y_end, \ + size_t channels, size_t kernel_width, size_t kernel_height, \ FixedBorderType border_type) KLEIDICV_INSTANTIATE_TEMPLATE(int8_t); diff --git a/kleidicv_thread/src/kleidicv_thread.cpp b/kleidicv_thread/src/kleidicv_thread.cpp index 8f413db0f..3d5ddb041 100644 --- a/kleidicv_thread/src/kleidicv_thread.cpp +++ b/kleidicv_thread/src/kleidicv_thread.cpp @@ -593,8 +593,17 @@ kleidicv_error_t kleidicv_thread_median_blur_u8( return checks_result; } + if (kernel_width > 7) { + auto callback = [=](unsigned y_begin, unsigned y_end) { + return kleidicv_median_blur_small_hist_stripe_u8( + src, src_stride, dst, dst_stride, width, height, y_begin, y_end, + channels, kernel_width, kernel_height, fixed_border_type); + }; + return parallel_batches(callback, mt, height); + } + auto callback = [=](unsigned y_begin, unsigned y_end) { - return kleidicv_median_blur_stripe_u8( + return kleidicv_median_blur_sorting_network_stripe_u8( src, src_stride, dst, dst_stride, width, height, y_begin, y_end, channels, kernel_width, kernel_height, fixed_border_type); }; @@ -617,7 +626,7 @@ kleidicv_error_t kleidicv_thread_median_blur_s16( } auto callback = [=](unsigned y_begin, unsigned y_end) { - return kleidicv_median_blur_stripe_s16( + return kleidicv_median_blur_sorting_network_stripe_s16( src, src_stride, dst, 
dst_stride, width, height, y_begin, y_end, channels, kernel_width, kernel_height, fixed_border_type); }; @@ -640,7 +649,7 @@ kleidicv_error_t kleidicv_thread_median_blur_u16( } auto callback = [=](unsigned y_begin, unsigned y_end) { - return kleidicv_median_blur_stripe_u16( + return kleidicv_median_blur_sorting_network_stripe_u16( src, src_stride, dst, dst_stride, width, height, y_begin, y_end, channels, kernel_width, kernel_height, fixed_border_type); }; @@ -663,7 +672,7 @@ kleidicv_error_t kleidicv_thread_median_blur_f32( } auto callback = [=](unsigned y_begin, unsigned y_end) { - return kleidicv_median_blur_stripe_f32( + return kleidicv_median_blur_sorting_network_stripe_f32( src, src_stride, dst, dst_stride, width, height, y_begin, y_end, channels, kernel_width, kernel_height, fixed_border_type); }; diff --git a/scripts/benchmark/benchmarks.txt b/scripts/benchmark/benchmarks.txt index 3e2024fed..3b22f42f2 100755 --- a/scripts/benchmark/benchmarks.txt +++ b/scripts/benchmark/benchmarks.txt @@ -31,9 +31,13 @@ SepFilter2D_5x5_U8: opencv_perf_imgproc '*KleidiCV_SepFilter2D.SepFilter2D/*' ' SepFilter2D_5x5_U16: opencv_perf_imgproc '*KleidiCV_SepFilter2D.SepFilter2D/*' '($PIXEL_FORMAT, 16UC1, 5, BORDER_REPLICATE)' SepFilter2D_5x5_S16: opencv_perf_imgproc '*KleidiCV_SepFilter2D.SepFilter2D/*' '($PIXEL_FORMAT, 16SC1, 5, BORDER_REPLICATE)' -MedianBlur3x3: opencv_perf_imgproc '*medianBlur/*' '($PIXEL_FORMAT, 8UC1, 3)' -MedianBlur5x5: opencv_perf_imgproc '*medianBlur/*' '($PIXEL_FORMAT, 8UC1, 5)' -MedianBlur7x7: opencv_perf_imgproc '*medianBlur/*' '($PIXEL_FORMAT, 8UC1, 7)' +MedianBlur3x3: opencv_perf_imgproc '*medianBlur/*' '($PIXEL_FORMAT, 8UC1, 3)' +MedianBlur5x5: opencv_perf_imgproc '*medianBlur/*' '($PIXEL_FORMAT, 8UC1, 5)' +MedianBlur7x7: opencv_perf_imgproc '*medianBlur/*' '($PIXEL_FORMAT, 8UC1, 7)' +MedianBlur9x9: opencv_perf_imgproc '*medianBlur/*' '($PIXEL_FORMAT, 8UC1, 9)' +MedianBlur11x11: opencv_perf_imgproc '*medianBlur/*' '($PIXEL_FORMAT, 8UC1, 11)' 
+MedianBlur13x13: opencv_perf_imgproc '*medianBlur/*' '($PIXEL_FORMAT, 8UC1, 13)' +MedianBlur15x15: opencv_perf_imgproc '*medianBlur/*' '($PIXEL_FORMAT, 8UC1, 15)' GaussianBlur3x3: opencv_perf_imgproc '*gaussianBlur3x3/*' '($PIXEL_FORMAT, 8UC1, BORDER_REPLICATE)' GaussianBlur5x5: opencv_perf_imgproc '*gaussianBlur5x5/*' '($PIXEL_FORMAT, 8UC1, BORDER_REPLICATE)' diff --git a/test/api/test_median_blur.cpp b/test/api/test_median_blur.cpp index 2c1b12c72..c54ba43e7 100644 --- a/test/api/test_median_blur.cpp +++ b/test/api/test_median_blur.cpp @@ -98,7 +98,7 @@ class MedianBlurTest : public testing::Test { size_t filter_size) { std::vector widths = {25, filter_size - 1}; std::vector src_paddings = {0}; - std::vector dst_paddings = {0}; + std::vector dst_paddings = {3}; std::vector heights = {filter_size, filter_size - 1}; std::vector channels = {1, 2, 3, 4}; std::vector filter_sizes = {filter_size}; @@ -110,6 +110,20 @@ class MedianBlurTest : public testing::Test { channels, filter_sizes, border_types); } + static std::vector get_mid_range_filter_test_cases() { + std::vector widths = {50}; + std::vector src_paddings = {0}; + std::vector dst_paddings = {5}; + std::vector heights = {20}; + std::vector channels = {1, 4}; + std::vector filter_sizes = {9, 15}; + std::vector border_types = { + KLEIDICV_BORDER_TYPE_REPLICATE, KLEIDICV_BORDER_TYPE_REFLECT, + KLEIDICV_BORDER_TYPE_WRAP, KLEIDICV_BORDER_TYPE_REVERSE}; + return generate_test_cases(widths, src_paddings, dst_paddings, heights, + channels, filter_sizes, border_types); + } + void run_test_case(const TestParams& params) { test::Array2D src{params.width * params.channels, params.height, params.src_padding, @@ -259,20 +273,20 @@ TYPED_TEST(MedianBlurTest, BorderNotImplemented) { } TYPED_TEST(MedianBlurTest, HeightTooSmall) { - test::Array2D src{100, 3}; - test::Array2D dst{100, 3}; + test::Array2D src{100, 4}; + test::Array2D dst{100, 4}; EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, median_blur()(src.data(), src.stride(), 
dst.data(), - dst.stride(), 100, 3, 1, 5, 5, + dst.stride(), 100, 3, 1, 7, 7, KLEIDICV_BORDER_TYPE_REPLICATE)); } TYPED_TEST(MedianBlurTest, WidthTooSmall) { - test::Array2D src{3, 100}; - test::Array2D dst{3, 100}; + test::Array2D src{4, 100}; + test::Array2D dst{4, 100}; EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, median_blur()(src.data(), src.stride(), dst.data(), - dst.stride(), 3, 100, 1, 5, 5, + dst.stride(), 3, 100, 1, 7, 7, KLEIDICV_BORDER_TYPE_REPLICATE)); } @@ -309,22 +323,52 @@ TYPED_TEST(MedianBlurTest, OversizeImage) { 5, 5, KLEIDICV_BORDER_TYPE_REPLICATE)); } -TYPED_TEST(MedianBlurTest, UnsupportedLargeFilterSize) { +TYPED_TEST(MedianBlurTest, UnsupportedFilterSizes) { test::Array2D src{100, 100}; test::Array2D dst{100, 100}; + + // Test unsupported large square filter EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, median_blur()(src.data(), src.stride(), dst.data(), dst.stride(), 100, 100, 1, 100, 100, KLEIDICV_BORDER_TYPE_REPLICATE)); -} -TYPED_TEST(MedianBlurTest, NonSquareFilterSizeWithValidHeight) { - test::Array2D src{100, 100}; - test::Array2D dst{100, 100}; + // Test non-square filter with valid height EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, median_blur()(src.data(), src.stride(), dst.data(), dst.stride(), 100, 100, 1, 100, 5, KLEIDICV_BORDER_TYPE_REPLICATE)); + + // Test non-square filter with valid width + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + median_blur()(src.data(), src.stride(), dst.data(), + dst.stride(), 100, 100, 1, 5, 100, + KLEIDICV_BORDER_TYPE_REPLICATE)); + + // Test unsupported small filter + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + median_blur()(src.data(), src.stride(), dst.data(), + dst.stride(), 100, 100, 1, 1, 1, + KLEIDICV_BORDER_TYPE_REPLICATE)); + + // Test unsupported even filter + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + median_blur()(src.data(), src.stride(), dst.data(), + dst.stride(), 100, 100, 1, 4, 4, + KLEIDICV_BORDER_TYPE_REPLICATE)); + + // Test mid-range square filters that are not implemented + 
EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + median_blur()(src.data(), src.stride(), dst.data(), + dst.stride(), 100, 100, 1, 9, 9, + KLEIDICV_BORDER_TYPE_TRANSPARENT)); + + if (!std::is_same_v) { + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + median_blur()(src.data(), src.stride(), dst.data(), + dst.stride(), 100, 100, 1, 9, 9, + KLEIDICV_BORDER_TYPE_REPLICATE)); + } } TYPED_TEST(MedianBlurTest, NonSquareFilterSizeWithValidWidth) { @@ -401,3 +445,13 @@ TYPED_TEST(MedianBlurByteStrideTest, RunAllParamCombinationsWithPadding) { this->run_test_case(params); } } + +template +class MedianBlurMidRangeTest : public MedianBlurTest {}; +using ByteType = ::testing::Types; +TYPED_TEST_SUITE(MedianBlurMidRangeTest, ByteType); +TYPED_TEST(MedianBlurMidRangeTest, RunAllParamCombinationsWithMidRangeFilters) { + for (const auto& params : TestFixture::get_mid_range_filter_test_cases()) { + this->run_test_case(params); + } +} diff --git a/test/api/test_thread.cpp b/test/api/test_thread.cpp index 6b8eb324e..f93d8b8cc 100644 --- a/test/api/test_thread.cpp +++ b/test/api/test_thread.cpp @@ -113,7 +113,8 @@ class Thread : public testing::TestWithParam

{ (void)thread_count; size_t channels = 1; kleidicv_border_type_t border_type = KLEIDICV_BORDER_TYPE_REPLICATE; - for (auto ksize : {3, 5, 7}) { + const auto &filter_size = std::vector{3, 5, 7, 9}; + for (auto ksize : filter_size) { check_unary_op(single_threaded_func, multithreaded_func, channels, channels, channels, ksize, ksize, border_type); } @@ -414,6 +415,18 @@ void check_median_blur_not_implemented(MultithreadedFunc multithreaded_func) { multithreaded_func(src1.data(), src1.stride(), dst1.data(), dst1.stride(), 25, 25, 1, 4, 4, KLEIDICV_BORDER_TYPE_REPLICATE, get_multithreading_fake(2))); + + EXPECT_EQ( + KLEIDICV_ERROR_NOT_IMPLEMENTED, + multithreaded_func(src1.data(), src1.stride(), dst1.data(), dst1.stride(), + 25, 25, 1, 4, 4, KLEIDICV_BORDER_TYPE_REPLICATE, + get_multithreading_fake(2))); + + EXPECT_EQ( + KLEIDICV_ERROR_NOT_IMPLEMENTED, + multithreaded_func(src1.data(), src1.stride(), dst1.data(), dst1.stride(), + 25, 25, 1, 9, 9, KLEIDICV_BORDER_TYPE_TRANSPARENT, + get_multithreading_fake(2))); } TEST(ThreadMedianBlur, NotImplemented) { -- GitLab