From 4430e8f9550a2e895d9b479d6d9e42c434c0f03e Mon Sep 17 00:00:00 2001 From: Noureldin Abdelfattah Date: Mon, 23 Jun 2025 16:08:50 +0100 Subject: [PATCH] Add Median 3x3 implementation --- CHANGELOG.md | 3 + benchmark/benchmark.cpp | 14 + conformity/opencv/test_median_blur.cpp | 13 + doc/functionality.md | 2 +- doc/opencv.md | 2 +- .../kleidicv/filters/filter_2d_5x5_neon.h | 101 ------ .../kleidicv/filters/filter_2d_5x5_sc.h | 109 ------ .../kleidicv/filters/filter_2d_7x7_neon.h | 103 ------ .../kleidicv/filters/filter_2d_7x7_sc.h | 123 ------- .../include/kleidicv/filters/filter_2d_neon.h | 177 ++++++++++ .../include/kleidicv/filters/filter_2d_sc.h | 309 ++++++++++++++++++ .../filters/filter_2d_window_loader_3x3.h | 68 ++++ ...5_base.h => filter_2d_window_loader_5x5.h} | 22 +- ...7_base.h => filter_2d_window_loader_7x7.h} | 22 +- .../include/kleidicv/filters/median_blur.h | 2 +- .../{filter_2d.h => process_fitler_2d.h} | 67 +++- kleidicv/include/kleidicv/kleidicv.h | 9 +- kleidicv/src/filters/median_blur_neon.cpp | 175 ++++++---- kleidicv/src/filters/median_blur_sc.h | 134 +++++--- .../filters/median_blur_sorting_network_3x3.h | 109 ++++++ scripts/benchmark/benchmarks.txt | 1 + test/api/test_median_blur.cpp | 27 +- test/api/test_thread.cpp | 5 +- 23 files changed, 1006 insertions(+), 591 deletions(-) delete mode 100644 kleidicv/include/kleidicv/filters/filter_2d_5x5_neon.h delete mode 100644 kleidicv/include/kleidicv/filters/filter_2d_5x5_sc.h delete mode 100644 kleidicv/include/kleidicv/filters/filter_2d_7x7_neon.h delete mode 100644 kleidicv/include/kleidicv/filters/filter_2d_7x7_sc.h create mode 100644 kleidicv/include/kleidicv/filters/filter_2d_neon.h create mode 100644 kleidicv/include/kleidicv/filters/filter_2d_sc.h create mode 100644 kleidicv/include/kleidicv/filters/filter_2d_window_loader_3x3.h rename kleidicv/include/kleidicv/filters/{filter_2d_5x5_base.h => filter_2d_window_loader_5x5.h} (86%) rename 
kleidicv/include/kleidicv/filters/{filter_2d_7x7_base.h => filter_2d_window_loader_7x7.h} (92%) rename kleidicv/include/kleidicv/filters/{filter_2d.h => process_fitler_2d.h} (50%) create mode 100644 kleidicv/src/filters/median_blur_sorting_network_3x3.h diff --git a/CHANGELOG.md b/CHANGELOG.md index f2d7c2602..cef1284ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,9 @@ This changelog aims to follow the guiding principles of ## 0.6.0 - not yet released +### Added +- Median Blur for 3x3 kernels. + ## 0.5.0 - 2025-06-10 ### Added diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 25d0c4438..bf6658fe8 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -610,55 +610,69 @@ static void median_blur(benchmark::State& state, Function func) { } BENCHMARK_TEMPLATE2_CAPTURE(median_blur, uint8_t, 1, , kleidicv_median_blur_u8) + ->Arg(3) ->Arg(5) ->Arg(7); BENCHMARK_TEMPLATE2_CAPTURE(median_blur, uint8_t, 4, , kleidicv_median_blur_u8) + ->Arg(3) ->Arg(5) ->Arg(7); BENCHMARK_TEMPLATE2_CAPTURE(median_blur, int8_t, 1, , kleidicv_median_blur_s8) + ->Arg(3) ->Arg(5) ->Arg(7); BENCHMARK_TEMPLATE2_CAPTURE(median_blur, int8_t, 4, , kleidicv_median_blur_s8) + ->Arg(3) ->Arg(5) ->Arg(7); BENCHMARK_TEMPLATE2_CAPTURE(median_blur, uint16_t, 1, , kleidicv_median_blur_u16) + ->Arg(3) ->Arg(5) ->Arg(7); BENCHMARK_TEMPLATE2_CAPTURE(median_blur, uint16_t, 4, , kleidicv_median_blur_u16) + ->Arg(3) ->Arg(5) ->Arg(7); BENCHMARK_TEMPLATE2_CAPTURE(median_blur, int16_t, 1, , kleidicv_median_blur_s16) + ->Arg(3) ->Arg(5) ->Arg(7); BENCHMARK_TEMPLATE2_CAPTURE(median_blur, int16_t, 4, , kleidicv_median_blur_s16) + ->Arg(3) ->Arg(5) ->Arg(7); BENCHMARK_TEMPLATE2_CAPTURE(median_blur, uint32_t, 1, , kleidicv_median_blur_u32) + ->Arg(3) ->Arg(5) ->Arg(7); BENCHMARK_TEMPLATE2_CAPTURE(median_blur, uint32_t, 4, , kleidicv_median_blur_u32) + ->Arg(3) ->Arg(5) ->Arg(7); BENCHMARK_TEMPLATE2_CAPTURE(median_blur, int32_t, 1, , kleidicv_median_blur_s32) + ->Arg(3) 
->Arg(5) ->Arg(7); BENCHMARK_TEMPLATE2_CAPTURE(median_blur, int32_t, 4, , kleidicv_median_blur_s32) + ->Arg(3) ->Arg(5) ->Arg(7); BENCHMARK_TEMPLATE2_CAPTURE(median_blur, float, 1, , kleidicv_median_blur_f32) + ->Arg(3) ->Arg(5) ->Arg(7); BENCHMARK_TEMPLATE2_CAPTURE(median_blur, float, 4, , kleidicv_median_blur_f32) + ->Arg(3) ->Arg(5) ->Arg(7); diff --git a/conformity/opencv/test_median_blur.cpp b/conformity/opencv/test_median_blur.cpp index eccedb299..a963d79e1 100644 --- a/conformity/opencv/test_median_blur.cpp +++ b/conformity/opencv/test_median_blur.cpp @@ -43,6 +43,19 @@ bool test_median_blur(int index, RecreatedMessageQueue& request_queue, std::vector& median_blur_tests_get() { // clang-format off static std::vector tests = { + TEST("Median 3x3, 1 channel (U8)", (test_median_blur<3, uint8_t, 1>), exec_median_blur<3>), + TEST("Median 3x3, 3 channel (U8)", (test_median_blur<3, uint8_t, 3>), exec_median_blur<3>), + TEST("Median 3x3, 4 channel (U8)", (test_median_blur<3, uint8_t, 4>), exec_median_blur<3>), + TEST("Median 3x3, 1 channel (U16)", (test_median_blur<3, uint16_t, 1>), exec_median_blur<3>), + TEST("Median 3x3, 3 channel (U16)", (test_median_blur<3, uint16_t, 3>), exec_median_blur<3>), + TEST("Median 3x3, 4 channel (U16)", (test_median_blur<3, uint16_t, 4>), exec_median_blur<3>), + TEST("Median 3x3, 1 channel (S16)", (test_median_blur<3, int16_t, 1>), exec_median_blur<3>), + TEST("Median 3x3, 3 channel (S16)", (test_median_blur<3, int16_t, 3>), exec_median_blur<3>), + TEST("Median 3x3, 4 channel (S16)", (test_median_blur<3, int16_t, 4>), exec_median_blur<3>), + TEST("Median 3x3, 1 channel (F32)", (test_median_blur<3, float, 1>), exec_median_blur<3>), + TEST("Median 3x3, 3 channel (F32)", (test_median_blur<3, float, 3>), exec_median_blur<3>), + TEST("Median 3x3, 4 channel (F32)", (test_median_blur<3, float, 4>), exec_median_blur<3>), + TEST("Median 5x5, 1 channel (U8)", (test_median_blur<5, uint8_t, 1>), exec_median_blur<5>), TEST("Median 5x5, 3 channel 
(U8)", (test_median_blur<5, uint8_t, 3>), exec_median_blur<5>), TEST("Median 5x5, 4 channel (U8)", (test_median_blur<5, uint8_t, 4>), exec_median_blur<5>), diff --git a/doc/functionality.md b/doc/functionality.md index b5646a1af..67c859361 100644 --- a/doc/functionality.md +++ b/doc/functionality.md @@ -79,7 +79,7 @@ See `doc/opencv.md` for details of the functionality available in OpenCV. | Sobel (3x3) | | x | | | | | | | Separable Filter 2D (5x5) | | x | x | x | | | | | Gaussian Blur (3x3, 5x5, 7x7, 15x15, 21x21) | | x | | | | | | -| Median Blur (5x5, 7x7) | x | x | x | x | x | x | x | +| Median Blur (3x3, 5x5, 7x7) | x | x | x | x | x | x | x | ## Resize to quarter | | u8 | diff --git a/doc/opencv.md b/doc/opencv.md index 02b6b553c..5644dd241 100644 --- a/doc/opencv.md +++ b/doc/opencv.md @@ -166,7 +166,7 @@ Applies median filter to a given image. Notes on parameters: * `src.cols`,`src.rows` - image width and height must be greater than or equal to `ksize - 1` (i.e. `>= 4` for 5x5). -* `ksize` - only values 5 and 7 are supported (i.e. 5×5 and 7×7 kernels). +* `ksize` - only values 3, 5 and 7 are supported (i.e. 3×3, 5×5 and 7×7 kernels). ### [`cv::transpose()`](https://docs.opencv.org/4.10.0/d2/de8/group__core__array.html#ga46630ed6c0ea6254a35f447289bd7404) Transposes a matrix. diff --git a/kleidicv/include/kleidicv/filters/filter_2d_5x5_neon.h b/kleidicv/include/kleidicv/filters/filter_2d_5x5_neon.h deleted file mode 100644 index b08bd114d..000000000 --- a/kleidicv/include/kleidicv/filters/filter_2d_5x5_neon.h +++ /dev/null @@ -1,101 +0,0 @@ -// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates -// -// SPDX-License-Identifier: Apache-2.0 - -#ifndef KLEIDICV_FILTER_2D_5X5_NEON_H -#define KLEIDICV_FILTER_2D_5X5_NEON_H - -#include "filter_2d.h" -#include "filter_2d_5x5_base.h" -#include "kleidicv/neon.h" - -namespace KLEIDICV_TARGET_NAMESPACE { - -// Template for Filter2D 5x5. 
-template -class Filter2D - : public Filter2D5x5Base { - public: - using SourceType = typename InnerFilterType::SourceType; - using DestinationType = typename InnerFilterType::DestinationType; - using SourceVecTraits = typename neon::VecTraits; - using SourceVectorType = typename SourceVecTraits::VectorType; - using Base = Filter2D5x5Base; - using BorderInfoType = - typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo5x5; - using BorderType = FixedBorderType; - using BorderOffsets = typename BorderInfoType::Offsets; - static constexpr size_t kMargin = 2UL; - explicit Filter2D(InnerFilterType filter) : filter_{filter} {} - - void process_pixels_without_horizontal_borders( - size_t width, Rows src_rows, Rows dst_rows, - BorderOffsets window_row_offsets, - BorderOffsets window_col_offsets) const { - LoopUnroll2 loop{width * src_rows.channels(), - SourceVecTraits::num_lanes()}; - - loop.unroll_once([&](size_t index) { - SourceVectorType src[5][5]; - SourceVectorType dst_vec; - - auto KernelWindow = [&](size_t row, size_t col) -> SourceVectorType& { - return src[row][col]; - }; - - auto load_array_element = [](const SourceType& x) { return vld1q(&x); }; - Base::load_window(KernelWindow, load_array_element, src_rows, - window_row_offsets, window_col_offsets, index); - filter_.vector_path(KernelWindow, dst_vec); - - vst1q(&dst_rows[index], dst_vec); - }); - - loop.tail([&](size_t index) { - process_one_element_with_horizontal_borders( - src_rows, dst_rows, window_row_offsets, window_col_offsets, index); - }); - } - - void process_one_pixel_with_horizontal_borders( - Rows src_rows, Rows dst_rows, - BorderOffsets window_row_offsets, - BorderOffsets window_col_offsets) const KLEIDICV_STREAMING_COMPATIBLE { - for (size_t index = 0; index < src_rows.channels(); ++index) { - disable_loop_vectorization(); - process_one_element_with_horizontal_borders( - src_rows, dst_rows, window_row_offsets, window_col_offsets, index); - } - } - - private: - void 
process_one_element_with_horizontal_borders( - Rows src_rows, Rows dst_rows, - BorderOffsets window_row_offsets, BorderOffsets window_col_offsets, - size_t index) const KLEIDICV_STREAMING_COMPATIBLE { - SourceType src[5][5]; - - auto KernelWindow = [&](size_t row, size_t col) - KLEIDICV_STREAMING_COMPATIBLE -> SourceType& { - return src[row][col]; - }; - - auto load_array_element = [&](const SourceType& x) - KLEIDICV_STREAMING_COMPATIBLE { return x; }; - - Base::load_window(KernelWindow, load_array_element, src_rows, - window_row_offsets, window_col_offsets, index); - - filter_.scalar_path(KernelWindow, dst_rows[index]); - } - - InnerFilterType filter_; -}; // end of class Filter2D - -// Shorthand for 5x5 2D filters driver type. -template -using Filter2D5x5 = Filter2D; - -} // namespace KLEIDICV_TARGET_NAMESPACE - -#endif // KLEIDICV_FILTER_2D_5X5_NEON_H diff --git a/kleidicv/include/kleidicv/filters/filter_2d_5x5_sc.h b/kleidicv/include/kleidicv/filters/filter_2d_5x5_sc.h deleted file mode 100644 index 847433a8e..000000000 --- a/kleidicv/include/kleidicv/filters/filter_2d_5x5_sc.h +++ /dev/null @@ -1,109 +0,0 @@ -// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates -// -// SPDX-License-Identifier: Apache-2.0 - -#ifndef KLEIDICV_FILTER_2D_5X5_SC_H -#define KLEIDICV_FILTER_2D_5X5_SC_H - -#include "filter_2d.h" -#include "filter_2d_5x5_base.h" -#include "kleidicv/sve2.h" - -namespace KLEIDICV_TARGET_NAMESPACE { -// Template for Filter2D 5x5. 
-template -class Filter2D - : public Filter2D5x5Base { - public: - using SourceType = typename InnerFilterType::SourceType; - using DestinationType = typename InnerFilterType::DestinationType; - using SourceVecTraits = - typename KLEIDICV_TARGET_NAMESPACE::VecTraits; - using SourceVectorType = typename SourceVecTraits::VectorType; - using BorderInfoType = - typename KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo5x5; - using BorderType = FixedBorderType; - using BorderOffsets = typename BorderInfoType::Offsets; - using Base = Filter2D5x5Base; - static constexpr size_t kMargin = 2UL; - explicit Filter2D(InnerFilterType filter) KLEIDICV_STREAMING_COMPATIBLE - : filter_{filter} {} - - void process_pixels_without_horizontal_borders( - size_t width, Rows src_rows, Rows dst_rows, - BorderOffsets window_row_offsets, - BorderOffsets window_col_offsets) const KLEIDICV_STREAMING_COMPATIBLE { - LoopUnroll2 loop{width * src_rows.channels(), - SourceVecTraits::num_lanes()}; - - loop.unroll_once([&](size_t index) KLEIDICV_STREAMING_COMPATIBLE { - svbool_t pg = SourceVecTraits::svptrue(); - process_elements_with_vector_operation(src_rows, dst_rows, - window_row_offsets, - window_col_offsets, index, pg); - }); - - loop.remaining( - [&](size_t index, size_t length) KLEIDICV_STREAMING_COMPATIBLE { - svbool_t pg = SourceVecTraits::svwhilelt(index, length); - process_elements_with_vector_operation(src_rows, dst_rows, - window_row_offsets, - window_col_offsets, index, pg); - }); - } - - void process_one_pixel_with_horizontal_borders( - Rows src_rows, Rows dst_rows, - BorderOffsets window_row_offsets, - BorderOffsets window_col_offsets) const KLEIDICV_STREAMING_COMPATIBLE { - for (size_t index = 0; index < src_rows.channels(); ++index) { - process_elements_with_vector_operation( - src_rows, dst_rows, window_row_offsets, window_col_offsets, index, - SourceVecTraits::template svptrue_pat()); - } - } - - private: - void process_elements_with_vector_operation( - Rows src_rows, Rows dst_rows, - 
BorderOffsets window_row_offsets, BorderOffsets window_col_offsets, - size_t index, svbool_t pg) const KLEIDICV_STREAMING_COMPATIBLE { - SourceVectorType src_0_0, src_0_1, src_0_2, src_0_3, src_0_4, src_1_0, - src_1_1, src_1_2, src_1_3, src_1_4, src_2_0, src_2_1, src_2_2, src_2_3, - src_2_4, src_3_0, src_3_1, src_3_2, src_3_3, src_3_4, src_4_0, src_4_1, - src_4_2, src_4_3, src_4_4, output_vector; - - // Initialization - ScalableVectorArray2D KernelWindow = {{ - {std::ref(src_0_0), std::ref(src_0_1), std::ref(src_0_2), - std::ref(src_0_3), std::ref(src_0_4)}, - {std::ref(src_1_0), std::ref(src_1_1), std::ref(src_1_2), - std::ref(src_1_3), std::ref(src_1_4)}, - {std::ref(src_2_0), std::ref(src_2_1), std::ref(src_2_2), - std::ref(src_2_3), std::ref(src_2_4)}, - {std::ref(src_3_0), std::ref(src_3_1), std::ref(src_3_2), - std::ref(src_3_3), std::ref(src_3_4)}, - {std::ref(src_4_0), std::ref(src_4_1), std::ref(src_4_2), - std::ref(src_4_3), std::ref(src_4_4)}, - }}; - - auto load_array_element = - [&](const SourceType& x) - KLEIDICV_STREAMING_COMPATIBLE { return svld1(pg, &x); }; - - Base::load_window(KernelWindow, load_array_element, src_rows, - window_row_offsets, window_col_offsets, index); - filter_.vector_path(KernelWindow, output_vector, pg); - svst1(pg, &dst_rows[index], output_vector); - } - - InnerFilterType filter_; -}; // end of class Filter2D - -// Shorthand for 5x5 2D filters driver type. 
-template -using Filter2D5x5 = Filter2D; - -} // namespace KLEIDICV_TARGET_NAMESPACE - -#endif // KLEIDICV_FILTER_2D_5X5_SC_H diff --git a/kleidicv/include/kleidicv/filters/filter_2d_7x7_neon.h b/kleidicv/include/kleidicv/filters/filter_2d_7x7_neon.h deleted file mode 100644 index 81aac2b70..000000000 --- a/kleidicv/include/kleidicv/filters/filter_2d_7x7_neon.h +++ /dev/null @@ -1,103 +0,0 @@ -// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates -// -// SPDX-License-Identifier: Apache-2.0 - -#ifndef KLEIDICV_FILTER_2D_7X7_NEON_H -#define KLEIDICV_FILTER_2D_7X7_NEON_H - -#include "filter_2d.h" -#include "filter_2d_7x7_base.h" -#include "kleidicv/neon.h" - -namespace KLEIDICV_TARGET_NAMESPACE { - -// Template for Filter2D 7x7. -template -class Filter2D - : public Filter2D7x7Base { - public: - using SourceType = typename InnerFilterType::SourceType; - using DestinationType = typename InnerFilterType::DestinationType; - using SourceVecTraits = typename neon::VecTraits; - using SourceVectorType = typename SourceVecTraits::VectorType; - using BorderInfoType = - typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo7x7; - using BorderType = FixedBorderType; - using BorderOffsets = typename BorderInfoType::Offsets; - static constexpr size_t kMargin = 3UL; - explicit Filter2D(InnerFilterType filter) : filter_{filter} {} - using Base = Filter2D7x7Base; - - void process_pixels_without_horizontal_borders( - size_t width, Rows src_rows, Rows dst_rows, - BorderOffsets window_row_offsets, - BorderOffsets window_col_offsets) const { - LoopUnroll2 loop{width * src_rows.channels(), - SourceVecTraits::num_lanes()}; - - loop.unroll_once([&](size_t index) { - SourceVectorType src[7][7]; - SourceVectorType dst_vec; - - auto KernelWindow = - [&](size_t row, size_t col) - KLEIDICV_STREAMING_COMPATIBLE -> SourceVectorType& { - return src[row][col]; - }; - - auto load_array_element = [](const SourceType& x) { return vld1q(&x); }; - Base::load_window(KernelWindow, 
load_array_element, src_rows, - window_row_offsets, window_col_offsets, index); - - filter_.vector_path(KernelWindow, dst_vec); - - vst1q(&dst_rows[index], dst_vec); - }); - - loop.tail([&](size_t index) { - process_one_element_with_horizontal_borders( - src_rows, dst_rows, window_row_offsets, window_col_offsets, index); - }); - } - - void process_one_pixel_with_horizontal_borders( - Rows src_rows, Rows dst_rows, - BorderOffsets window_row_offsets, - BorderOffsets window_col_offsets) const KLEIDICV_STREAMING_COMPATIBLE { - for (size_t index = 0; index < src_rows.channels(); ++index) { - disable_loop_vectorization(); - process_one_element_with_horizontal_borders( - src_rows, dst_rows, window_row_offsets, window_col_offsets, index); - } - } - - private: - void process_one_element_with_horizontal_borders( - Rows src_rows, Rows dst_rows, - BorderOffsets window_row_offsets, BorderOffsets window_col_offsets, - size_t index) const KLEIDICV_STREAMING_COMPATIBLE { - SourceType src[7][7]; - - auto KernelWindow = [&](size_t row, size_t col) - KLEIDICV_STREAMING_COMPATIBLE -> SourceType& { - return src[row][col]; - }; - - auto load_array_element = [&](const SourceType& x) - KLEIDICV_STREAMING_COMPATIBLE { return x; }; - - Base::load_window(KernelWindow, load_array_element, src_rows, - window_row_offsets, window_col_offsets, index); - - filter_.scalar_path(KernelWindow, dst_rows[index]); - } - InnerFilterType filter_; -}; // end of class Filter2D - -// Shorthand for 7x7 2D filters driver type. 
-template -using Filter2D7x7 = Filter2D; - -} // namespace KLEIDICV_TARGET_NAMESPACE - -#endif // KLEIDICV_FILTER_2D_7X7_NEON_H diff --git a/kleidicv/include/kleidicv/filters/filter_2d_7x7_sc.h b/kleidicv/include/kleidicv/filters/filter_2d_7x7_sc.h deleted file mode 100644 index 529b3c637..000000000 --- a/kleidicv/include/kleidicv/filters/filter_2d_7x7_sc.h +++ /dev/null @@ -1,123 +0,0 @@ -// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates -// -// SPDX-License-Identifier: Apache-2.0 - -#ifndef KLEIDICV_FILTER_2D_7X7_SC_H -#define KLEIDICV_FILTER_2D_7X7_SC_H - -#include "filter_2d.h" -#include "filter_2d_7x7_base.h" -#include "kleidicv/sve2.h" - -namespace KLEIDICV_TARGET_NAMESPACE { -// Template for Filter2D 7x7. -template -class Filter2D - : public Filter2D7x7Base { - public: - using SourceType = typename InnerFilterType::SourceType; - using DestinationType = typename InnerFilterType::DestinationType; - using SourceVecTraits = - typename KLEIDICV_TARGET_NAMESPACE::VecTraits; - using SourceVectorType = typename SourceVecTraits::VectorType; - using BorderInfoType = - typename KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo7x7; - using BorderType = FixedBorderType; - using BorderOffsets = typename BorderInfoType::Offsets; - using Base = Filter2D7x7Base; - static constexpr size_t kMargin = 3UL; - explicit Filter2D(InnerFilterType filter) KLEIDICV_STREAMING_COMPATIBLE - : filter_{filter} {} - - void process_pixels_without_horizontal_borders( - size_t width, Rows src_rows, Rows dst_rows, - BorderOffsets window_row_offsets, - BorderOffsets window_col_offsets) const KLEIDICV_STREAMING_COMPATIBLE { - LoopUnroll2 loop{width * src_rows.channels(), - SourceVecTraits::num_lanes()}; - - loop.unroll_once([&](size_t index) KLEIDICV_STREAMING_COMPATIBLE { - svbool_t pg = SourceVecTraits::svptrue(); - process_elements_with_vector_operation(src_rows, dst_rows, - window_row_offsets, - window_col_offsets, index, pg); - }); - - loop.remaining( - [&](size_t index, size_t 
length) KLEIDICV_STREAMING_COMPATIBLE { - svbool_t pg = SourceVecTraits::svwhilelt(index, length); - process_elements_with_vector_operation(src_rows, dst_rows, - window_row_offsets, - window_col_offsets, index, pg); - }); - } - - void process_one_pixel_with_horizontal_borders( - Rows src_rows, Rows dst_rows, - BorderOffsets window_row_offsets, - BorderOffsets window_col_offsets) const KLEIDICV_STREAMING_COMPATIBLE { - for (size_t index = 0; index < src_rows.channels(); ++index) { - process_elements_with_vector_operation( - src_rows, dst_rows, window_row_offsets, window_col_offsets, index, - SourceVecTraits::template svptrue_pat()); - } - } - - private: - void process_elements_with_vector_operation( - Rows src_rows, Rows dst_rows, - BorderOffsets window_row_offsets, BorderOffsets window_col_offsets, - size_t index, svbool_t pg) const KLEIDICV_STREAMING_COMPATIBLE { - SourceVectorType src_0_0, src_0_1, src_0_2, src_0_3, src_0_4, src_0_5, - src_0_6, src_1_0, src_1_1, src_1_2, src_1_3, src_1_4, src_1_5, src_1_6, - src_2_0, src_2_1, src_2_2, src_2_3, src_2_4, src_2_5, src_2_6, src_3_0, - src_3_1, src_3_2, src_3_3, src_3_4, src_3_5, src_3_6, src_4_0, src_4_1, - src_4_2, src_4_3, src_4_4, src_4_5, src_4_6, src_5_0, src_5_1, src_5_2, - src_5_3, src_5_4, src_5_5, src_5_6, src_6_0, src_6_1, src_6_2, src_6_3, - src_6_4, src_6_5, src_6_6, output_vector; - - // Initialization - ScalableVectorArray2D KernelWindow = {{ - {std::ref(src_0_0), std::ref(src_0_1), std::ref(src_0_2), - std::ref(src_0_3), std::ref(src_0_4), std::ref(src_0_5), - std::ref(src_0_6)}, - {std::ref(src_1_0), std::ref(src_1_1), std::ref(src_1_2), - std::ref(src_1_3), std::ref(src_1_4), std::ref(src_1_5), - std::ref(src_1_6)}, - {std::ref(src_2_0), std::ref(src_2_1), std::ref(src_2_2), - std::ref(src_2_3), std::ref(src_2_4), std::ref(src_2_5), - std::ref(src_2_6)}, - {std::ref(src_3_0), std::ref(src_3_1), std::ref(src_3_2), - std::ref(src_3_3), std::ref(src_3_4), std::ref(src_3_5), - std::ref(src_3_6)}, - 
{std::ref(src_4_0), std::ref(src_4_1), std::ref(src_4_2), - std::ref(src_4_3), std::ref(src_4_4), std::ref(src_4_5), - std::ref(src_4_6)}, - {std::ref(src_5_0), std::ref(src_5_1), std::ref(src_5_2), - std::ref(src_5_3), std::ref(src_5_4), std::ref(src_5_5), - std::ref(src_5_6)}, - {std::ref(src_6_0), std::ref(src_6_1), std::ref(src_6_2), - std::ref(src_6_3), std::ref(src_6_4), std::ref(src_6_5), - std::ref(src_6_6)}, - }}; - - auto load_array_element = - [&](const SourceType& x) - KLEIDICV_STREAMING_COMPATIBLE { return svld1(pg, &x); }; - - Base::load_window(KernelWindow, load_array_element, src_rows, - window_row_offsets, window_col_offsets, index); - filter_.vector_path(KernelWindow, output_vector, pg); - svst1(pg, &dst_rows[index], output_vector); - } - - InnerFilterType filter_; -}; // end of class Filter2D - -// Shorthand for 7x7 2D filters driver type. -template -using Filter2D7x7 = Filter2D; - -} // namespace KLEIDICV_TARGET_NAMESPACE - -#endif // KLEIDICV_FILTER_2D_7X7_SC_H diff --git a/kleidicv/include/kleidicv/filters/filter_2d_neon.h b/kleidicv/include/kleidicv/filters/filter_2d_neon.h new file mode 100644 index 000000000..f700a063d --- /dev/null +++ b/kleidicv/include/kleidicv/filters/filter_2d_neon.h @@ -0,0 +1,177 @@ +// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_FILTER_2D_NEON_H +#define KLEIDICV_FILTER_2D_NEON_H + +#include "filter_2d_window_loader_3x3.h" +#include "filter_2d_window_loader_5x5.h" +#include "filter_2d_window_loader_7x7.h" +#include "kleidicv/neon.h" +#include "process_fitler_2d.h" + +namespace KLEIDICV_TARGET_NAMESPACE { + +template +class Filter2d { + public: + using SourceType = typename InnerFilterType::SourceType; + using DestinationType = typename InnerFilterType::DestinationType; + using SourceVecTraits = typename neon::VecTraits; + using SourceVectorType = typename SourceVecTraits::VectorType; + using BorderType = FixedBorderType; + static 
constexpr size_t kMargin = KSize / 2; + using BorderInfoType = + typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo; + using BorderOffsets = typename BorderInfoType::Offsets; + + explicit Filter2d(InnerFilterType filter) : filter_{filter} {} + + void process_pixels_without_horizontal_borders( + size_t width, Rows src_rows, Rows dst_rows, + BorderOffsets window_row_offsets, + BorderOffsets window_col_offsets) const { + LoopUnroll2 loop{width * src_rows.channels(), + SourceVecTraits::num_lanes()}; + + loop.unroll_once([&](size_t index) { + SourceVectorType src[KSize][KSize]; + SourceVectorType dst_vec; + + auto KernelWindow = [&](size_t row, size_t col) -> SourceVectorType& { + return src[row][col]; + }; + + auto load_array_element = [](const SourceType& x) { return vld1q(&x); }; + WindowLoaderType::load_window(KernelWindow, load_array_element, src_rows, + window_row_offsets, window_col_offsets, + index); + + filter_.vector_path(KernelWindow, dst_vec); + vst1q(&dst_rows[index], dst_vec); + }); + + loop.tail([&](size_t index) { + process_one_element_with_horizontal_borders( + src_rows, dst_rows, window_row_offsets, window_col_offsets, index); + }); + } + + void process_pixels_of_dual_rows_without_horizontal_borders( + size_t width, Rows src_rows, Rows dst_rows, + BorderOffsets window_row_offsets_0, BorderOffsets window_row_offsets_1, + BorderOffsets window_col_offsets) const { + LoopUnroll2 loop{width * src_rows.channels(), + SourceVecTraits::num_lanes()}; + + loop.unroll_once([&](size_t index) { + SourceVectorType src[KSize + 1][KSize]; + SourceVectorType dst_vec_0; + SourceVectorType dst_vec_1; + auto KernelWindow = [&](size_t row, size_t col) -> SourceVectorType& { + return src[row][col]; + }; + + auto load_array_element = [](const SourceType& x) { return vld1q(&x); }; + WindowLoaderType::load_window_to_handle_dual_rows( + KernelWindow, load_array_element, src_rows, window_row_offsets_0, + window_row_offsets_1, window_col_offsets, index); + + 
filter_.vector_path_for_dual_row_handling(KernelWindow, dst_vec_0, + dst_vec_1); + vst1q(&dst_rows.at(0, 0)[index], dst_vec_0); + vst1q(&dst_rows.at(1, 0)[index], dst_vec_1); + }); + + loop.tail([&](size_t index) { + process_two_element_vertically_with_or_without_horizontal_borders( + src_rows, dst_rows, window_row_offsets_0, window_row_offsets_1, + window_col_offsets, index); + }); + } + + void process_one_pixel_with_horizontal_borders( + Rows src_rows, Rows dst_rows, + BorderOffsets window_row_offsets, + BorderOffsets window_col_offsets) const KLEIDICV_STREAMING_COMPATIBLE { + for (size_t index = 0; index < src_rows.channels(); ++index) { + disable_loop_vectorization(); + process_one_element_with_horizontal_borders( + src_rows, dst_rows, window_row_offsets, window_col_offsets, index); + } + } + + // Processes two vertically adjacent pixels in a single column + void process_two_pixels_with_horizontal_borders( + Rows src_rows, Rows dst_rows, + BorderOffsets window_row_offsets_0, BorderOffsets window_row_offsets_1, + BorderOffsets window_col_offsets) const { + for (size_t index = 0; index < src_rows.channels(); ++index) { + disable_loop_vectorization(); + process_two_element_vertically_with_or_without_horizontal_borders( + src_rows, dst_rows, window_row_offsets_0, window_row_offsets_1, + window_col_offsets, index); + } + } + + private: + void process_one_element_with_horizontal_borders( + Rows src_rows, Rows dst_rows, + BorderOffsets window_row_offsets, BorderOffsets window_col_offsets, + size_t index) const KLEIDICV_STREAMING_COMPATIBLE { + SourceType src[KSize][KSize]; + + auto KernelWindow = [&](size_t row, size_t col) + KLEIDICV_STREAMING_COMPATIBLE -> SourceType& { + return src[row][col]; + }; + + auto load_array_element = [&](const SourceType& x) + KLEIDICV_STREAMING_COMPATIBLE { return x; }; + + WindowLoaderType::load_window(KernelWindow, load_array_element, src_rows, + window_row_offsets, window_col_offsets, + index); + + filter_.scalar_path(KernelWindow, 
dst_rows[index]); + } + + void process_two_element_vertically_with_or_without_horizontal_borders( + Rows src_rows, Rows dst_rows, + BorderOffsets window_row_offsets_0, BorderOffsets window_row_offsets_1, + BorderOffsets window_col_offsets, size_t index) const { + SourceType src[KSize + 1][KSize]; + auto KernelWindow = [&](size_t row, size_t col) -> SourceType& { + return src[row][col]; + }; + auto load_array_element = [](const SourceType& x) { return x; }; + WindowLoaderType::load_window_to_handle_dual_rows( + KernelWindow, load_array_element, src_rows, window_row_offsets_0, + window_row_offsets_1, window_col_offsets, index); + + filter_.scalar_path_for_dual_row_handling( + KernelWindow, dst_rows.at(0, 0)[index], dst_rows.at(1, 0)[index]); + } + + InnerFilterType filter_; +}; + +template +using Filter2D3x3 = + Filter2d>; + +template +using Filter2D5x5 = + Filter2d>; + +template +using Filter2D7x7 = + Filter2d>; + +} // namespace KLEIDICV_TARGET_NAMESPACE + +#endif // KLEIDICV_FILTER_2D_NEON_H diff --git a/kleidicv/include/kleidicv/filters/filter_2d_sc.h b/kleidicv/include/kleidicv/filters/filter_2d_sc.h new file mode 100644 index 000000000..2783849eb --- /dev/null +++ b/kleidicv/include/kleidicv/filters/filter_2d_sc.h @@ -0,0 +1,309 @@ +// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_FILTER_2D_SC_H +#define KLEIDICV_FILTER_2D_SC_H + +#include "filter_2d_window_loader_3x3.h" +#include "filter_2d_window_loader_5x5.h" +#include "filter_2d_window_loader_7x7.h" +#include "kleidicv/sve2.h" +#include "process_fitler_2d.h" + +namespace KLEIDICV_TARGET_NAMESPACE { + +template +class Filter2D3x3VectorOperations { + public: + using BorderInfoType = + typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo; + using BorderOffsets = typename BorderInfoType::Offsets; + + template + static void process_one_element_with_vector_operation( + svbool_t pg, Rows src_rows, Rows dst_rows, + BorderOffsets 
window_row_offsets, BorderOffsets window_col_offsets, + size_t index, + const InnerFilterType& filter_) KLEIDICV_STREAMING_COMPATIBLE { + SourceVectorType src_0_0, src_0_1, src_0_2, src_1_0, src_1_1, src_1_2, + src_2_0, src_2_1, src_2_2, dst_vec; + ScalableVectorArray2D KernelWindow = {{ + {std::ref(src_0_0), std::ref(src_0_1), std::ref(src_0_2)}, + {std::ref(src_1_0), std::ref(src_1_1), std::ref(src_1_2)}, + {std::ref(src_2_0), std::ref(src_2_1), std::ref(src_2_2)}, + }}; + + auto load_array_element = + [&](const SourceType& x) + KLEIDICV_STREAMING_COMPATIBLE { return svld1(pg, &x); }; + + WindowLoaderType::load_window(KernelWindow, load_array_element, src_rows, + window_row_offsets, window_col_offsets, + index); + filter_.vector_path(pg, KernelWindow, dst_vec); + + svst1(pg, &dst_rows[index], dst_vec); + } + + template + static void process_two_elements_with_vector_operation( + svbool_t pg, Rows src_rows, Rows dst_rows, + BorderOffsets window_row_offsets_0, BorderOffsets window_row_offsets_1, + BorderOffsets window_col_offsets, size_t index, + const InnerFilterType& filter_) KLEIDICV_STREAMING_COMPATIBLE { + SourceVectorType src_0_0, src_0_1, src_0_2, src_1_0, src_1_1, src_1_2, + src_2_0, src_2_1, src_2_2, src_3_0, src_3_1, src_3_2, dst_vec_0, + dst_vec_1; + + ScalableVectorArray2D KernelWindow = {{ + {std::ref(src_0_0), std::ref(src_0_1), std::ref(src_0_2)}, + {std::ref(src_1_0), std::ref(src_1_1), std::ref(src_1_2)}, + {std::ref(src_2_0), std::ref(src_2_1), std::ref(src_2_2)}, + {std::ref(src_3_0), std::ref(src_3_1), std::ref(src_3_2)}, + }}; + + auto load_array_element = + [&](const SourceType& x) + KLEIDICV_STREAMING_COMPATIBLE { return svld1(pg, &x); }; + + WindowLoaderType::load_window_to_handle_dual_rows( + KernelWindow, load_array_element, src_rows, window_row_offsets_0, + window_row_offsets_1, window_col_offsets, index); + + filter_.vector_path_for_dual_row_handling( + pg, KernelWindow, dst_vec_0, + dst_vec_1); // dst_rows.at(0, kMargin) + svst1(pg, 
&dst_rows.at(0, 0)[index], dst_vec_0); + svst1(pg, &dst_rows.at(1, 0)[index], dst_vec_1); + } +}; + +template +class Filter2D5x5VectorOperations { + public: + using BorderInfoType = + typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo; + using BorderOffsets = typename BorderInfoType::Offsets; + + template + static void process_one_element_with_vector_operation( + svbool_t pg, Rows src_rows, Rows dst_rows, + BorderOffsets window_row_offsets, BorderOffsets window_col_offsets, + size_t index, + const InnerFilterType& filter_) KLEIDICV_STREAMING_COMPATIBLE { + SourceVectorType src_0_0, src_0_1, src_0_2, src_0_3, src_0_4, src_1_0, + src_1_1, src_1_2, src_1_3, src_1_4, src_2_0, src_2_1, src_2_2, src_2_3, + src_2_4, src_3_0, src_3_1, src_3_2, src_3_3, src_3_4, src_4_0, src_4_1, + src_4_2, src_4_3, src_4_4, output_vector; + + // Initialization + ScalableVectorArray2D KernelWindow = {{ + {std::ref(src_0_0), std::ref(src_0_1), std::ref(src_0_2), + std::ref(src_0_3), std::ref(src_0_4)}, + {std::ref(src_1_0), std::ref(src_1_1), std::ref(src_1_2), + std::ref(src_1_3), std::ref(src_1_4)}, + {std::ref(src_2_0), std::ref(src_2_1), std::ref(src_2_2), + std::ref(src_2_3), std::ref(src_2_4)}, + {std::ref(src_3_0), std::ref(src_3_1), std::ref(src_3_2), + std::ref(src_3_3), std::ref(src_3_4)}, + {std::ref(src_4_0), std::ref(src_4_1), std::ref(src_4_2), + std::ref(src_4_3), std::ref(src_4_4)}, + }}; + + auto load_array_element = + [&](const SourceType& x) + KLEIDICV_STREAMING_COMPATIBLE { return svld1(pg, &x); }; + + WindowLoaderType::load_window(KernelWindow, load_array_element, src_rows, + window_row_offsets, window_col_offsets, + index); + filter_.vector_path(pg, KernelWindow, output_vector); + svst1(pg, &dst_rows[index], output_vector); + } +}; + +template +class Filter2D7x7VectorOperations { + public: + using BorderInfoType = + typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo; + using BorderOffsets = typename BorderInfoType::Offsets; + + template + static void 
process_one_element_with_vector_operation( + svbool_t pg, Rows src_rows, Rows dst_rows, + BorderOffsets window_row_offsets, BorderOffsets window_col_offsets, + size_t index, + const InnerFilterType& filter_) KLEIDICV_STREAMING_COMPATIBLE { + SourceVectorType src_0_0, src_0_1, src_0_2, src_0_3, src_0_4, src_0_5, + src_0_6, src_1_0, src_1_1, src_1_2, src_1_3, src_1_4, src_1_5, src_1_6, + src_2_0, src_2_1, src_2_2, src_2_3, src_2_4, src_2_5, src_2_6, src_3_0, + src_3_1, src_3_2, src_3_3, src_3_4, src_3_5, src_3_6, src_4_0, src_4_1, + src_4_2, src_4_3, src_4_4, src_4_5, src_4_6, src_5_0, src_5_1, src_5_2, + src_5_3, src_5_4, src_5_5, src_5_6, src_6_0, src_6_1, src_6_2, src_6_3, + src_6_4, src_6_5, src_6_6, output_vector; + + // Initialization + ScalableVectorArray2D KernelWindow = {{ + {std::ref(src_0_0), std::ref(src_0_1), std::ref(src_0_2), + std::ref(src_0_3), std::ref(src_0_4), std::ref(src_0_5), + std::ref(src_0_6)}, + {std::ref(src_1_0), std::ref(src_1_1), std::ref(src_1_2), + std::ref(src_1_3), std::ref(src_1_4), std::ref(src_1_5), + std::ref(src_1_6)}, + {std::ref(src_2_0), std::ref(src_2_1), std::ref(src_2_2), + std::ref(src_2_3), std::ref(src_2_4), std::ref(src_2_5), + std::ref(src_2_6)}, + {std::ref(src_3_0), std::ref(src_3_1), std::ref(src_3_2), + std::ref(src_3_3), std::ref(src_3_4), std::ref(src_3_5), + std::ref(src_3_6)}, + {std::ref(src_4_0), std::ref(src_4_1), std::ref(src_4_2), + std::ref(src_4_3), std::ref(src_4_4), std::ref(src_4_5), + std::ref(src_4_6)}, + {std::ref(src_5_0), std::ref(src_5_1), std::ref(src_5_2), + std::ref(src_5_3), std::ref(src_5_4), std::ref(src_5_5), + std::ref(src_5_6)}, + {std::ref(src_6_0), std::ref(src_6_1), std::ref(src_6_2), + std::ref(src_6_3), std::ref(src_6_4), std::ref(src_6_5), + std::ref(src_6_6)}, + }}; + + auto load_array_element = + [&](const SourceType& x) + KLEIDICV_STREAMING_COMPATIBLE { return svld1(pg, &x); }; + + WindowLoaderType::load_window(KernelWindow, load_array_element, src_rows, + window_row_offsets, 
window_col_offsets, + index); + filter_.vector_path(pg, KernelWindow, output_vector); + svst1(pg, &dst_rows[index], output_vector); + } +}; + +template +class Filter2d { + public: + using SourceType = typename InnerFilterType::SourceType; + using DestinationType = typename InnerFilterType::DestinationType; + using SourceVecTraits = + typename KLEIDICV_TARGET_NAMESPACE::VecTraits; + using SourceVectorType = typename SourceVecTraits::VectorType; + using BorderInfoType = + typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo; + using BorderType = FixedBorderType; + using BorderOffsets = typename BorderInfoType::Offsets; + // using Base = VectorOperationProviderType; + static constexpr size_t kMargin = KSize / 2UL; + explicit Filter2d(InnerFilterType filter) KLEIDICV_STREAMING_COMPATIBLE + : filter_{filter} {} + + void process_pixels_without_horizontal_borders( + size_t width, Rows src_rows, Rows dst_rows, + BorderOffsets window_row_offsets, + BorderOffsets window_col_offsets) const KLEIDICV_STREAMING_COMPATIBLE { + LoopUnroll2 loop{width * src_rows.channels(), SourceVecTraits::num_lanes()}; + + loop.unroll_once([&](size_t index) KLEIDICV_STREAMING_COMPATIBLE { + svbool_t pg = SourceVecTraits::svptrue(); + VectorOperationProviderType:: + template process_one_element_with_vector_operation( + pg, src_rows, dst_rows, window_row_offsets, window_col_offsets, + index, filter_); + }); + + loop.remaining([&](size_t index, + size_t length) KLEIDICV_STREAMING_COMPATIBLE { + svbool_t pg = SourceVecTraits::svwhilelt(index, length); + VectorOperationProviderType:: + template process_one_element_with_vector_operation( + pg, src_rows, dst_rows, window_row_offsets, window_col_offsets, + index, filter_); + }); + } + + void process_one_pixel_with_horizontal_borders( + Rows src_rows, Rows dst_rows, + BorderOffsets window_row_offsets, + BorderOffsets window_col_offsets) const KLEIDICV_STREAMING_COMPATIBLE { + for (size_t index = 0; index < src_rows.channels(); ++index) { + 
VectorOperationProviderType:: + template process_one_element_with_vector_operation( + SourceVecTraits::template svptrue_pat(), src_rows, + dst_rows, window_row_offsets, window_col_offsets, index, filter_); + } + } + + void process_pixels_of_dual_rows_without_horizontal_borders( + size_t width, Rows src_rows, Rows dst_rows, + BorderOffsets window_row_offsets_0, BorderOffsets window_row_offsets_1, + BorderOffsets window_col_offsets) const KLEIDICV_STREAMING_COMPATIBLE { + LoopUnroll2 loop{width * src_rows.channels(), SourceVecTraits::num_lanes()}; + loop.unroll_once([&](size_t index) KLEIDICV_STREAMING_COMPATIBLE { + svbool_t pg = SourceVecTraits::svptrue(); + VectorOperationProviderType:: + template process_two_elements_with_vector_operation( + pg, src_rows, dst_rows, window_row_offsets_0, + window_row_offsets_1, window_col_offsets, index, filter_); + }); + + loop.remaining([&](size_t index, + size_t length) KLEIDICV_STREAMING_COMPATIBLE { + svbool_t pg = SourceVecTraits::svwhilelt(index, length); + VectorOperationProviderType:: + template process_two_elements_with_vector_operation( + pg, src_rows, dst_rows, window_row_offsets_0, + window_row_offsets_1, window_col_offsets, index, filter_); + }); + } + + // Processes two vertically adjacent pixels in a single column + void process_two_pixels_with_horizontal_borders( + Rows src_rows, Rows dst_rows, + BorderOffsets window_row_offsets_0, BorderOffsets window_row_offsets_1, + BorderOffsets window_col_offsets) const KLEIDICV_STREAMING_COMPATIBLE { + for (size_t index = 0; index < src_rows.channels(); ++index) { + VectorOperationProviderType:: + template process_two_elements_with_vector_operation( + SourceVecTraits::template svptrue_pat(), src_rows, + dst_rows, window_row_offsets_0, window_row_offsets_1, + window_col_offsets, index, filter_); + } + } + + private: + InnerFilterType filter_; +}; + +// Shorthand for 3x3 2D filters driver type. 
+template +using Filter2D3x3 = Filter2d< + InnerFilterType, 3UL, + Filter2D3x3VectorOperations< + typename InnerFilterType::SourceType, + Filter2dWindowLoader3x3>>; + +template +using Filter2D5x5 = Filter2d< + InnerFilterType, 5UL, + Filter2D5x5VectorOperations< + typename InnerFilterType::SourceType, + Filter2dWindowLoader5x5>>; + +template +using Filter2D7x7 = Filter2d< + InnerFilterType, 7UL, + Filter2D7x7VectorOperations< + typename InnerFilterType::SourceType, + Filter2dWindowLoader7x7>>; + +} // namespace KLEIDICV_TARGET_NAMESPACE + +#endif // KLEIDICV_FILTER_2D_SC_H diff --git a/kleidicv/include/kleidicv/filters/filter_2d_window_loader_3x3.h b/kleidicv/include/kleidicv/filters/filter_2d_window_loader_3x3.h new file mode 100644 index 000000000..d48162ca7 --- /dev/null +++ b/kleidicv/include/kleidicv/filters/filter_2d_window_loader_3x3.h @@ -0,0 +1,68 @@ +// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_FILTER_2D_WINDOW_LOADER_3X3_H +#define KLEIDICV_FILTER_2D_WINDOW_LOADER_3X3_H + +#include "kleidicv/types.h" +#include "kleidicv/workspace/border_3x3.h" + +namespace KLEIDICV_TARGET_NAMESPACE { +template +class Filter2dWindowLoader3x3 { + public: + using BorderInfoType = + typename KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo3x3; + using BorderOffsets = typename BorderInfoType::Offsets; + + template + static void load_window(KernelWindowFunctor& KernelWindow, + LoadArrayElementFunctionType load_array_element, + Rows src_rows, + BorderOffsets window_row_offsets, + BorderOffsets window_col_offsets, + size_t index) KLEIDICV_STREAMING_COMPATIBLE { + KernelWindow(0, 0) = load_array_element( + src_rows.at(window_row_offsets.c0(), window_col_offsets.c0())[index]); + KernelWindow(0, 1) = load_array_element( + src_rows.at(window_row_offsets.c0(), window_col_offsets.c1())[index]); + KernelWindow(0, 2) = load_array_element( + src_rows.at(window_row_offsets.c0(), 
window_col_offsets.c2())[index]); + + KernelWindow(1, 0) = load_array_element( + src_rows.at(window_row_offsets.c1(), window_col_offsets.c0())[index]); + KernelWindow(1, 1) = load_array_element( + src_rows.at(window_row_offsets.c1(), window_col_offsets.c1())[index]); + KernelWindow(1, 2) = load_array_element( + src_rows.at(window_row_offsets.c1(), window_col_offsets.c2())[index]); + + KernelWindow(2, 0) = load_array_element( + src_rows.at(window_row_offsets.c2(), window_col_offsets.c0())[index]); + KernelWindow(2, 1) = load_array_element( + src_rows.at(window_row_offsets.c2(), window_col_offsets.c1())[index]); + KernelWindow(2, 2) = load_array_element( + src_rows.at(window_row_offsets.c2(), window_col_offsets.c2())[index]); + } + + template + static void load_window_to_handle_dual_rows( + KernelWindowFunctor& KernelWindow, + LoadArrayElementFunctionType load_array_element, + Rows src_rows, BorderOffsets window_row_offsets_0, + BorderOffsets window_row_offsets_1, BorderOffsets window_col_offsets, + size_t index) KLEIDICV_STREAMING_COMPATIBLE { + load_window(KernelWindow, load_array_element, src_rows, + window_row_offsets_0, window_col_offsets, index); + + KernelWindow(3, 0) = load_array_element(src_rows.at( + window_row_offsets_1.c2() + 1, window_col_offsets.c0())[index]); + KernelWindow(3, 1) = load_array_element(src_rows.at( + window_row_offsets_1.c2() + 1, window_col_offsets.c1())[index]); + KernelWindow(3, 2) = load_array_element(src_rows.at( + window_row_offsets_1.c2() + 1, window_col_offsets.c2())[index]); + } +}; +} // namespace KLEIDICV_TARGET_NAMESPACE + +#endif // KLEIDICV_FILTER_2D_WINDOW_LOADER_3X3_H diff --git a/kleidicv/include/kleidicv/filters/filter_2d_5x5_base.h b/kleidicv/include/kleidicv/filters/filter_2d_window_loader_5x5.h similarity index 86% rename from kleidicv/include/kleidicv/filters/filter_2d_5x5_base.h rename to kleidicv/include/kleidicv/filters/filter_2d_window_loader_5x5.h index 8f4d92921..94820a0c6 100644 --- 
a/kleidicv/include/kleidicv/filters/filter_2d_5x5_base.h +++ b/kleidicv/include/kleidicv/filters/filter_2d_window_loader_5x5.h @@ -2,26 +2,26 @@ // // SPDX-License-Identifier: Apache-2.0 -#ifndef KLEIDICV_FILTER_2D_5X5_BASE_H -#define KLEIDICV_FILTER_2D_5X5_BASE_H +#ifndef KLEIDICV_FILTER_2D_WINDOW_LOADER_5X5_H +#define KLEIDICV_FILTER_2D_WINDOW_LOADER_5X5_H #include "kleidicv/workspace/border_5x5.h" namespace KLEIDICV_TARGET_NAMESPACE { template -class Filter2D5x5Base { - protected: +class Filter2dWindowLoader5x5 { + public: using BorderInfoType = typename KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo5x5; using BorderOffsets = typename BorderInfoType::Offsets; template - void load_window(KernelWindowFunctor& KernelWindow, - LoadArrayElementFunctionType load_array_element, - Rows src_rows, - BorderOffsets window_row_offsets, - BorderOffsets window_col_offsets, - size_t index) const KLEIDICV_STREAMING_COMPATIBLE { + static void load_window(KernelWindowFunctor& KernelWindow, + LoadArrayElementFunctionType load_array_element, + Rows src_rows, + BorderOffsets window_row_offsets, + BorderOffsets window_col_offsets, + size_t index) KLEIDICV_STREAMING_COMPATIBLE { KernelWindow(0, 0) = load_array_element( src_rows.at(window_row_offsets.c0(), window_col_offsets.c0())[index]); KernelWindow(0, 1) = load_array_element( @@ -76,4 +76,4 @@ class Filter2D5x5Base { }; } // namespace KLEIDICV_TARGET_NAMESPACE -#endif // KLEIDICV_FILTER_2D_5X5_BASE_H +#endif // KLEIDICV_FILTER_2D_WINDOW_LOADER_5X5_H diff --git a/kleidicv/include/kleidicv/filters/filter_2d_7x7_base.h b/kleidicv/include/kleidicv/filters/filter_2d_window_loader_7x7.h similarity index 92% rename from kleidicv/include/kleidicv/filters/filter_2d_7x7_base.h rename to kleidicv/include/kleidicv/filters/filter_2d_window_loader_7x7.h index 0fda13274..f232f8fbd 100644 --- a/kleidicv/include/kleidicv/filters/filter_2d_7x7_base.h +++ b/kleidicv/include/kleidicv/filters/filter_2d_window_loader_7x7.h @@ -2,26 +2,26 @@ // // 
SPDX-License-Identifier: Apache-2.0 -#ifndef KLEIDICV_FILTER_2D_7X7_BASE_H -#define KLEIDICV_FILTER_2D_7X7_BASE_H +#ifndef KLEIDICV_FILTER_2D_WINDOW_LOADER_7X7_H +#define KLEIDICV_FILTER_2D_WINDOW_LOADER_7X7_H #include "kleidicv/workspace/border_7x7.h" namespace KLEIDICV_TARGET_NAMESPACE { template -class Filter2D7x7Base { - protected: +class Filter2dWindowLoader7x7 { + public: using BorderInfoType = typename KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo7x7; using BorderOffsets = typename BorderInfoType::Offsets; template - void load_window(KernelWindowFunctor& KernelWindow, - LoadArrayElementFunctionType load_array_element, - Rows src_rows, - BorderOffsets window_row_offsets, - BorderOffsets window_col_offsets, - size_t index) const KLEIDICV_STREAMING_COMPATIBLE { + static void load_window(KernelWindowFunctor& KernelWindow, + LoadArrayElementFunctionType load_array_element, + Rows src_rows, + BorderOffsets window_row_offsets, + BorderOffsets window_col_offsets, + size_t index) KLEIDICV_STREAMING_COMPATIBLE { // first row KernelWindow(0, 0) = load_array_element( src_rows.at(window_row_offsets.c0(), window_col_offsets.c0())[index]); @@ -132,4 +132,4 @@ class Filter2D7x7Base { } // namespace KLEIDICV_TARGET_NAMESPACE -#endif // KLEIDICV_FILTER_2D_7X7_BASE_H +#endif // KLEIDICV_FILTER_2D_WINDOW_LOADER_7X7_H diff --git a/kleidicv/include/kleidicv/filters/median_blur.h b/kleidicv/include/kleidicv/filters/median_blur.h index 8a1eebf94..2ee23d829 100644 --- a/kleidicv/include/kleidicv/filters/median_blur.h +++ b/kleidicv/include/kleidicv/filters/median_blur.h @@ -112,7 +112,7 @@ inline std::pair median_blur_is_implemented( if ((src != dst) && (channels <= KLEIDICV_MAXIMUM_CHANNEL_COUNT) && (kernel_width == kernel_height) && (height >= kernel_height - 1) && (width >= kernel_width - 1) && - ((kernel_width == 5) || (kernel_width == 7)) && + ((kernel_width == 3) || (kernel_width == 5) || (kernel_width == 7)) && fixed_border_type.has_value()) { return std::make_pair(KLEIDICV_OK, 
*fixed_border_type); } diff --git a/kleidicv/include/kleidicv/filters/filter_2d.h b/kleidicv/include/kleidicv/filters/process_fitler_2d.h similarity index 50% rename from kleidicv/include/kleidicv/filters/filter_2d.h rename to kleidicv/include/kleidicv/filters/process_fitler_2d.h index cac7c71ad..0c8f1a766 100644 --- a/kleidicv/include/kleidicv/filters/filter_2d.h +++ b/kleidicv/include/kleidicv/filters/process_fitler_2d.h @@ -2,8 +2,8 @@ // // SPDX-License-Identifier: Apache-2.0 -#ifndef KLEIDICV_FILTERS_FILTER_2D_H -#define KLEIDICV_FILTERS_FILTER_2D_H +#ifndef KLEIDICV_PROCESS_FILTERS_FILTER_2D_H +#define KLEIDICV_PROCESS_FILTERS_FILTER_2D_H #include "kleidicv/kleidicv.h" #include "kleidicv/types.h" @@ -67,6 +67,67 @@ void process_filter2d(Rectangle rect, size_t y_begin, size_t y_end, } } +template +void process_filter2d_by_dual_rows( + Rectangle rect, size_t y_begin, size_t y_end, + Rows src_rows, + Rows dst_rows, + typename FilterType::BorderType border_type, + FilterType filter) KLEIDICV_STREAMING_COMPATIBLE { + // Border helper which calculates border offsets. + typename FilterType::BorderInfoType vertical_border{rect.height(), + border_type}; + typename FilterType::BorderInfoType horizontal_border{rect.width(), + border_type}; + constexpr size_t kMargin = filter.kMargin; + size_t vertical_index = y_begin; + + for (; vertical_index < y_end - 1; vertical_index += 2) { + // Recalculate vertical border offsets. + auto vertical_offsets_0 = + vertical_border.offsets_with_border(vertical_index); + auto vertical_offsets_1 = + vertical_border.offsets_with_border(vertical_index + 1); + + // Process data affected by left border. 
+ KLEIDICV_FORCE_LOOP_UNROLL + for (size_t horizontal_index = 0; horizontal_index < kMargin; + ++horizontal_index) { + auto horizontal_offsets = + horizontal_border.offsets_with_left_border(horizontal_index); + filter.process_two_pixels_with_horizontal_borders( + src_rows.at(vertical_index, horizontal_index), + dst_rows.at(vertical_index, horizontal_index), vertical_offsets_0, + vertical_offsets_1, horizontal_offsets); + } + + // Process data which is not affected by any borders in bulk. + size_t width_without_borders = rect.width() - (2 * kMargin); + auto horizontal_offsets = horizontal_border.offsets_without_border(); + filter.process_pixels_of_dual_rows_without_horizontal_borders( + width_without_borders, src_rows.at(vertical_index, kMargin), + dst_rows.at(vertical_index, kMargin), vertical_offsets_0, + vertical_offsets_1, horizontal_offsets); + + // Process data affected by right border. + KLEIDICV_FORCE_LOOP_UNROLL + for (size_t horizontal_index = 0; horizontal_index < kMargin; + ++horizontal_index) { + size_t index = rect.width() - kMargin + horizontal_index; + auto horizontal_offsets = + horizontal_border.offsets_with_right_border(index); + + filter.process_two_pixels_with_horizontal_borders( + src_rows.at(vertical_index, index), + dst_rows.at(vertical_index, index), vertical_offsets_0, + vertical_offsets_1, horizontal_offsets); + } + } + + process_filter2d(rect, vertical_index, y_end, src_rows, dst_rows, border_type, + filter); +} + } // namespace KLEIDICV_TARGET_NAMESPACE -#endif // KLEIDICV_FILTERS_FILTER_2D_H +#endif // KLEIDICV_PROCESS_FILTERS_FILTER_2D_H diff --git a/kleidicv/include/kleidicv/kleidicv.h b/kleidicv/include/kleidicv/kleidicv.h index 988d74331..eb27f7efd 100644 --- a/kleidicv/include/kleidicv/kleidicv.h +++ b/kleidicv/include/kleidicv/kleidicv.h @@ -2056,10 +2056,11 @@ kleidicv_error_t kleidicv_warp_perspective_u8( /// equal to `kernel_height - 1`. /// @param channels Number of channels in the data. 
Must not be more than /// @ref KLEIDICV_MAXIMUM_CHANNEL_COUNT. -/// @param kernel_width Width of the Median kernel. Must be 5 or 7 and equal -/// to `kernel_height`. -/// @param kernel_height Height of the Median kernel. Must be 5 or 7 and equal -/// to `kernel_width`. +/// @param kernel_width Width of the Median kernel. Must be 3 or 5 or 7 and +/// equal to `kernel_height`. +/// @param kernel_height Height of the Median kernel. Must be 3 or 5 or 7 and +/// equal to `kernel_width`. +/// /// @param border_type Way of handling the border. The supported border types /// are: \n /// - @ref KLEIDICV_BORDER_TYPE_REPLICATE \n diff --git a/kleidicv/src/filters/median_blur_neon.cpp b/kleidicv/src/filters/median_blur_neon.cpp index 00c66ae82..bfbe7537d 100644 --- a/kleidicv/src/filters/median_blur_neon.cpp +++ b/kleidicv/src/filters/median_blur_neon.cpp @@ -3,14 +3,15 @@ // SPDX-License-Identifier: Apache-2.0 #include "kleidicv/ctypes.h" -#include "kleidicv/filters/filter_2d.h" -#include "kleidicv/filters/filter_2d_5x5_neon.h" -#include "kleidicv/filters/filter_2d_7x7_neon.h" +#include "kleidicv/filters/filter_2d_neon.h" #include "kleidicv/filters/median_blur.h" +#include "kleidicv/filters/process_fitler_2d.h" #include "kleidicv/kleidicv.h" #include "kleidicv/neon.h" +#include "kleidicv/workspace/border_3x3.h" #include "kleidicv/workspace/border_5x5.h" #include "kleidicv/workspace/border_7x7.h" +#include "median_blur_sorting_network_3x3.h" #include "median_blur_sorting_network_5x5.h" #include "median_blur_sorting_network_7x7.h" @@ -21,102 +22,151 @@ template class MedianBlur; template -class MedianBlurBase { - protected: - class vectorized_comparator { - public: - using SourceVectorType = typename VecTraits::VectorType; - - static void compare_and_swap(SourceVectorType& left, - SourceVectorType& right, Monostate&) { - SourceVectorType max_value = vmaxq(left, right); - SourceVectorType min_value = vminq(left, right); - left = min_value; - right = max_value; - } +class 
VectorizedComparator { + public: + using SourceVectorType = typename VecTraits::VectorType; + + static void compare_and_swap(SourceVectorType& left, SourceVectorType& right, + Monostate&) { + SourceVectorType max_value = vmaxq(left, right); + SourceVectorType min_value = vminq(left, right); + left = min_value; + right = max_value; + } - static void min(SourceVectorType& left, SourceVectorType& right, - Monostate&) { - left = vminq(left, right); - } + static void min(SourceVectorType& left, SourceVectorType& right, Monostate&) { + left = vminq(left, right); + } - static void max(SourceVectorType& left, SourceVectorType& right, - Monostate&) { - right = vmaxq(left, right); - } - }; - class scalar_comparator { - public: - static void compare_and_swap(ScalarType& left, ScalarType& right, - Monostate&) { - if (left > right) { - std::swap(left, right); - } - } + static void max(SourceVectorType& left, SourceVectorType& right, Monostate&) { + right = vmaxq(left, right); + } + static SourceVectorType get_min(SourceVectorType& left, + SourceVectorType& right, Monostate&) { + return vminq(left, right); + } + static SourceVectorType get_max(SourceVectorType& left, + SourceVectorType& right, Monostate&) { + return vmaxq(left, right); + } +}; - static void min(ScalarType& left, ScalarType& right, Monostate&) { - left = std::min(left, right); +template +class ScalarComparator { + public: + static void compare_and_swap(ScalarType& left, ScalarType& right, + Monostate&) { + if (left > right) { + std::swap(left, right); } + } - static void max(ScalarType& left, ScalarType& right, Monostate&) { - right = std::max(left, right); - } - }; + static void min(ScalarType& left, ScalarType& right, Monostate&) { + left = std::min(left, right); + } + + static void max(ScalarType& left, ScalarType& right, Monostate&) { + right = std::max(left, right); + } + static ScalarType get_min(ScalarType& left, ScalarType& right, Monostate&) { + return std::min(left, right); + } + static ScalarType 
get_max(ScalarType& left, ScalarType& right, Monostate&) { + return std::max(left, right); + } }; +// Template for Median Blur 3x3 filters. +template +class MedianBlur { + public: + using SourceType = ScalarType; + using DestinationType = SourceType; + using SourceVectorType = typename VecTraits::VectorType; + using DestinationVectorType = typename VecTraits::VectorType; + + template + void vector_path(KernelWindowFunctor& KernelWindow, + DestinationVectorType& output_vec) const { + Monostate ctx; + sorting_network3x3_single_row>( + KernelWindow, output_vec, ctx); + } + + template + void vector_path_for_dual_row_handling( + KernelWindowFunctor& KernelWindow, DestinationVectorType& output_vec_0, + DestinationVectorType& output_vec_1) const { + Monostate ctx; + sorting_network3x3_dual_rows>( + KernelWindow, output_vec_0, output_vec_1, ctx); + } + + template + void scalar_path(KernelWindowFunctor& KernelWindow, + DestinationType& output_vec) const { + Monostate ctx; + sorting_network3x3_single_row>( + KernelWindow, output_vec, ctx); + } + + template + void scalar_path_for_dual_row_handling(KernelWindowFunctor& KernelWindow, + DestinationType& output_vec0, + DestinationType& output_vec1) const { + Monostate ctx; + sorting_network3x3_dual_rows>( + KernelWindow, output_vec0, output_vec1, ctx); + } +}; // end of class MedianBlur + // Template for Median Blur 5x5 filters. 
template -class MedianBlur : public MedianBlurBase { +class MedianBlur { public: using SourceType = ScalarType; using DestinationType = SourceType; using SourceVectorType = typename VecTraits::VectorType; using DestinationVectorType = typename VecTraits::VectorType; - using VectorComparator = - typename MedianBlurBase::vectorized_comparator; - using ScalarComparator = - typename MedianBlurBase::scalar_comparator; template void vector_path(KernelWindowFunctor& KernelWindow, DestinationVectorType& output_vec) const { Monostate ctx; - sorting_network5x5(KernelWindow, output_vec, ctx); + sorting_network5x5>(KernelWindow, + output_vec, ctx); } template void scalar_path(KernelWindowFunctor& KernelWindow, DestinationType& dst) const { Monostate ctx; - sorting_network5x5(KernelWindow, dst, ctx); + sorting_network5x5>(KernelWindow, dst, ctx); } }; // end of class MedianBlur -// Template for Median Blur 7x57 filters. +// Template for Median Blur 7x7 filters. template -class MedianBlur : public MedianBlurBase { +class MedianBlur { public: using SourceType = ScalarType; using DestinationType = SourceType; using SourceVectorType = typename VecTraits::VectorType; using DestinationVectorType = typename VecTraits::VectorType; - using VectorComparator = - typename MedianBlurBase::vectorized_comparator; - using ScalarComparator = - typename MedianBlurBase::scalar_comparator; template void vector_path(KernelWindowFunctor& KernelWindow, DestinationVectorType& dst) const { Monostate ctx; - sorting_network7x7(KernelWindow, dst, ctx); + sorting_network7x7>(KernelWindow, dst, + ctx); } template void scalar_path(KernelWindowFunctor& KernelWindow, DestinationType& dst) const { Monostate ctx; - sorting_network7x7(KernelWindow, dst, ctx); + sorting_network7x7>(KernelWindow, dst, ctx); } }; // end of class MedianBlur @@ -130,18 +180,25 @@ kleidicv_error_t median_blur_stripe(const T* src, size_t src_stride, T* dst, Rectangle rect{width, height}; Rows src_rows{src, src_stride, channels}; Rows 
dst_rows{dst, dst_stride, channels}; - if (kernel_width == 5) { + + if (kernel_width == 3) { + MedianBlur median_filter; + Filter2D3x3> filter{median_filter}; + process_filter2d_by_dual_rows(rect, y_begin, y_end, src_rows, dst_rows, + border_type, filter); + } else if (kernel_width == 5) { MedianBlur median_filter; Filter2D5x5> filter{median_filter}; process_filter2d(rect, y_begin, y_end, src_rows, dst_rows, border_type, filter); return KLEIDICV_OK; + } else { + MedianBlur median_filter; + Filter2D7x7> filter{median_filter}; + process_filter2d(rect, y_begin, y_end, src_rows, dst_rows, border_type, + filter); } - MedianBlur median_filter; - Filter2D7x7> filter{median_filter}; - process_filter2d(rect, y_begin, y_end, src_rows, dst_rows, border_type, - filter); return KLEIDICV_OK; } diff --git a/kleidicv/src/filters/median_blur_sc.h b/kleidicv/src/filters/median_blur_sc.h index 000cbd643..d16629e87 100644 --- a/kleidicv/src/filters/median_blur_sc.h +++ b/kleidicv/src/filters/median_blur_sc.h @@ -6,17 +6,17 @@ #define KLEIDICV_MEDIAN_BLUR_SC_H #include -#include #include "kleidicv/ctypes.h" -#include "kleidicv/filters/filter_2d.h" -#include "kleidicv/filters/filter_2d_5x5_sc.h" -#include "kleidicv/filters/filter_2d_7x7_sc.h" +#include "kleidicv/filters/filter_2d_sc.h" #include "kleidicv/filters/median_blur.h" +#include "kleidicv/filters/process_fitler_2d.h" #include "kleidicv/kleidicv.h" #include "kleidicv/sve2.h" +#include "kleidicv/workspace/border_3x3.h" #include "kleidicv/workspace/border_5x5.h" #include "kleidicv/workspace/border_7x7.h" +#include "median_blur_sorting_network_3x3.h" #include "median_blur_sorting_network_5x5.h" #include "median_blur_sorting_network_7x7.h" namespace KLEIDICV_TARGET_NAMESPACE { @@ -26,36 +26,69 @@ template class MedianBlur; template -class MedianBlurBase { - protected: - class vectorized_comparator { - public: - using SourceVectorType = typename VecTraits::VectorType; - - static void compare_and_swap(SourceVectorType& left, - 
SourceVectorType& right, - svbool_t& pg) KLEIDICV_STREAMING_COMPATIBLE { - SourceVectorType max_value = svmax_m(pg, left, right); - SourceVectorType min_value = svmin_m(pg, left, right); - left = min_value; - right = max_value; - } - - static void min(SourceVectorType& left, SourceVectorType& right, - svbool_t& pg) KLEIDICV_STREAMING_COMPATIBLE { - left = svmin_m(pg, left, right); - } - - static void max(SourceVectorType& left, SourceVectorType& right, - svbool_t& pg) KLEIDICV_STREAMING_COMPATIBLE { - right = svmax_m(pg, left, right); - } - }; +class VectorComparator { + public: + using SourceVectorType = typename VecTraits::VectorType; + + static void compare_and_swap(SourceVectorType& left, SourceVectorType& right, + svbool_t& pg) KLEIDICV_STREAMING_COMPATIBLE { + SourceVectorType max_value = svmax_m(pg, left, right); + SourceVectorType min_value = svmin_m(pg, left, right); + left = min_value; + right = max_value; + } + + static void min(SourceVectorType& left, SourceVectorType& right, + svbool_t& pg) KLEIDICV_STREAMING_COMPATIBLE { + left = svmin_m(pg, left, right); + } + + static void max(SourceVectorType& left, SourceVectorType& right, + svbool_t& pg) KLEIDICV_STREAMING_COMPATIBLE { + right = svmax_m(pg, left, right); + } + static SourceVectorType get_min(SourceVectorType& left, + SourceVectorType& right, + svbool_t& pg) KLEIDICV_STREAMING_COMPATIBLE { + return svmin_m(pg, left, right); + } + static SourceVectorType get_max(SourceVectorType& left, + SourceVectorType& right, + svbool_t& pg) KLEIDICV_STREAMING_COMPATIBLE { + return svmax_m(pg, left, right); + } }; +// Template for Median Blur 3x3 filters. 
+template +class MedianBlur { + public: + using SourceType = ScalarType; + using DestinationType = SourceType; + using SourceVectorType = typename VecTraits::VectorType; + using DestinationVectorType = typename VecTraits::VectorType; + + template + void vector_path(svbool_t& pg, KernelWindowFunctor& KernelWindow, + DestinationVectorType& output_vec) const + KLEIDICV_STREAMING_COMPATIBLE { + sorting_network3x3_single_row>(KernelWindow, + output_vec, pg); + } + + template + void vector_path_for_dual_row_handling( + svbool_t& pg, KernelWindowFunctor& KernelWindow, + DestinationVectorType& output_vec_0, + DestinationVectorType& output_vec_1) const KLEIDICV_STREAMING_COMPATIBLE { + sorting_network3x3_dual_rows>( + KernelWindow, output_vec_0, output_vec_1, pg); + } +}; // end of class MedianBlur + // Template for Median Blur 5x5 filters. template -class MedianBlur : public MedianBlurBase { +class MedianBlur { public: using SourceType = ScalarType; using DestinationType = SourceType; @@ -64,19 +97,19 @@ class MedianBlur : public MedianBlurBase { using SourceVectorType = typename SourceVecTraits::VectorType; using DestinationVectorType = typename KLEIDICV_TARGET_NAMESPACE::VecTraits< DestinationType>::VectorType; - using VectorComparator = - typename MedianBlurBase::vectorized_comparator; + template - void vector_path(KernelWindowFunctor& KernelWindow, - DestinationVectorType& output_vec, - svbool_t& pg) const KLEIDICV_STREAMING_COMPATIBLE { - sorting_network5x5(KernelWindow, output_vec, pg); + void vector_path(svbool_t& pg, KernelWindowFunctor& KernelWindow, + DestinationVectorType& output_vec) const + KLEIDICV_STREAMING_COMPATIBLE { + sorting_network5x5>(KernelWindow, output_vec, + pg); } }; // end of class MedianBlur // Template for Median Blur 7x7 filters. 
template -class MedianBlur : public MedianBlurBase { +class MedianBlur { public: using SourceType = ScalarType; using DestinationType = SourceType; @@ -85,14 +118,13 @@ class MedianBlur : public MedianBlurBase { using SourceVectorType = typename SourceVecTraits::VectorType; using DestinationVectorType = typename KLEIDICV_TARGET_NAMESPACE::VecTraits< DestinationType>::VectorType; - using VectorComparator = - typename MedianBlurBase::vectorized_comparator; template - void vector_path(KernelWindowFunctor& KernelWindow, - DestinationVectorType& output_vec, - svbool_t& pg) const KLEIDICV_STREAMING_COMPATIBLE { - sorting_network7x7(KernelWindow, output_vec, pg); + void vector_path(svbool_t& pg, KernelWindowFunctor& KernelWindow, + DestinationVectorType& output_vec) const + KLEIDICV_STREAMING_COMPATIBLE { + sorting_network7x7>(KernelWindow, output_vec, + pg); } }; // end of class MedianBlur @@ -105,18 +137,24 @@ kleidicv_error_t median_blur_stripe_sc( Rectangle rect{width, height}; Rows src_rows{src, src_stride, channels}; Rows dst_rows{dst, dst_stride, channels}; - if (kernel_width == 5) { + if (kernel_width == 3) { + MedianBlur median_filter; + Filter2D3x3> filter{median_filter}; + process_filter2d_by_dual_rows(rect, y_begin, y_end, src_rows, dst_rows, + border_type, filter); + } else if (kernel_width == 5) { MedianBlur median_filter; Filter2D5x5> filter{median_filter}; process_filter2d(rect, y_begin, y_end, src_rows, dst_rows, border_type, filter); return KLEIDICV_OK; + } else { + MedianBlur median_filter; + Filter2D7x7> filter{median_filter}; + process_filter2d(rect, y_begin, y_end, src_rows, dst_rows, border_type, + filter); } - MedianBlur median_filter; - Filter2D7x7> filter{median_filter}; - process_filter2d(rect, y_begin, y_end, src_rows, dst_rows, border_type, - filter); return KLEIDICV_OK; } diff --git a/kleidicv/src/filters/median_blur_sorting_network_3x3.h b/kleidicv/src/filters/median_blur_sorting_network_3x3.h new file mode 100644 index 000000000..d7381cd9f 
--- /dev/null +++ b/kleidicv/src/filters/median_blur_sorting_network_3x3.h @@ -0,0 +1,109 @@ +// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_MEDIAN_BLUR_SORTING_NETWORK_3X3_H +#define KLEIDICV_MEDIAN_BLUR_SORTING_NETWORK_3X3_H + +#include + +#include "kleidicv/kleidicv.h" +#include "kleidicv/traits.h" + +namespace KLEIDICV_TARGET_NAMESPACE { +template +void sorting_network3x3_single_row(KernelWindowFunctor& KernelWindow, + T& output_vec, ContextType& context) + KLEIDICV_STREAMING_COMPATIBLE { + // full sort row + Comparator::compare_and_swap(KernelWindow(0, 0), KernelWindow(0, 2), context); + Comparator::compare_and_swap(KernelWindow(0, 0), KernelWindow(0, 1), context); + Comparator::compare_and_swap(KernelWindow(0, 1), KernelWindow(0, 2), context); + Comparator::compare_and_swap(KernelWindow(1, 0), KernelWindow(1, 2), context); + Comparator::compare_and_swap(KernelWindow(1, 0), KernelWindow(1, 1), context); + Comparator::compare_and_swap(KernelWindow(1, 1), KernelWindow(1, 2), context); + Comparator::compare_and_swap(KernelWindow(2, 0), KernelWindow(2, 2), context); + Comparator::compare_and_swap(KernelWindow(2, 0), KernelWindow(2, 1), context); + Comparator::compare_and_swap(KernelWindow(2, 1), KernelWindow(2, 2), context); + // find max in col 0 + T max_0_1_2 = + Comparator::get_max(KernelWindow(0, 0), KernelWindow(1, 0), context); + max_0_1_2 = Comparator::get_max(max_0_1_2, KernelWindow(2, 0), context); + // find mid in col 1 + T src_tmp_0 = KernelWindow(0, 1); + T mid_0_1_2 = KernelWindow(1, 1); + T src_tmp_2 = KernelWindow(2, 1); + Comparator::compare_and_swap(src_tmp_0, src_tmp_2, context); + Comparator::max(src_tmp_0, mid_0_1_2, context); + Comparator::min(mid_0_1_2, src_tmp_2, context); + // find min in col 2 + T min_0_1_2 = + Comparator::get_min(KernelWindow(0, 2), KernelWindow(1, 2), context); + min_0_1_2 = Comparator::get_min(min_0_1_2, KernelWindow(2, 2), context); + // 
find median + Comparator::compare_and_swap(min_0_1_2, max_0_1_2, context); + Comparator::max(min_0_1_2, mid_0_1_2, context); + Comparator::min(mid_0_1_2, max_0_1_2, context); + output_vec = mid_0_1_2; +} + +template +void sorting_network3x3_dual_rows( + KernelWindowFunctor& KernelWindow, T& output_vec_0, T& output_vec_1, + ContextType& context) KLEIDICV_STREAMING_COMPATIBLE { + // full sort row + Comparator::compare_and_swap(KernelWindow(0, 0), KernelWindow(0, 2), context); + Comparator::compare_and_swap(KernelWindow(0, 0), KernelWindow(0, 1), context); + Comparator::compare_and_swap(KernelWindow(0, 1), KernelWindow(0, 2), context); + Comparator::compare_and_swap(KernelWindow(1, 0), KernelWindow(1, 2), context); + Comparator::compare_and_swap(KernelWindow(1, 0), KernelWindow(1, 1), context); + Comparator::compare_and_swap(KernelWindow(1, 1), KernelWindow(1, 2), context); + Comparator::compare_and_swap(KernelWindow(2, 0), KernelWindow(2, 2), context); + Comparator::compare_and_swap(KernelWindow(2, 0), KernelWindow(2, 1), context); + Comparator::compare_and_swap(KernelWindow(2, 1), KernelWindow(2, 2), context); + Comparator::compare_and_swap(KernelWindow(3, 0), KernelWindow(3, 2), context); + Comparator::compare_and_swap(KernelWindow(3, 0), KernelWindow(3, 1), context); + Comparator::compare_and_swap(KernelWindow(3, 1), KernelWindow(3, 2), context); + // sort common element + Comparator::compare_and_swap(KernelWindow(1, 0), KernelWindow(2, 0), context); + Comparator::compare_and_swap(KernelWindow(1, 2), KernelWindow(2, 2), context); + Comparator::compare_and_swap(KernelWindow(1, 1), KernelWindow(2, 1), context); + // find first median + // find max in col 0 + T max_0_1_2 = + Comparator::get_max(KernelWindow(0, 0), KernelWindow(2, 0), context); + // find mid in col 1 + T src_tmp_0 = + Comparator::get_min(KernelWindow(0, 1), KernelWindow(2, 1), context); + T mid_0_1_2 = Comparator::get_max(KernelWindow(1, 1), src_tmp_0, context); + // find min in col 2 + T min_0_1_2 = 
+ Comparator::get_min(KernelWindow(0, 2), KernelWindow(1, 2), context); + // find median + Comparator::compare_and_swap(min_0_1_2, max_0_1_2, context); + Comparator::max(min_0_1_2, mid_0_1_2, context); + Comparator::min(mid_0_1_2, max_0_1_2, context); + output_vec_0 = mid_0_1_2; + // find second median + // find max in col 0 + max_0_1_2 = + Comparator::get_max(KernelWindow(2, 0), KernelWindow(3, 0), context); + // find mid in col 1 + src_tmp_0 = + Comparator::get_max(KernelWindow(1, 1), KernelWindow(3, 1), context); + mid_0_1_2 = Comparator::get_min(KernelWindow(2, 1), src_tmp_0, context); + // find min in col 2 + min_0_1_2 = + Comparator::get_min(KernelWindow(1, 2), KernelWindow(3, 2), context); + // find median + Comparator::compare_and_swap(min_0_1_2, max_0_1_2, context); + Comparator::max(min_0_1_2, mid_0_1_2, context); + Comparator::min(mid_0_1_2, max_0_1_2, context); + output_vec_1 = mid_0_1_2; +} + +} // namespace KLEIDICV_TARGET_NAMESPACE + +#endif // KLEIDICV_MEDIAN_BLUR_SORTING_NETWORK_3X3_H diff --git a/scripts/benchmark/benchmarks.txt b/scripts/benchmark/benchmarks.txt index 17e09df9f..a28ef3a9a 100755 --- a/scripts/benchmark/benchmarks.txt +++ b/scripts/benchmark/benchmarks.txt @@ -31,6 +31,7 @@ SepFilter2D_5x5_U8: opencv_perf_imgproc '*KleidiCV_SepFilter2D.SepFilter2D/*' ' SepFilter2D_5x5_U16: opencv_perf_imgproc '*KleidiCV_SepFilter2D.SepFilter2D/*' '($PIXEL_FORMAT, 16UC1, 5, BORDER_REPLICATE)' SepFilter2D_5x5_S16: opencv_perf_imgproc '*KleidiCV_SepFilter2D.SepFilter2D/*' '($PIXEL_FORMAT, 16SC1, 5, BORDER_REPLICATE)' +MedianBlur3x3: opencv_perf_imgproc '*medianBlur/*' '($PIXEL_FORMAT, 8UC1, 3)' MedianBlur5x5: opencv_perf_imgproc '*medianBlur/*' '($PIXEL_FORMAT, 8UC1, 5)' MedianBlur7x7: opencv_perf_imgproc '*medianBlur/*' '($PIXEL_FORMAT, 8UC1, 7)' diff --git a/test/api/test_median_blur.cpp b/test/api/test_median_blur.cpp index 2d4c608d2..2c1b12c72 100644 --- a/test/api/test_median_blur.cpp +++ b/test/api/test_median_blur.cpp @@ -71,7 +71,7 @@ class 
MedianBlurTest : public testing::Test { std::vector dst_paddings = {0}; std::vector heights = {30}; std::vector channels = {1, 4}; - std::vector filter_sizes = {5, 7}; + std::vector filter_sizes = {3, 5, 7}; std::vector border_types = { KLEIDICV_BORDER_TYPE_REPLICATE, KLEIDICV_BORDER_TYPE_REFLECT, KLEIDICV_BORDER_TYPE_WRAP, KLEIDICV_BORDER_TYPE_REVERSE}; @@ -86,7 +86,7 @@ class MedianBlurTest : public testing::Test { std::vector dst_paddings = {13}; std::vector heights = {10}; std::vector channels = {1, 4}; - std::vector filter_sizes = {5, 7}; + std::vector filter_sizes = {3, 5, 7}; std::vector border_types = { KLEIDICV_BORDER_TYPE_REPLICATE}; @@ -136,8 +136,8 @@ class MedianBlurTest : public testing::Test { } private: - int handle_under_over_read(int index, int limit, - kleidicv_border_type_t border_type) { + int get_physical_index(int index, int limit, + kleidicv_border_type_t border_type) { int result = 0; if (index >= 0 && index < limit) { @@ -163,11 +163,10 @@ class MedianBlurTest : public testing::Test { } case KLEIDICV_BORDER_TYPE_REVERSE: { - int period = 2 * limit - 2; if (index < 0) { - result = (-index) % period; + result = -index; } else { - result = period - (index % period); + result = 2 * limit - index - 2; } break; } @@ -184,11 +183,11 @@ class MedianBlurTest : public testing::Test { test::Array2D& dst, size_t filter_size, kleidicv_border_type_t border_type) { const int half_kernel_size = static_cast(filter_size) / 2; - const size_t height = src.height(); - const size_t width = src.width() / src.channels(); + const int height = static_cast(src.height()); + const int width = static_cast(src.width() / src.channels()); - for (size_t row = 0; row < height; ++row) { - for (size_t col = 0; col < width; ++col) { + for (int row = 0; row < height; ++row) { + for (int col = 0; col < width; ++col) { for (size_t channel = 0; channel < src.channels(); ++channel) { std::vector window; @@ -197,9 +196,9 @@ class MedianBlurTest : public testing::Test { for (int 
window_col = -half_kernel_size; window_col <= half_kernel_size; ++window_col) { int row_after_border_handling = - handle_under_over_read(row + window_row, height, border_type); + get_physical_index(row + window_row, height, border_type); int col_after_border_handling = - handle_under_over_read(col + window_col, width, border_type); + get_physical_index(col + window_col, width, border_type); window.push_back(*src.at( row_after_border_handling, @@ -233,7 +232,7 @@ TYPED_TEST(MedianBlurTest, RunAllParamCombinationsWithoutPadding) { } TYPED_TEST(MedianBlurTest, RunAllParamCombinationsWithSmallImageSize) { - for (auto ksize : {5, 7}) { + for (auto ksize : {3, 5, 7}) { for (const auto& params : TestFixture::get_small_image_test_cases(ksize)) { this->run_test_case(params); } diff --git a/test/api/test_thread.cpp b/test/api/test_thread.cpp index 9fa419e13..6b8eb324e 100644 --- a/test/api/test_thread.cpp +++ b/test/api/test_thread.cpp @@ -113,7 +113,7 @@ class Thread : public testing::TestWithParam

{ (void)thread_count; size_t channels = 1; kleidicv_border_type_t border_type = KLEIDICV_BORDER_TYPE_REPLICATE; - for (auto ksize : {5, 7}) { + for (auto ksize : {3, 5, 7}) { check_unary_op(single_threaded_func, multithreaded_func, channels, channels, channels, ksize, ksize, border_type); } @@ -408,10 +408,11 @@ void check_median_blur_not_implemented(MultithreadedFunc multithreaded_func) { 25, 25, 1, 5, 3, KLEIDICV_BORDER_TYPE_REPLICATE, get_multithreading_fake(2))); + // Only odd kernel sizes are supported for median filtering EXPECT_EQ( KLEIDICV_ERROR_NOT_IMPLEMENTED, multithreaded_func(src1.data(), src1.stride(), dst1.data(), dst1.stride(), - 25, 25, 1, 3, 3, KLEIDICV_BORDER_TYPE_REPLICATE, + 25, 25, 1, 4, 4, KLEIDICV_BORDER_TYPE_REPLICATE, get_multithreading_fake(2))); } -- GitLab