From ac36af2c230823a051d87db3f84ab1dc3a8a36b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Wed, 26 Jun 2024 18:01:09 +0200 Subject: [PATCH] Add sigma support for Gaussian blur Sigma is the common notation for standard deviation. With this change the standard deviation of the filter used for blurring can be set. --- CHANGELOG.md | 1 + adapters/opencv/kleidicv_hal.cpp | 46 ++ adapters/opencv/kleidicv_hal.h | 23 + adapters/opencv/opencv-4.9.patch | 34 +- benchmark/benchmark.cpp | 70 +-- conformity/opencv/test_gaussian_blur.cpp | 45 +- doc/opencv.md | 3 +- kleidicv/include/kleidicv/kleidicv.h | 10 +- kleidicv/include/kleidicv/sigma.h | 81 +++ kleidicv/src/filters/gaussian_blur_api.cpp | 7 +- kleidicv/src/filters/gaussian_blur_neon.cpp | 257 ++++++--- kleidicv/src/filters/gaussian_blur_sc.h | 536 +++++++++++++++--- kleidicv/src/filters/gaussian_blur_sme2.cpp | 6 +- kleidicv/src/filters/gaussian_blur_sve2.cpp | 6 +- scripts/benchmark/run_benchmarks_4K.sh | 5 + scripts/benchmark/run_benchmarks_FHD.sh | 5 + test/api/test_gaussian_blur.cpp | 578 ++++++++++++++------ 17 files changed, 1355 insertions(+), 358 deletions(-) create mode 100644 kleidicv/include/kleidicv/sigma.h diff --git a/CHANGELOG.md b/CHANGELOG.md index 24fd5d20f..47e361235 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ This changelog aims to follow the guiding principles of - Bitwise and. - Gaussian Blur for 7x7 kernels. - Gaussian Blur for 15x15 kernels. +- Enable specifying standard deviation for Gaussian blur. - Scale function for float. - Add, subtract, multiply & absdiff enabled in OpenCV HAL. - MinMax enabled in OpenCV HAL, float version added. diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp index 1a90fd24c..21e04c9ac 100644 --- a/adapters/opencv/kleidicv_hal.cpp +++ b/adapters/opencv/kleidicv_hal.cpp @@ -282,6 +282,52 @@ int gaussian_blur_binomial(const uchar *src_data, size_t src_step, return convert_error(blur_err ? blur_err : release_err); } +int gaussian_blur(const uchar *src_data, size_t src_step, uchar *dst_data, + size_t dst_step, int width, int height, int depth, int cn, + size_t margin_left, size_t margin_top, size_t margin_right, + size_t margin_bottom, size_t kernel_width, + size_t kernel_height, double sigma_x, double sigma_y, + int border_type) { + if (src_data == dst_data) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + if (margin_left != 0 || margin_top != 0 || margin_right != 0 || + margin_bottom != 0) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + switch (depth) { + case CV_8U: + break; + + default: + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + kleidicv_border_type_t kleidicv_border_type; + if (from_opencv(border_type, kleidicv_border_type)) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + kleidicv_filter_context_t *context; + if (kleidicv_error_t create_err = kleidicv_filter_context_create( + &context, cn, kernel_width, kernel_height, static_cast(width), + static_cast(height))) { + return convert_error(create_err); + } + + kleidicv_error_t blur_err = kleidicv_gaussian_blur_u8( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, width, height, cn, + kernel_width, kernel_height, sigma_x, sigma_y, kleidicv_border_type, + context); + + kleidicv_error_t release_err = kleidicv_filter_context_release(context); + + return convert_error(blur_err ? blur_err : release_err); +} + struct MorphologyParams { kleidicv_morphology_context_t *context; decltype(kleidicv_dilate_u8) impl; diff --git a/adapters/opencv/kleidicv_hal.h b/adapters/opencv/kleidicv_hal.h index 5878c0115..f703b6ef0 100644 --- a/adapters/opencv/kleidicv_hal.h +++ b/adapters/opencv/kleidicv_hal.h @@ -52,6 +52,13 @@ int gaussian_blur_binomial(const uchar *src_data, size_t src_step, size_t margin_bottom, size_t kernel_size, int border_type); +int gaussian_blur(const uchar *src_data, size_t src_step, uchar *dst_data, + size_t dst_step, int width, int height, int depth, int cn, + size_t margin_left, size_t margin_top, size_t margin_right, + size_t margin_bottom, size_t kernel_width, + size_t kernel_height, double sigma_x, double sigma_y, + int border_type); + int morphology_init(cvhalFilter2D **context, int operation, int src_type, int dst_type, int max_width, int max_height, int kernel_type, uchar *kernel_data, size_t kernel_step, @@ -206,6 +213,22 @@ static inline int kleidicv_gaussian_blur_binomial_with_fallback( #define cv_hal_gaussianBlurBinomial \ kleidicv_gaussian_blur_binomial_with_fallback +// gaussian_blur +static inline int kleidicv_gaussian_blur_with_fallback( + const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, + int width, int height, int depth, int cn, size_t margin_left, + size_t margin_top, size_t margin_right, size_t margin_bottom, + size_t kernel_width, size_t kernel_height, double sigma_x, double sigma_y, + int border_type) { + return KLEIDICV_HAL_FALLBACK_FORWARD( + gaussian_blur, cv_hal_gaussianBlur, src_data, src_step, dst_data, + dst_step, width, height, depth, cn, margin_left, margin_top, margin_right, + margin_bottom, kernel_width, kernel_height, sigma_x, sigma_y, + border_type); +} +#undef cv_hal_gaussianBlur +#define cv_hal_gaussianBlur kleidicv_gaussian_blur_with_fallback + // morphology_init static inline int kleidicv_morphology_init_with_fallback( cvhalFilter2D **context, int operation, int src_type, int dst_type, diff --git a/adapters/opencv/opencv-4.9.patch b/adapters/opencv/opencv-4.9.patch index 87ca8ad88..d731aa2fb 100644 --- a/adapters/opencv/opencv-4.9.patch +++ b/adapters/opencv/opencv-4.9.patch @@ -179,10 +179,10 @@ index c066f3d6f3..d8b58015f9 100644 @brief Computes Sobel derivatives @param src_depth Depth of source image diff --git a/modules/imgproc/src/smooth.dispatch.cpp b/modules/imgproc/src/smooth.dispatch.cpp -index 8a521d6df3..21cfe82595 100644 +index 8a521d6df3..c1e5ec56d1 100644 --- a/modules/imgproc/src/smooth.dispatch.cpp +++ b/modules/imgproc/src/smooth.dispatch.cpp -@@ -654,6 +654,20 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, +@@ -654,6 +654,34 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, ocl_GaussianBlur_8UC1(_src, _dst, ksize, CV_MAT_DEPTH(type), kx, ky, borderType) ); @@ -199,7 +199,37 @@ index 8a521d6df3..21cfe82595 100644 + CALL_HAL(gaussianBlurBinomial, cv_hal_gaussianBlurBinomial, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn, + ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, borderType&~BORDER_ISOLATED); + } ++ ++ { ++ Mat src = _src.getMat(); ++ Mat dst = _dst.getMat(); ++ ++ Point ofs; ++ Size wsz(src.cols, src.rows); ++ if(!(borderType & BORDER_ISOLATED)) ++ src.locateROI( wsz, ofs ); ++ ++ CALL_HAL(gaussianBlur, cv_hal_gaussianBlur, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn, ++ ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height, ++ sigma1, sigma2, borderType&~BORDER_ISOLATED); ++ } + if(sdepth == CV_8U && ((borderType & BORDER_ISOLATED) || !_src.isSubmatrix())) { std::vector fkx, fky; +@@ -737,15 +765,6 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, + Mat src = _src.getMat(); + Mat dst = _dst.getMat(); + +- Point ofs; +- Size wsz(src.cols, src.rows); +- if(!(borderType & BORDER_ISOLATED)) +- src.locateROI( wsz, ofs ); +- +- CALL_HAL(gaussianBlur, cv_hal_gaussianBlur, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn, +- ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height, +- sigma1, sigma2, borderType&~BORDER_ISOLATED); +- + CV_OVX_RUN(true, + openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType)) + diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 07a290df1..59825c41d 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -242,7 +242,7 @@ static void resize_linear_8x8_f32(benchmark::State& state) { } BENCHMARK(resize_linear_8x8_f32); -template +template static void gaussian_blur(benchmark::State& state) { kleidicv_filter_context_t* context; kleidicv_error_t err = kleidicv_filter_context_create( @@ -257,52 +257,38 @@ static void gaussian_blur(benchmark::State& state) { get_source_buffer_a(), image_width * Channels * sizeof(T), get_destination_buffer(), image_width * Channels * sizeof(T), image_width, image_height, Channels, - KernelSize, KernelSize, 0.0, 0.0, KLEIDICV_BORDER_TYPE_REFLECT, - context); + KernelSize, KernelSize, (Binomial ? 0.0 : 2.0), (Binomial ? 0.0 : 2.0), + KLEIDICV_BORDER_TYPE_REFLECT, context); }); (void)kleidicv_filter_context_release(context); } -static void gaussian_blur_3x3_u8_1ch(benchmark::State& state) { - gaussian_blur(state); -} -BENCHMARK(gaussian_blur_3x3_u8_1ch); - -static void gaussian_blur_3x3_u8_3ch(benchmark::State& state) { - gaussian_blur(state); -} -BENCHMARK(gaussian_blur_3x3_u8_3ch); - -static void gaussian_blur_5x5_u8_1ch(benchmark::State& state) { - gaussian_blur(state); -} -BENCHMARK(gaussian_blur_5x5_u8_1ch); - -static void gaussian_blur_5x5_u8_3ch(benchmark::State& state) { - gaussian_blur(state); -} -BENCHMARK(gaussian_blur_5x5_u8_3ch); - -static void gaussian_blur_7x7_u8_1ch(benchmark::State& state) { - gaussian_blur(state); -} -BENCHMARK(gaussian_blur_7x7_u8_1ch); - -static void gaussian_blur_7x7_u8_3ch(benchmark::State& state) { - gaussian_blur(state); -} -BENCHMARK(gaussian_blur_7x7_u8_3ch); - -static void gaussian_blur_15x15_u8_1ch(benchmark::State& state) { - gaussian_blur(state); -} -BENCHMARK(gaussian_blur_15x15_u8_1ch); - -static void gaussian_blur_15x15_u8_3ch(benchmark::State& state) { - gaussian_blur(state); -} -BENCHMARK(gaussian_blur_15x15_u8_3ch); +#define BENCH_GAUSSIAN_BLUR(kernel_size, channel_number) \ + static void \ + gaussian_blur_binomial_u8##_##kernel_size##x##kernel_size##_##channel_number##ch( \ + benchmark::State& state) { \ + gaussian_blur(state); \ + } \ + BENCHMARK( \ + gaussian_blur_binomial_u8##_##kernel_size##x##kernel_size##_##channel_number##ch); \ + \ + static void \ + gaussian_blur_custom_sigma_u8##_##kernel_size##x##kernel_size##_##channel_number##ch( \ + benchmark::State& state) { \ + gaussian_blur(state); \ + } \ + BENCHMARK( \ + gaussian_blur_custom_sigma_u8##_##kernel_size##x##kernel_size##_##channel_number##ch); + +BENCH_GAUSSIAN_BLUR(3, 1); +BENCH_GAUSSIAN_BLUR(3, 3); +BENCH_GAUSSIAN_BLUR(5, 1); +BENCH_GAUSSIAN_BLUR(5, 3); +BENCH_GAUSSIAN_BLUR(7, 1); +BENCH_GAUSSIAN_BLUR(7, 3); +BENCH_GAUSSIAN_BLUR(15, 1); +BENCH_GAUSSIAN_BLUR(15, 3); template static void sobel_filter(Function f, benchmark::State& state) { diff --git a/conformity/opencv/test_gaussian_blur.cpp b/conformity/opencv/test_gaussian_blur.cpp index 1191488d5..7d0ed6da3 100644 --- a/conformity/opencv/test_gaussian_blur.cpp +++ b/conformity/opencv/test_gaussian_blur.cpp @@ -8,14 +8,20 @@ template cv::Mat exec_gaussian_blur(cv::Mat& input) { + double sigma = + *reinterpret_cast(&input.at(0, input.rows - 2)); + // clone is required, otherwise the result matrix is treated as part of a + // bigger image, and it would have impact on what border types are supported + cv::Mat input_mat = input.rowRange(0, input.rows - 2).clone(); cv::Size kernel(KernelSize, KernelSize); cv::Mat result; - cv::GaussianBlur(input, result, kernel, 0, 0, BorderType); + cv::GaussianBlur(input_mat, result, kernel, sigma, sigma, BorderType); return result; } #if MANAGER -template +template bool test_gaussian_blur(int index, RecreatedMessageQueue& request_queue, RecreatedMessageQueue& reply_queue) { cv::RNG rng(0); @@ -29,9 +35,22 @@ bool test_gaussian_blur(int index, RecreatedMessageQueue& request_queue, for (size_t y = size_min; y <= size_max; ++y) { for (size_t x = size_min; x <= size_max; ++x) { - cv::Mat input(y, x, CV_8UC(Channels)); + // Two extra lines allocated to be sure sigma can be placed next to the + // real input + cv::Mat input(y + 2, x, CV_8UC(Channels)); rng.fill(input, cv::RNG::UNIFORM, 0, 255); + double sigma = 0.0; + + if constexpr (!Binomial) { + // cv::rng returns [0,1) range in case of float or double, so it is + // multiplied by 10 + sigma = static_cast(rng) * 10; + } + + // sigma is embedded into the input matrix + *reinterpret_cast(&input.at(0, input.rows - 2)) = sigma; + cv::Mat actual = exec_gaussian_blur(input); cv::Mat expected = get_expected_from_subordinate(index, request_queue, reply_queue, input); @@ -64,6 +83,11 @@ std::vector& gaussian_blur_tests_get() { TEST("Gaussian blur 3x3, BORDER_REFLECT_101, 3 channel", (test_gaussian_blur<3, cv::BORDER_REFLECT_101, 3>), (exec_gaussian_blur<3, cv::BORDER_REFLECT_101>)), TEST("Gaussian blur 3x3, BORDER_REFLECT_101, 4 channel", (test_gaussian_blur<3, cv::BORDER_REFLECT_101, 4>), (exec_gaussian_blur<3, cv::BORDER_REFLECT_101>)), + TEST("Gaussian blur 3x3, BORDER_REFLECT_101, 1 channel, random sigma", (test_gaussian_blur<3, cv::BORDER_REFLECT_101, 1, false>), (exec_gaussian_blur<3, cv::BORDER_REFLECT_101>)), + TEST("Gaussian blur 3x3, BORDER_REFLECT_101, 2 channel, random sigma", (test_gaussian_blur<3, cv::BORDER_REFLECT_101, 2, false>), (exec_gaussian_blur<3, cv::BORDER_REFLECT_101>)), + TEST("Gaussian blur 3x3, BORDER_REFLECT_101, 3 channel, random sigma", (test_gaussian_blur<3, cv::BORDER_REFLECT_101, 3, false>), (exec_gaussian_blur<3, cv::BORDER_REFLECT_101>)), + TEST("Gaussian blur 3x3, BORDER_REFLECT_101, 4 channel, random sigma", (test_gaussian_blur<3, cv::BORDER_REFLECT_101, 4, false>), (exec_gaussian_blur<3, cv::BORDER_REFLECT_101>)), + TEST("Gaussian blur 3x3, BORDER_REFLECT, 1 channel", (test_gaussian_blur<3, cv::BORDER_REFLECT, 1>), (exec_gaussian_blur<3, cv::BORDER_REFLECT>)), TEST("Gaussian blur 3x3, BORDER_REFLECT, 2 channel", (test_gaussian_blur<3, cv::BORDER_REFLECT, 2>), (exec_gaussian_blur<3, cv::BORDER_REFLECT>)), TEST("Gaussian blur 3x3, BORDER_REFLECT, 3 channel", (test_gaussian_blur<3, cv::BORDER_REFLECT, 3>), (exec_gaussian_blur<3, cv::BORDER_REFLECT>)), @@ -84,6 +108,11 @@ std::vector& gaussian_blur_tests_get() { TEST("Gaussian blur 5x5, BORDER_REFLECT_101, 3 channel", (test_gaussian_blur<5, cv::BORDER_REFLECT_101, 3>), (exec_gaussian_blur<5, cv::BORDER_REFLECT_101>)), TEST("Gaussian blur 5x5, BORDER_REFLECT_101, 4 channel", (test_gaussian_blur<5, cv::BORDER_REFLECT_101, 4>), (exec_gaussian_blur<5, cv::BORDER_REFLECT_101>)), + TEST("Gaussian blur 5x5, BORDER_REFLECT_101, 1 channel, random sigma", (test_gaussian_blur<5, cv::BORDER_REFLECT_101, 1, false>), (exec_gaussian_blur<5, cv::BORDER_REFLECT_101>)), + TEST("Gaussian blur 5x5, BORDER_REFLECT_101, 2 channel, random sigma", (test_gaussian_blur<5, cv::BORDER_REFLECT_101, 2, false>), (exec_gaussian_blur<5, cv::BORDER_REFLECT_101>)), + TEST("Gaussian blur 5x5, BORDER_REFLECT_101, 3 channel, random sigma", (test_gaussian_blur<5, cv::BORDER_REFLECT_101, 3, false>), (exec_gaussian_blur<5, cv::BORDER_REFLECT_101>)), + TEST("Gaussian blur 5x5, BORDER_REFLECT_101, 4 channel, random sigma", (test_gaussian_blur<5, cv::BORDER_REFLECT_101, 4, false>), (exec_gaussian_blur<5, cv::BORDER_REFLECT_101>)), + TEST("Gaussian blur 5x5, BORDER_REFLECT, 1 channel", (test_gaussian_blur<5, cv::BORDER_REFLECT, 1>), (exec_gaussian_blur<5, cv::BORDER_REFLECT>)), TEST("Gaussian blur 5x5, BORDER_REFLECT, 2 channel", (test_gaussian_blur<5, cv::BORDER_REFLECT, 2>), (exec_gaussian_blur<5, cv::BORDER_REFLECT>)), TEST("Gaussian blur 5x5, BORDER_REFLECT, 3 channel", (test_gaussian_blur<5, cv::BORDER_REFLECT, 3>), (exec_gaussian_blur<5, cv::BORDER_REFLECT>)), @@ -104,6 +133,11 @@ std::vector& gaussian_blur_tests_get() { TEST("Gaussian blur 7x7, BORDER_REFLECT_101, 3 channel", (test_gaussian_blur<7, cv::BORDER_REFLECT_101, 3>), (exec_gaussian_blur<7, cv::BORDER_REFLECT_101>)), TEST("Gaussian blur 7x7, BORDER_REFLECT_101, 4 channel", (test_gaussian_blur<7, cv::BORDER_REFLECT_101, 4>), (exec_gaussian_blur<7, cv::BORDER_REFLECT_101>)), + TEST("Gaussian blur 7x7, BORDER_REFLECT_101, 1 channel, random sigma", (test_gaussian_blur<7, cv::BORDER_REFLECT_101, 1, false>), (exec_gaussian_blur<7, cv::BORDER_REFLECT_101>)), + TEST("Gaussian blur 7x7, BORDER_REFLECT_101, 2 channel, random sigma", (test_gaussian_blur<7, cv::BORDER_REFLECT_101, 2, false>), (exec_gaussian_blur<7, cv::BORDER_REFLECT_101>)), + TEST("Gaussian blur 7x7, BORDER_REFLECT_101, 3 channel, random sigma", (test_gaussian_blur<7, cv::BORDER_REFLECT_101, 3, false>), (exec_gaussian_blur<7, cv::BORDER_REFLECT_101>)), + TEST("Gaussian blur 7x7, BORDER_REFLECT_101, 4 channel, random sigma", (test_gaussian_blur<7, cv::BORDER_REFLECT_101, 4, false>), (exec_gaussian_blur<7, cv::BORDER_REFLECT_101>)), + TEST("Gaussian blur 7x7, BORDER_REFLECT, 1 channel", (test_gaussian_blur<7, cv::BORDER_REFLECT, 1>), (exec_gaussian_blur<7, cv::BORDER_REFLECT>)), TEST("Gaussian blur 7x7, BORDER_REFLECT, 2 channel", (test_gaussian_blur<7, cv::BORDER_REFLECT, 2>), (exec_gaussian_blur<7, cv::BORDER_REFLECT>)), TEST("Gaussian blur 7x7, BORDER_REFLECT, 3 channel", (test_gaussian_blur<7, cv::BORDER_REFLECT, 3>), (exec_gaussian_blur<7, cv::BORDER_REFLECT>)), @@ -124,6 +158,11 @@ std::vector& gaussian_blur_tests_get() { TEST("Gaussian blur 15x15, BORDER_REFLECT_101, 3 channel", (test_gaussian_blur<15, cv::BORDER_REFLECT_101, 3>), (exec_gaussian_blur<15, cv::BORDER_REFLECT_101>)), TEST("Gaussian blur 15x15, BORDER_REFLECT_101, 4 channel", (test_gaussian_blur<15, cv::BORDER_REFLECT_101, 4>), (exec_gaussian_blur<15, cv::BORDER_REFLECT_101>)), + TEST("Gaussian blur 15x15, BORDER_REFLECT_101, 1 channel, random sigma", (test_gaussian_blur<15, cv::BORDER_REFLECT_101, 1, false>), (exec_gaussian_blur<15, cv::BORDER_REFLECT_101>)), + TEST("Gaussian blur 15x15, BORDER_REFLECT_101, 2 channel, random sigma", (test_gaussian_blur<15, cv::BORDER_REFLECT_101, 2, false>), (exec_gaussian_blur<15, cv::BORDER_REFLECT_101>)), + TEST("Gaussian blur 15x15, BORDER_REFLECT_101, 3 channel, random sigma", (test_gaussian_blur<15, cv::BORDER_REFLECT_101, 3, false>), (exec_gaussian_blur<15, cv::BORDER_REFLECT_101>)), + TEST("Gaussian blur 15x15, BORDER_REFLECT_101, 4 channel, random sigma", (test_gaussian_blur<15, cv::BORDER_REFLECT_101, 4, false>), (exec_gaussian_blur<15, cv::BORDER_REFLECT_101>)), + TEST("Gaussian blur 15x15, BORDER_REFLECT, 1 channel", (test_gaussian_blur<15, cv::BORDER_REFLECT, 1>), (exec_gaussian_blur<15, cv::BORDER_REFLECT>)), TEST("Gaussian blur 15x15, BORDER_REFLECT, 2 channel", (test_gaussian_blur<15, cv::BORDER_REFLECT, 2>), (exec_gaussian_blur<15, cv::BORDER_REFLECT>)), TEST("Gaussian blur 15x15, BORDER_REFLECT, 3 channel", (test_gaussian_blur<15, cv::BORDER_REFLECT, 3>), (exec_gaussian_blur<15, cv::BORDER_REFLECT>)), diff --git a/doc/opencv.md b/doc/opencv.md index 3e801b6de..8e8e2944c 100644 --- a/doc/opencv.md +++ b/doc/opencv.md @@ -104,7 +104,8 @@ Notes on parameters: ### `gaussian_blur` Blurs an image using a Gaussian filter.\ -Currently does not support non-zero margins. Kernel shape is restricted to square (`kernelWidth == kernelHeight`). Kernel standard deviation cannot be customized via `sigmaX` and `sigmaY` and is calculated based on kernel size. +Currently does not support non-zero margins. Kernel shape is restricted to square (`kernelWidth == kernelHeight`). The filter's +standard deviation must be the same in horizontal and vertical directions (`sigma_x == sigma_y`). Notes on parameters: * `depth` - only supports `CV_8U` depth. diff --git a/kleidicv/include/kleidicv/kleidicv.h b/kleidicv/include/kleidicv/kleidicv.h index 698b9286f..10a8e945e 100644 --- a/kleidicv/include/kleidicv/kleidicv.h +++ b/kleidicv/include/kleidicv/kleidicv.h @@ -1226,8 +1226,14 @@ kleidicv_error_t kleidicv_filter_context_release( /// @ref KLEIDICV_MAXIMUM_CHANNEL_COUNT. /// @param kernel_width Width of the Gaussian kernel. /// @param kernel_height Height of the Gaussian kernel. -/// @param sigma_x Horizontal sigma (standard deviation) value. -/// @param sigma_y Vertical sigma (standard deviation) value. +/// @param sigma_x Horizontal sigma (standard deviation) value. If equal +/// to 0.0, Gaussian filter is approximated by the +/// probability mass function of the binomial distribution +/// in the horizontal direction. +/// @param sigma_y Vertical sigma (standard deviation) value. If equal +/// to 0.0, Gaussian filter is approximated by the +/// probability mass function of the binomial distribution +/// in the vertical direction. /// @param border_type Way of handling the border. /// @param context Pointer to filter context. /// diff --git a/kleidicv/include/kleidicv/sigma.h b/kleidicv/include/kleidicv/sigma.h new file mode 100644 index 000000000..596ff0e28 --- /dev/null +++ b/kleidicv/include/kleidicv/sigma.h @@ -0,0 +1,81 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_SIGMA_H +#define KLEIDICV_SIGMA_H + +#include +#include + +#include "kleidicv/config.h" + +namespace KLEIDICV_TARGET_NAMESPACE { + +static constexpr size_t get_half_kernel_size(size_t kernel_size) + KLEIDICV_STREAMING_COMPATIBLE { + // since kernel sizes are odd, "half" here means that + // the extra element is included + return (kernel_size >> 1) + 1; +} + +// This function is not marked as streaming compatible, as std::round is also +// not streaming compatible. +template +static std::array generate_gaussian_half_kernel( + float sigma) { + // Define the mid point of the full kernel range. + constexpr size_t kMid = HalfKernelSize - 1; + + // Define the full kernel size. + constexpr size_t KernelSize = kMid * 2 + 1; + + // Calculate the sigma manually in case it is not defined. + if (sigma == 0.0) { + sigma = static_cast(KernelSize) * 0.15 + 0.35; + } + + // Temporary float half-kernel. + std::array half_kernel_float{}; + + // Prepare the sigma value for later multiplication inside a loop. + float coefficient = 1 / -(2 * sigma * sigma); + + float sum = 0.0; + size_t j = kMid; + for (size_t i = 0; i < kMid; i++, j--) { + half_kernel_float[i] = + std::exp(static_cast(j) * static_cast(j) * coefficient); + sum += half_kernel_float[i]; + } + + // This multiplier is used for two things: + // * For normalizing the kernel values, so the sum of the final values is 1. + // (The 'sum' variable only accounts for the half of the kernel values + // without the mid point. That is the reason for the division by + // '(sum * 2 + 1)'.) + // * For converting the values to fixed-point (uint16_t), where 8 bits are + // used for the fractional part. That is the reason for the multiplication + // by 256. + float multiplier = 256 / (sum * 2 + 1); + + // Result half-kernel + std::array half_kernel{}; + + // Normalize the kernel and convert it to the fixed-point format. Rounding + // errors are diffused in the kernel. + float error = 0.0; + for (size_t i = 0; i < kMid; i++) { + float value = half_kernel_float[i] * multiplier - error; + float value_rounded = std::round(value); + half_kernel[i] = static_cast(value_rounded); + error = value_rounded - value; + } + half_kernel[kMid] = static_cast(std::round(multiplier - error)); + + return half_kernel; +} + +} // namespace KLEIDICV_TARGET_NAMESPACE + +#endif // KLEIDICV_SIGMA_H diff --git a/kleidicv/src/filters/gaussian_blur_api.cpp b/kleidicv/src/filters/gaussian_blur_api.cpp index 10fc41b6d..01ace13a7 100644 --- a/kleidicv/src/filters/gaussian_blur_api.cpp +++ b/kleidicv/src/filters/gaussian_blur_api.cpp @@ -28,10 +28,9 @@ kleidicv_error_t kleidicv_filter_context_create( CHECK_IMAGE_SIZE(max_image_width, max_image_height); - // naive check because non-square kernels are not supported anyway - size_t intermediate_size = (max_kernel_width == 15 || max_kernel_height == 15) - ? sizeof(uint32_t) - : sizeof(uint16_t); + // We can use the maximum size that accommodates everything due to the lack of + // information at this stage. + constexpr size_t intermediate_size = sizeof(uint32_t); auto workspace = SeparableFilterWorkspace::create( Rectangle{max_image_width, max_image_height}, max_channels, intermediate_size); diff --git a/kleidicv/src/filters/gaussian_blur_neon.cpp b/kleidicv/src/filters/gaussian_blur_neon.cpp index 71e492104..7405e2e71 100644 --- a/kleidicv/src/filters/gaussian_blur_neon.cpp +++ b/kleidicv/src/filters/gaussian_blur_neon.cpp @@ -2,6 +2,9 @@ // // SPDX-License-Identifier: Apache-2.0 +#include + +#include "kleidicv/ctypes.h" #include "kleidicv/filters/gaussian_blur.h" #include "kleidicv/kleidicv.h" #include "kleidicv/neon.h" @@ -9,20 +12,21 @@ #include "kleidicv/separable_filter_3x3_neon.h" #include "kleidicv/separable_filter_5x5_neon.h" #include "kleidicv/separable_filter_7x7_neon.h" +#include "kleidicv/sigma.h" namespace kleidicv::neon { -// Primary template for Gaussian Blur approximation filters. -template -class DiscreteGaussianBlur; +// Primary template for Gaussian Blur filters. +template +class GaussianBlur; -// Template for 3x3 Gaussian Blur approximation filters. +// Template for 3x3 Gaussian Blur binomial filters. // // [ 1, 2, 1 ] [ 1 ] // F = 1/16 * [ 2, 4, 2 ] = 1/16 * [ 2 ] * [ 1, 2, 1 ] // [ 1, 2, 1 ] [ 1 ] template <> -class DiscreteGaussianBlur { +class GaussianBlur { public: using ScalarType = uint8_t; using SourceType = ScalarType; @@ -31,6 +35,8 @@ class DiscreteGaussianBlur { using BufferVectorType = typename VecTraits::VectorType; using DestinationType = ScalarType; + explicit GaussianBlur([[maybe_unused]] float sigma) {} + // Applies vertical filtering vector using SIMD operations. // // DST = [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T @@ -74,9 +80,9 @@ class DiscreteGaussianBlur { auto acc = src[0] + 2 * src[1] + src[2]; dst[0] = rounding_shift_right(acc, 4); } -}; // end of class DiscreteGaussianBlur +}; // end of class GaussianBlur -// Template for 5x5 Gaussian Blur approximation filters. +// Template for 5x5 Gaussian Blur binomial filters. // // [ 1, 4, 6, 4, 1 ] [ 1 ] // [ 4, 16, 24, 16, 4 ] [ 4 ] @@ -84,13 +90,13 @@ class DiscreteGaussianBlur { // [ 4, 16, 24, 16, 4 ] [ 4 ] // [ 1, 4, 6, 4, 1 ] [ 1 ] template <> -class DiscreteGaussianBlur { +class GaussianBlur { public: using SourceType = uint8_t; using BufferType = uint16_t; using DestinationType = uint8_t; - DiscreteGaussianBlur() + explicit GaussianBlur([[maybe_unused]] float sigma) : const_6_u8_half_{vdup_n_u8(6)}, const_6_u16_{vdupq_n_u16(6)}, const_4_u16_{vdupq_n_u16(4)} {} @@ -145,9 +151,9 @@ class DiscreteGaussianBlur { uint8x8_t const_6_u8_half_; uint16x8_t const_6_u16_; uint16x8_t const_4_u16_; -}; // end of class DiscreteGaussianBlur +}; // end of class GaussianBlur -// Template for 7x7 Gaussian Blur approximation filters. +// Template for 7x7 Gaussian Blur binomial filters. // // [ 4, 14, 28, 36, 28, 14, 4 ] // [ 14, 49, 98, 126, 98, 49, 14 ] @@ -165,13 +171,13 @@ class DiscreteGaussianBlur { // [ 7 ] // [ 2 ] template <> -class DiscreteGaussianBlur { +class GaussianBlur { public: using SourceType = uint8_t; using BufferType = uint16_t; using DestinationType = uint8_t; - DiscreteGaussianBlur() + explicit GaussianBlur([[maybe_unused]] float sigma) : const_7_u16_{vdupq_n_u16(7)}, const_7_u32_{vdupq_n_u32(7)}, const_9_u16_{vdupq_n_u16(9)} {} @@ -283,9 +289,9 @@ class DiscreteGaussianBlur { uint16x8_t const_7_u16_; uint32x4_t const_7_u32_; uint16x8_t const_9_u16_; -}; // end of class DiscreteGaussianBlur +}; // end of class GaussianBlur -// Template for 15x15 Gaussian Blur approximation filters. +// Template for 15x15 Gaussian Blur binomial filters. // // [ 16, 44, 100, 192 ... 192, 100, 44, 16 ] // [ 44, 121, 275, 528 ... 528, 275, 121, 44 ] @@ -313,13 +319,13 @@ class DiscreteGaussianBlur { // [ 11 ] // [ 4 ] template <> -class DiscreteGaussianBlur { +class GaussianBlur { public: using SourceType = uint8_t; using BufferType = uint32_t; using DestinationType = uint8_t; - DiscreteGaussianBlur() + explicit GaussianBlur([[maybe_unused]] float sigma) : const_11_u16_{vdupq_n_u16(11)}, const_11_u32_{vdupq_n_u32(11)}, const_25_u16_{vdupq_n_u16(25)}, @@ -505,74 +511,155 @@ class DiscreteGaussianBlur { uint32x4_t const_146_u32_; uint16x4_t const_158_u16_half_; uint32x4_t const_158_u32_; -}; // end of class DiscreteGaussianBlur - -template -kleidicv_error_t discrete_gaussian_blur(const ScalarType *src, - size_t src_stride, ScalarType *dst, - size_t dst_stride, size_t width, - size_t height, size_t channels, - kleidicv_border_type_t border_type, - kleidicv_filter_context_t *context) { - CHECK_POINTERS(context); - CHECK_POINTER_AND_STRIDE(src, src_stride, height); - CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); - CHECK_IMAGE_SIZE(width, height); +}; // end of class GaussianBlur - if (width < KernelSize - 1 || height < KernelSize - 1) { - return KLEIDICV_ERROR_NOT_IMPLEMENTED; - } +template +class GaussianBlur { + public: + using SourceType = uint8_t; + using BufferType = uint32_t; + using DestinationType = uint8_t; - if (channels > KLEIDICV_MAXIMUM_CHANNEL_COUNT) { - return KLEIDICV_ERROR_RANGE; + explicit GaussianBlur(float sigma) + : half_kernel_( + generate_gaussian_half_kernel( + sigma)) {} + + void vertical_vector_path(uint8x16_t src[KernelSize], BufferType *dst) const { + uint16x8_t acc_last_l = vmovl_u8(vget_low_u8(src[KernelSize >> 1])); + uint16x8_t acc_last_h = vmovl_u8(vget_high_u8(src[KernelSize >> 1])); + + uint32x4_t acc_l_l = + vmull_n_u16(vget_low_u16(acc_last_l), half_kernel_[KernelSize >> 1]); + uint32x4_t acc_l_h = + vmull_n_u16(vget_high_u16(acc_last_l), half_kernel_[KernelSize >> 1]); + uint32x4_t acc_h_l = + vmull_n_u16(vget_low_u16(acc_last_h), half_kernel_[KernelSize >> 1]); + uint32x4_t acc_h_h = + vmull_n_u16(vget_high_u16(acc_last_h), half_kernel_[KernelSize >> 1]); + + KLEIDICV_FORCE_LOOP_UNROLL + for (size_t i = 0; i < (KernelSize >> 1); i++) { + size_t j = KernelSize - i - 1; + uint16x8_t acc_l = vaddl_u8(vget_low_u8(src[i]), vget_low_u8(src[j])); + uint16x8_t acc_h = vaddl_u8(vget_high_u8(src[i]), vget_high_u8(src[j])); + + acc_l_l = vmlal_n_u16(acc_l_l, vget_low_u16(acc_l), half_kernel_[i]); + acc_l_h = vmlal_n_u16(acc_l_h, vget_high_u16(acc_l), half_kernel_[i]); + acc_h_l = vmlal_n_u16(acc_h_l, vget_low_u16(acc_h), half_kernel_[i]); + acc_h_h = vmlal_n_u16(acc_h_h, vget_high_u16(acc_h), half_kernel_[i]); + } + + uint32x4x4_t result = {acc_l_l, acc_l_h, acc_h_l, acc_h_h}; + + vst1q_u32_x4(&dst[0], result); } - auto *workspace = reinterpret_cast(context); + void vertical_scalar_path(const SourceType src[KernelSize], + BufferType *dst) const { + uint32_t acc = static_cast(src[0]) * half_kernel_[0]; - if constexpr (KernelSize == 15) { - if (workspace->intermediate_size() < sizeof(uint32_t)) { - return KLEIDICV_ERROR_CONTEXT_MISMATCH; + KLEIDICV_FORCE_LOOP_UNROLL + for (size_t i = 1; i <= (KernelSize >> 1); i++) { + acc += static_cast(src[i]) * half_kernel_[i]; } - } - if (workspace->channels() < channels) { - return KLEIDICV_ERROR_CONTEXT_MISMATCH; + KLEIDICV_FORCE_LOOP_UNROLL + for (size_t i = (KernelSize >> 1) + 1; i < KernelSize; i++) { + size_t j = KernelSize - i - 1; + acc += static_cast(src[i]) * half_kernel_[j]; + } + + dst[0] = acc; } - Rectangle rect{width, height}; - const Rectangle &context_rect = workspace->image_size(); - if (context_rect.width() < width || context_rect.height() < height) { - return KLEIDICV_ERROR_CONTEXT_MISMATCH; + void horizontal_vector_path(uint32x4_t src[KernelSize], + DestinationType *dst) const { + uint32x4_t acc = + vmulq_n_u32(src[KernelSize >> 1], half_kernel_[KernelSize >> 1]); + + KLEIDICV_FORCE_LOOP_UNROLL + for (size_t i = 0; i < (KernelSize >> 1); i++) { + size_t j = KernelSize - i - 1; + uint32x4_t acc_inner = vaddq_u32(src[i], src[j]); + acc = vmlaq_n_u32(acc, acc_inner, half_kernel_[i]); + } + + uint32x4_t acc_u32 = vrshrq_n_u32(acc, 16); + uint16x4_t narrowed = vmovn_u32(acc_u32); + uint8x8_t interleaved = + vuzp1_u8(vreinterpret_u8_u16(narrowed), vreinterpret_u8_u16(narrowed)); + uint32_t result = vget_lane_u32(vreinterpret_u32_u8(interleaved), 0); + memcpy(&dst[0], &result, sizeof(result)); } - auto fixed_border_type = get_fixed_border_type(border_type); + void horizontal_scalar_path(const BufferType src[KernelSize], + DestinationType *dst) const { + uint32_t acc = src[0] * half_kernel_[0]; - if (!fixed_border_type) { - return KLEIDICV_ERROR_NOT_IMPLEMENTED; + KLEIDICV_FORCE_LOOP_UNROLL + for (size_t i = 1; i <= (KernelSize >> 1); i++) { + acc += src[i] * half_kernel_[i]; + } + + KLEIDICV_FORCE_LOOP_UNROLL + for (size_t i = (KernelSize >> 1) + 1; i < KernelSize; i++) { + size_t j = KernelSize - i - 1; + acc += src[i] * half_kernel_[j]; + } + + dst[0] = static_cast(rounding_shift_right(acc, 16)); } - using GaussianBlurFilterType = DiscreteGaussianBlur; + private: + const std::array half_kernel_; +}; // end of class GaussianBlur + +template +static kleidicv_error_t gaussian_blur_fixed_kernel_size( + const ScalarType *src, size_t src_stride, ScalarType *dst, + size_t dst_stride, Rectangle &rect, size_t channels, float sigma, + FixedBorderType border_type, SeparableFilterWorkspace *workspace) { + using GaussianBlurFilter = GaussianBlur; - GaussianBlurFilterType blur; - SeparableFilter filter{blur}; + GaussianBlurFilter blur{sigma}; + SeparableFilter filter{blur}; Rows src_rows{src, src_stride, channels}; Rows dst_rows{dst, dst_stride, channels}; - workspace->process(rect, src_rows, dst_rows, channels, *fixed_border_type, - filter); + workspace->process(rect, src_rows, dst_rows, channels, border_type, filter); + return KLEIDICV_OK; } -#define KLEIDICV_GAUSSIAN_BLUR_WRAPPER(size, ...) \ - if (kernel_width == size) { \ - return discrete_gaussian_blur(__VA_ARGS__); \ +template +static kleidicv_error_t gaussian_blur(size_t kernel_size, const ScalarType *src, + size_t src_stride, ScalarType *dst, + size_t dst_stride, Rectangle &rect, + size_t channels, float sigma, + FixedBorderType border_type, + SeparableFilterWorkspace *workspace) { + switch (kernel_size) { + case 3: + return gaussian_blur_fixed_kernel_size<3, IsBinomial>( + src, src_stride, dst, dst_stride, rect, channels, sigma, border_type, + workspace); + case 5: + return gaussian_blur_fixed_kernel_size<5, IsBinomial>( + src, src_stride, dst, dst_stride, rect, channels, sigma, border_type, + workspace); + case 7: + return gaussian_blur_fixed_kernel_size<7, IsBinomial>( + src, src_stride, dst, dst_stride, rect, channels, sigma, border_type, + workspace); + case 15: + return gaussian_blur_fixed_kernel_size<15, IsBinomial>( + src, src_stride, dst, dst_stride, rect, channels, sigma, border_type, + workspace); + default: + return KLEIDICV_ERROR_NOT_IMPLEMENTED; } - -#define KLEIDICV_GENERATE_GAUSSIAN_BLUR_WRAPPERS(...) \ - KLEIDICV_GAUSSIAN_BLUR_WRAPPER(3, __VA_ARGS__) \ - KLEIDICV_GAUSSIAN_BLUR_WRAPPER(5, __VA_ARGS__) \ - KLEIDICV_GAUSSIAN_BLUR_WRAPPER(7, __VA_ARGS__) \ - KLEIDICV_GAUSSIAN_BLUR_WRAPPER(15, __VA_ARGS__) +} KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t gaussian_blur_u8(const uint8_t *src, size_t src_stride, @@ -582,19 +669,53 @@ kleidicv_error_t gaussian_blur_u8(const uint8_t *src, size_t src_stride, float sigma_x, float sigma_y, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { + CHECK_POINTERS(context); + auto *workspace = reinterpret_cast(context); + auto fixed_border_type = get_fixed_border_type(border_type); + if (kernel_width != kernel_height) { return KLEIDICV_ERROR_NOT_IMPLEMENTED; } - if (sigma_x != 0.0 || sigma_y != 0.0) { + if (sigma_x != sigma_y) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); + CHECK_IMAGE_SIZE(width, height); + + if (width < kernel_width - 1 || height < kernel_width - 1) { return KLEIDICV_ERROR_NOT_IMPLEMENTED; } - KLEIDICV_GENERATE_GAUSSIAN_BLUR_WRAPPERS(src, src_stride, dst, dst_stride, - width, height, channels, border_type, - context) + if (channels > KLEIDICV_MAXIMUM_CHANNEL_COUNT) { + return KLEIDICV_ERROR_RANGE; + } + + if (workspace->channels() < channels) { + return KLEIDICV_ERROR_CONTEXT_MISMATCH; + } + + Rectangle rect{width, height}; + const Rectangle &context_rect = workspace->image_size(); + if (context_rect.width() < width || context_rect.height() < height) { + return KLEIDICV_ERROR_CONTEXT_MISMATCH; + } + + if (!fixed_border_type) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + + if (sigma_x == 0.0) { + return gaussian_blur(kernel_width, src, src_stride, dst, dst_stride, + rect, channels, sigma_x, *fixed_border_type, + workspace); + } - return KLEIDICV_ERROR_NOT_IMPLEMENTED; + return gaussian_blur(kernel_width, src, src_stride, dst, dst_stride, + rect, channels, sigma_x, *fixed_border_type, + workspace); } } // namespace kleidicv::neon diff --git a/kleidicv/src/filters/gaussian_blur_sc.h b/kleidicv/src/filters/gaussian_blur_sc.h index 6f60c2151..5c33af67c 100644 --- a/kleidicv/src/filters/gaussian_blur_sc.h +++ b/kleidicv/src/filters/gaussian_blur_sc.h @@ -5,33 +5,37 @@ #ifndef KLEIDICV_GAUSSIAN_BLUR_SC_H #define KLEIDICV_GAUSSIAN_BLUR_SC_H -#include +#include #include "kleidicv/kleidicv.h" #include "kleidicv/separable_filter_15x15_sc.h" #include "kleidicv/separable_filter_3x3_sc.h" #include "kleidicv/separable_filter_5x5_sc.h" #include "kleidicv/separable_filter_7x7_sc.h" +#include "kleidicv/sigma.h" #include "kleidicv/sve2.h" namespace KLEIDICV_TARGET_NAMESPACE { -// Primary template for Gaussian Blur approximation filters. -template -class DiscreteGaussianBlur; +// Primary template for Gaussian Blur filters. +template +class GaussianBlur; -// Template for 3x3 Gaussian Blur approximation filters. +// Template for 3x3 Gaussian Blur binomial filters. // // [ 1, 2, 1 ] [ 1 ] // F = 1/16 * [ 2, 4, 2 ] = 1/16 * [ 2 ] * [ 1, 2, 1 ] // [ 1, 2, 1 ] [ 1 ] template <> -class DiscreteGaussianBlur { +class GaussianBlur { public: using SourceType = uint8_t; using BufferType = uint16_t; using DestinationType = uint8_t; + explicit GaussianBlur([[maybe_unused]] float sigma) + KLEIDICV_STREAMING_COMPATIBLE {} + // Applies vertical filtering vector using SIMD operations. // // DST = [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T @@ -73,9 +77,9 @@ class DiscreteGaussianBlur { auto acc = src[0] + 2 * src[1] + src[2]; dst[0] = rounding_shift_right(acc, 4); } -}; // end of class DiscreteGaussianBlur +}; // end of class GaussianBlur -// Template for 5x5 Gaussian Blur approximation filters. +// Template for 5x5 Gaussian Blur binomial filters. // // [ 1, 4, 6, 4, 1 ] [ 1 ] // [ 4, 16, 24, 16, 4 ] [ 4 ] @@ -83,12 +87,15 @@ class DiscreteGaussianBlur { // [ 4, 16, 24, 16, 4 ] [ 4 ] // [ 1, 4, 6, 4, 1 ] [ 1 ] template <> -class DiscreteGaussianBlur { +class GaussianBlur { public: using SourceType = uint8_t; using BufferType = uint16_t; using DestinationType = uint8_t; + explicit GaussianBlur([[maybe_unused]] float sigma) + KLEIDICV_STREAMING_COMPATIBLE {} + // Applies vertical filtering vector using SIMD operations. // // DST = [ SRC0, SRC1, SRC2, SRC3, SRC4 ] * [ 1, 4, 6, 4, 1 ]T @@ -139,9 +146,9 @@ class DiscreteGaussianBlur { auto acc = src[0] + src[4] + 4 * (src[1] + src[3]) + 6 * src[2]; dst[0] = rounding_shift_right(acc, 8); } -}; // end of class DiscreteGaussianBlur +}; // end of class GaussianBlur -// Template for 7x7 Gaussian Blur approximation filters. +// Template for 7x7 Gaussian Blur binomial filters. // // [ 4, 14, 28, 36, 28, 14, 4 ] // [ 14, 49, 98, 126, 98, 49, 14 ] @@ -159,12 +166,15 @@ class DiscreteGaussianBlur { // [ 7 ] // [ 2 ] template <> -class DiscreteGaussianBlur { +class GaussianBlur { public: using SourceType = uint8_t; using BufferType = uint16_t; using DestinationType = uint8_t; + explicit GaussianBlur([[maybe_unused]] float sigma) + KLEIDICV_STREAMING_COMPATIBLE {} + // Applies vertical filtering vector using SIMD operations. // // DST = [ SRC0, SRC1, SRC2, SRC3, SRC4, SRC5, SRC6 ] * @@ -262,9 +272,9 @@ class DiscreteGaussianBlur { src[4] * 14 + src[5] * 7 + src[6] * 2; dst[0] = rounding_shift_right(acc, 12); } -}; // end of class DiscreteGaussianBlur +}; // end of class GaussianBlur -// Template for 15x15 Gaussian Blur approximation filters. +// Template for 15x15 Gaussian Blur binomial filters. // // [ 16, 44, 100, 192 ... 192, 100, 44, 16 ] // [ 44, 121, 275, 528 ... 528, 275, 121, 44 ] @@ -292,12 +302,15 @@ class DiscreteGaussianBlur { // [ 11 ] // [ 4 ] template <> -class DiscreteGaussianBlur { +class GaussianBlur { public: using SourceType = uint8_t; using BufferType = uint32_t; using DestinationType = uint8_t; + explicit GaussianBlur([[maybe_unused]] float sigma) + KLEIDICV_STREAMING_COMPATIBLE {} + // Applies vertical filtering vector using SIMD operations. // // DST = [ SRC0, SRC1, SRC2, SRC3...SRC11, SRC12, SRC13, SRC14 ] * @@ -428,93 +441,490 @@ class DiscreteGaussianBlur { acc += (src[5] + src[9]) * 118 + (src[6] + src[8]) * 146 + src[7] * 158; dst[0] = rounding_shift_right(acc, 20); } -}; // end of class DiscreteGaussianBlur +}; // end of class GaussianBlur template -kleidicv_error_t discrete_gaussian_blur( - const ScalarType *src, size_t src_stride, ScalarType *dst, - size_t dst_stride, size_t width, size_t height, size_t channels, - kleidicv_border_type_t border_type, - kleidicv_filter_context_t *context) KLEIDICV_STREAMING_COMPATIBLE { - CHECK_POINTERS(context); - CHECK_POINTER_AND_STRIDE(src, src_stride, height); - CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); - CHECK_IMAGE_SIZE(width, height); +class GaussianBlurNonBinomialBase; - if (width < KernelSize - 1 || height < KernelSize - 1) { - return KLEIDICV_ERROR_NOT_IMPLEMENTED; +template +class GaussianBlurNonBinomialBase { + protected: + explicit GaussianBlurNonBinomialBase(float sigma) + KLEIDICV_STREAMING_COMPATIBLE + : half_kernel_( + generate_gaussian_half_kernel( + sigma)) {} + + const std::array half_kernel_; +}; + +template <> +class GaussianBlur final + : public GaussianBlurNonBinomialBase { + public: + using SourceType = uint8_t; + using BufferType = uint32_t; + using DestinationType = uint8_t; + + explicit GaussianBlur(float sigma) KLEIDICV_STREAMING_COMPATIBLE + : GaussianBlurNonBinomialBase(sigma) {} + + void vertical_vector_path(svbool_t pg, svuint8_t src_0, svuint8_t src_1, + svuint8_t src_2, BufferType *dst) const + KLEIDICV_STREAMING_COMPATIBLE { + // 1 + svuint16_t acc_1_b = svmovlb_u16(src_1); + svuint16_t acc_1_t = svmovlt_u16(src_1); + + svuint32_t acc_b_b = svmullb_n_u32(acc_1_b, half_kernel_[1]); + svuint32_t acc_b_t = svmullb_n_u32(acc_1_t, half_kernel_[1]); + svuint32_t acc_t_b = svmullt_n_u32(acc_1_b, half_kernel_[1]); + svuint32_t acc_t_t = svmullt_n_u32(acc_1_t, half_kernel_[1]); + + // 0 - 2 + svuint16_t acc_0_2_b = svaddlb_u16(src_0, src_2); + svuint16_t acc_0_2_t = svaddlt_u16(src_0, src_2); + + acc_b_b = svmlalb_n_u32(acc_b_b, acc_0_2_b, half_kernel_[0]); + acc_b_t = svmlalb_n_u32(acc_b_t, acc_0_2_t, half_kernel_[0]); + acc_t_b = svmlalt_n_u32(acc_t_b, acc_0_2_b, half_kernel_[0]); + acc_t_t = svmlalt_n_u32(acc_t_t, acc_0_2_t, half_kernel_[0]); + + svuint32x4_t interleaved = svcreate4(acc_b_b, acc_b_t, acc_t_b, acc_t_t); + svst4(pg, &dst[0], interleaved); } - if (channels > KLEIDICV_MAXIMUM_CHANNEL_COUNT) { - return KLEIDICV_ERROR_RANGE; + void horizontal_vector_path(svbool_t pg, svuint32_t src_0, svuint32_t src_1, + svuint32_t src_2, DestinationType *dst) const + KLEIDICV_STREAMING_COMPATIBLE { + // 1 + svuint32_t acc = svmul_n_u32_x(pg, src_1, half_kernel_[1]); + + // 0 - 2 + svuint32_t acc_0_2 = svadd_u32_x(pg, src_0, src_2); + acc = svmla_n_u32_x(pg, acc, acc_0_2, half_kernel_[0]); + + acc = svrshr_n_u32_x(pg, acc, 16); + svst1b_u32(pg, &dst[0], acc); } - auto *workspace = reinterpret_cast(context); + void horizontal_scalar_path(const BufferType src[3], DestinationType *dst) + const KLEIDICV_STREAMING_COMPATIBLE { + uint32_t acc = src[0] * half_kernel_[0] + src[1] * half_kernel_[1] + + src[2] * half_kernel_[0]; + dst[0] = static_cast(rounding_shift_right(acc, 16)); + } +}; // end of class GaussianBlur + +template <> +class GaussianBlur final + : public GaussianBlurNonBinomialBase { + public: + using SourceType = uint8_t; + using BufferType = uint32_t; + using DestinationType = uint8_t; + + explicit GaussianBlur(float sigma) KLEIDICV_STREAMING_COMPATIBLE + : GaussianBlurNonBinomialBase(sigma) {} + + void vertical_vector_path(svbool_t pg, svuint8_t src_0, svuint8_t src_1, + svuint8_t src_2, svuint8_t src_3, svuint8_t src_4, + BufferType *dst) const + KLEIDICV_STREAMING_COMPATIBLE { + // 2 + svuint16_t acc_2_b = svmovlb_u16(src_2); + svuint16_t acc_2_t = svmovlt_u16(src_2); + + svuint32_t acc_b_b = svmullb_n_u32(acc_2_b, half_kernel_[2]); + svuint32_t acc_b_t = svmullb_n_u32(acc_2_t, half_kernel_[2]); + svuint32_t acc_t_b = svmullt_n_u32(acc_2_b, half_kernel_[2]); + svuint32_t acc_t_t = svmullt_n_u32(acc_2_t, half_kernel_[2]); + + // 1 - 3 + svuint16_t acc_1_3_b = svaddlb_u16(src_1, src_3); + svuint16_t acc_1_3_t = svaddlt_u16(src_1, src_3); - if constexpr (KernelSize == 15) { - if (workspace->intermediate_size() < sizeof(uint32_t)) { - return KLEIDICV_ERROR_CONTEXT_MISMATCH; - } + acc_b_b = svmlalb_n_u32(acc_b_b, acc_1_3_b, half_kernel_[1]); + acc_b_t = svmlalb_n_u32(acc_b_t, acc_1_3_t, half_kernel_[1]); + acc_t_b = svmlalt_n_u32(acc_t_b, acc_1_3_b, half_kernel_[1]); + acc_t_t = svmlalt_n_u32(acc_t_t, acc_1_3_t, half_kernel_[1]); + + // 0 - 4 + svuint16_t acc_0_4_b = svaddlb_u16(src_0, src_4); + svuint16_t acc_0_4_t = svaddlt_u16(src_0, src_4); + + acc_b_b = svmlalb_n_u32(acc_b_b, acc_0_4_b, half_kernel_[0]); + acc_b_t = svmlalb_n_u32(acc_b_t, acc_0_4_t, half_kernel_[0]); + acc_t_b = svmlalt_n_u32(acc_t_b, acc_0_4_b, half_kernel_[0]); + acc_t_t = svmlalt_n_u32(acc_t_t, acc_0_4_t, half_kernel_[0]); + + svuint32x4_t interleaved = svcreate4(acc_b_b, acc_b_t, acc_t_b, acc_t_t); + svst4(pg, &dst[0], interleaved); } - if (workspace->channels() < channels) { - return KLEIDICV_ERROR_CONTEXT_MISMATCH; + void horizontal_vector_path(svbool_t pg, svuint32_t src_0, svuint32_t src_1, + svuint32_t src_2, svuint32_t src_3, + svuint32_t src_4, DestinationType *dst) const + KLEIDICV_STREAMING_COMPATIBLE { + // 2 + svuint32_t acc = svmul_n_u32_x(pg, src_2, half_kernel_[2]); + + // 1 - 3 + svuint32_t acc_1_3 = svadd_u32_x(pg, src_1, src_3); + acc = svmla_n_u32_x(pg, acc, acc_1_3, half_kernel_[1]); + + // 0 - 4 + svuint32_t acc_0_4 = svadd_u32_x(pg, src_0, src_4); + acc = svmla_n_u32_x(pg, acc, acc_0_4, half_kernel_[0]); + + acc = svrshr_n_u32_x(pg, acc, 16); + svst1b_u32(pg, &dst[0], acc); } - Rectangle rect{width, height}; - const Rectangle &context_rect = workspace->image_size(); - if (context_rect.width() < width || context_rect.height() < height) { - return KLEIDICV_ERROR_CONTEXT_MISMATCH; + void horizontal_scalar_path(const BufferType src[5], DestinationType *dst) + const KLEIDICV_STREAMING_COMPATIBLE { + uint32_t acc = src[0] * half_kernel_[0] + src[1] * half_kernel_[1] + + src[2] * half_kernel_[2] + src[3] * half_kernel_[1] + + src[4] * half_kernel_[0]; + dst[0] = static_cast(rounding_shift_right(acc, 16)); } +}; // end of class GaussianBlur +template <> +class GaussianBlur final + : public GaussianBlurNonBinomialBase { + public: + using SourceType = uint8_t; + using BufferType = uint32_t; + using DestinationType = uint8_t; - auto fixed_border_type = get_fixed_border_type(border_type); + explicit GaussianBlur(float sigma) KLEIDICV_STREAMING_COMPATIBLE + : GaussianBlurNonBinomialBase(sigma) {} - if (!fixed_border_type) { - return KLEIDICV_ERROR_NOT_IMPLEMENTED; + void vertical_vector_path( + svbool_t pg, svuint8_t src_0, svuint8_t src_1, svuint8_t src_2, + svuint8_t src_3, svuint8_t src_4, svuint8_t src_5, svuint8_t src_6, + BufferType *dst) const KLEIDICV_STREAMING_COMPATIBLE { + // 3 + svuint16_t acc_3_b = svmovlb_u16(src_3); + svuint16_t acc_3_t = svmovlt_u16(src_3); + + svuint32_t acc_b_b = svmullb_n_u32(acc_3_b, half_kernel_[3]); + svuint32_t acc_b_t = svmullb_n_u32(acc_3_t, half_kernel_[3]); + svuint32_t acc_t_b = svmullt_n_u32(acc_3_b, half_kernel_[3]); + svuint32_t acc_t_t = svmullt_n_u32(acc_3_t, half_kernel_[3]); + + // 2 - 4 + svuint16_t acc_2_4_b = svaddlb_u16(src_2, src_4); + svuint16_t acc_2_4_t = svaddlt_u16(src_2, src_4); + + acc_b_b = svmlalb_n_u32(acc_b_b, acc_2_4_b, half_kernel_[2]); + acc_b_t = svmlalb_n_u32(acc_b_t, acc_2_4_t, half_kernel_[2]); + acc_t_b = svmlalt_n_u32(acc_t_b, acc_2_4_b, half_kernel_[2]); + acc_t_t = svmlalt_n_u32(acc_t_t, acc_2_4_t, half_kernel_[2]); + + // 1 - 5 + svuint16_t acc_1_5_b = svaddlb_u16(src_1, src_5); + svuint16_t acc_1_5_t = svaddlt_u16(src_1, src_5); + + acc_b_b = svmlalb_n_u32(acc_b_b, acc_1_5_b, half_kernel_[1]); + acc_b_t = svmlalb_n_u32(acc_b_t, acc_1_5_t, half_kernel_[1]); + acc_t_b = svmlalt_n_u32(acc_t_b, acc_1_5_b, half_kernel_[1]); + acc_t_t = svmlalt_n_u32(acc_t_t, acc_1_5_t, half_kernel_[1]); + + // 0 - 6 + svuint16_t acc_0_6_b = svaddlb_u16(src_0, src_6); + svuint16_t acc_0_6_t = svaddlt_u16(src_0, src_6); + + acc_b_b = svmlalb_n_u32(acc_b_b, acc_0_6_b, half_kernel_[0]); + acc_b_t = svmlalb_n_u32(acc_b_t, acc_0_6_t, half_kernel_[0]); + acc_t_b = svmlalt_n_u32(acc_t_b, acc_0_6_b, half_kernel_[0]); + acc_t_t = svmlalt_n_u32(acc_t_t, acc_0_6_t, half_kernel_[0]); + + svuint32x4_t interleaved = svcreate4(acc_b_b, acc_b_t, acc_t_b, acc_t_t); + svst4(pg, &dst[0], interleaved); + } + + void horizontal_vector_path( + svbool_t pg, svuint32_t src_0, svuint32_t src_1, svuint32_t src_2, + svuint32_t src_3, svuint32_t src_4, svuint32_t src_5, svuint32_t src_6, + DestinationType *dst) const KLEIDICV_STREAMING_COMPATIBLE { + // 3 + svuint32_t acc = svmul_n_u32_x(pg, src_3, half_kernel_[3]); + + // 2 - 4 + svuint32_t acc_2_4 = svadd_u32_x(pg, src_2, src_4); + acc = svmla_n_u32_x(pg, acc, acc_2_4, half_kernel_[2]); + + // 1 - 5 + svuint32_t acc_1_5 = svadd_u32_x(pg, src_1, src_5); + acc = svmla_n_u32_x(pg, acc, acc_1_5, half_kernel_[1]); + + // 0 - 6 + svuint32_t acc_0_6 = svadd_u32_x(pg, src_0, src_6); + acc = svmla_n_u32_x(pg, acc, acc_0_6, half_kernel_[0]); + + acc = svrshr_n_u32_x(pg, acc, 16); + svst1b_u32(pg, &dst[0], acc); } - using GaussianBlurFilterType = DiscreteGaussianBlur; + void horizontal_scalar_path(const BufferType src[7], DestinationType *dst) + const KLEIDICV_STREAMING_COMPATIBLE { + uint32_t acc = src[0] * half_kernel_[0] + src[1] * half_kernel_[1] + + src[2] * half_kernel_[2] + src[3] * half_kernel_[3] + + src[4] * half_kernel_[2] + src[5] * half_kernel_[1] + + src[6] * half_kernel_[0]; + dst[0] = static_cast(rounding_shift_right(acc, 16)); + } +}; // end of class GaussianBlur + +template <> +class GaussianBlur final + : public GaussianBlurNonBinomialBase { + public: + using SourceType = uint8_t; + using BufferType = uint32_t; + using DestinationType = uint8_t; + + explicit GaussianBlur(float sigma) KLEIDICV_STREAMING_COMPATIBLE + : GaussianBlurNonBinomialBase(sigma) {} + + void vertical_vector_path( + svbool_t pg, svuint8_t src_0, svuint8_t src_1, svuint8_t src_2, + svuint8_t src_3, svuint8_t src_4, svuint8_t src_5, svuint8_t src_6, + svuint8_t src_7, svuint8_t src_8, svuint8_t src_9, svuint8_t src_10, + svuint8_t src_11, svuint8_t src_12, svuint8_t src_13, svuint8_t src_14, + BufferType *dst) const KLEIDICV_STREAMING_COMPATIBLE { + // 7 + svuint16_t acc_7_b = svmovlb_u16(src_7); + svuint16_t acc_7_t = svmovlt_u16(src_7); + + svuint32_t acc_b_b = svmullb_n_u32(acc_7_b, half_kernel_[7]); + svuint32_t acc_b_t = svmullb_n_u32(acc_7_t, half_kernel_[7]); + svuint32_t acc_t_b = svmullt_n_u32(acc_7_b, half_kernel_[7]); + svuint32_t acc_t_t = svmullt_n_u32(acc_7_t, half_kernel_[7]); + + // 6 - 8 + svuint16_t acc_6_8_b = svaddlb_u16(src_6, src_8); + svuint16_t acc_6_8_t = svaddlt_u16(src_6, src_8); + + acc_b_b = svmlalb_n_u32(acc_b_b, acc_6_8_b, half_kernel_[6]); + acc_b_t = svmlalb_n_u32(acc_b_t, acc_6_8_t, half_kernel_[6]); + acc_t_b = svmlalt_n_u32(acc_t_b, acc_6_8_b, half_kernel_[6]); + acc_t_t = svmlalt_n_u32(acc_t_t, acc_6_8_t, half_kernel_[6]); + + // 5 - 9 + svuint16_t acc_5_9_b = svaddlb_u16(src_5, src_9); + svuint16_t acc_5_9_t = svaddlt_u16(src_5, src_9); + + acc_b_b = svmlalb_n_u32(acc_b_b, acc_5_9_b, half_kernel_[5]); + acc_b_t = svmlalb_n_u32(acc_b_t, acc_5_9_t, half_kernel_[5]); + acc_t_b = svmlalt_n_u32(acc_t_b, acc_5_9_b, half_kernel_[5]); + acc_t_t = svmlalt_n_u32(acc_t_t, acc_5_9_t, half_kernel_[5]); + + // 4 - 10 + svuint16_t acc_4_10_b = svaddlb_u16(src_4, src_10); + svuint16_t acc_4_10_t = svaddlt_u16(src_4, src_10); + + acc_b_b = svmlalb_n_u32(acc_b_b, acc_4_10_b, half_kernel_[4]); + acc_b_t = svmlalb_n_u32(acc_b_t, acc_4_10_t, half_kernel_[4]); + acc_t_b = svmlalt_n_u32(acc_t_b, acc_4_10_b, half_kernel_[4]); + acc_t_t = svmlalt_n_u32(acc_t_t, acc_4_10_t, half_kernel_[4]); + + // 3 - 11 + svuint16_t acc_3_11_b = svaddlb_u16(src_3, src_11); + svuint16_t acc_3_11_t = svaddlt_u16(src_3, src_11); + + acc_b_b = svmlalb_n_u32(acc_b_b, acc_3_11_b, half_kernel_[3]); + acc_b_t = svmlalb_n_u32(acc_b_t, acc_3_11_t, half_kernel_[3]); + acc_t_b = svmlalt_n_u32(acc_t_b, acc_3_11_b, half_kernel_[3]); + acc_t_t = svmlalt_n_u32(acc_t_t, acc_3_11_t, half_kernel_[3]); + + // 2 - 12 + svuint16_t acc_2_12_b = svaddlb_u16(src_2, src_12); + svuint16_t acc_2_12_t = svaddlt_u16(src_2, src_12); + + acc_b_b = svmlalb_n_u32(acc_b_b, acc_2_12_b, half_kernel_[2]); + acc_b_t = svmlalb_n_u32(acc_b_t, acc_2_12_t, half_kernel_[2]); + acc_t_b = svmlalt_n_u32(acc_t_b, acc_2_12_b, half_kernel_[2]); + acc_t_t = svmlalt_n_u32(acc_t_t, acc_2_12_t, half_kernel_[2]); + + // 1 - 13 + svuint16_t acc_1_13_b = svaddlb_u16(src_1, src_13); + svuint16_t acc_1_13_t = svaddlt_u16(src_1, src_13); + + acc_b_b = svmlalb_n_u32(acc_b_b, acc_1_13_b, half_kernel_[1]); + acc_b_t = svmlalb_n_u32(acc_b_t, acc_1_13_t, half_kernel_[1]); + acc_t_b = svmlalt_n_u32(acc_t_b, acc_1_13_b, half_kernel_[1]); + acc_t_t = svmlalt_n_u32(acc_t_t, acc_1_13_t, half_kernel_[1]); + + // 0 - 14 + svuint16_t acc_0_14_b = svaddlb_u16(src_0, src_14); + svuint16_t acc_0_14_t = svaddlt_u16(src_0, src_14); + + acc_b_b = svmlalb_n_u32(acc_b_b, acc_0_14_b, half_kernel_[0]); + acc_b_t = svmlalb_n_u32(acc_b_t, acc_0_14_t, half_kernel_[0]); + acc_t_b = svmlalt_n_u32(acc_t_b, acc_0_14_b, half_kernel_[0]); + acc_t_t = svmlalt_n_u32(acc_t_t, acc_0_14_t, half_kernel_[0]); + + svuint32x4_t interleaved = svcreate4(acc_b_b, acc_b_t, acc_t_b, acc_t_t); + svst4(pg, &dst[0], interleaved); + } + + void horizontal_vector_path( + svbool_t pg, svuint32_t src_0, svuint32_t src_1, svuint32_t src_2, + svuint32_t src_3, svuint32_t src_4, svuint32_t src_5, svuint32_t src_6, + svuint32_t src_7, svuint32_t src_8, svuint32_t src_9, svuint32_t src_10, + svuint32_t src_11, svuint32_t src_12, svuint32_t src_13, + svuint32_t src_14, + DestinationType *dst) const KLEIDICV_STREAMING_COMPATIBLE { + // 7 + svuint32_t acc = svmul_n_u32_x(pg, src_7, half_kernel_[7]); + + // 6 - 8 + svuint32_t acc_6_8 = svadd_u32_x(pg, src_6, src_8); + acc = svmla_n_u32_x(pg, acc, acc_6_8, half_kernel_[6]); + + // 5 - 9 + svuint32_t acc_5_9 = svadd_u32_x(pg, src_5, src_9); + acc = svmla_n_u32_x(pg, acc, acc_5_9, half_kernel_[5]); + + // 4 - 10 + svuint32_t acc_4_10 = svadd_u32_x(pg, src_4, src_10); + acc = svmla_n_u32_x(pg, acc, acc_4_10, half_kernel_[4]); - GaussianBlurFilterType blur; - SeparableFilter filter{blur}; + // 3 - 11 + svuint32_t acc_3_11 = svadd_u32_x(pg, src_3, src_11); + acc = svmla_n_u32_x(pg, acc, acc_3_11, half_kernel_[3]); + + // 2 - 12 + svuint32_t acc_2_12 = svadd_u32_x(pg, src_2, src_12); + acc = svmla_n_u32_x(pg, acc, acc_2_12, half_kernel_[2]); + + // 1 - 13 + svuint32_t acc_1_13 = svadd_u32_x(pg, src_1, src_13); + acc = svmla_n_u32_x(pg, acc, acc_1_13, half_kernel_[1]); + + // 0 - 14 + svuint32_t acc_0_14 = svadd_u32_x(pg, src_0, src_14); + acc = svmla_n_u32_x(pg, acc, acc_0_14, half_kernel_[0]); + + acc = svrshr_n_u32_x(pg, acc, 16); + svst1b_u32(pg, &dst[0], acc); + } + + void horizontal_scalar_path(const BufferType src[15], DestinationType *dst) + const KLEIDICV_STREAMING_COMPATIBLE { + uint32_t acc = src[0] * half_kernel_[0] + src[1] * half_kernel_[1] + + src[2] * half_kernel_[2] + src[3] * half_kernel_[3] + + src[4] * half_kernel_[4] + src[5] * half_kernel_[5] + + src[6] * half_kernel_[6] + src[7] * half_kernel_[7] + + src[8] * half_kernel_[6] + src[9] * half_kernel_[5] + + src[10] * half_kernel_[4] + src[11] * half_kernel_[3] + + src[12] * half_kernel_[2] + src[13] * half_kernel_[1] + + src[14] * half_kernel_[0]; + dst[0] = static_cast(rounding_shift_right(acc, 16)); + } +}; // end of class GaussianBlur + +template +static kleidicv_error_t gaussian_blur_fixed_kernel_size( + const ScalarType *src, size_t src_stride, ScalarType *dst, + size_t dst_stride, Rectangle &rect, size_t channels, float sigma, + FixedBorderType border_type, + SeparableFilterWorkspace *workspace) KLEIDICV_STREAMING_COMPATIBLE { + using GaussianBlurFilter = GaussianBlur; + + GaussianBlurFilter blur{sigma}; + SeparableFilter filter{blur}; Rows src_rows{src, src_stride, channels}; Rows dst_rows{dst, dst_stride, channels}; - workspace->process(rect, src_rows, dst_rows, channels, *fixed_border_type, - filter); + workspace->process(rect, src_rows, dst_rows, channels, border_type, filter); + return KLEIDICV_OK; } -#define KLEIDICV_GAUSSIAN_BLUR_WRAPPER(size, ...) \ - if (kernel_width == size) { \ - return discrete_gaussian_blur(__VA_ARGS__); \ +template +static kleidicv_error_t gaussian_blur( + size_t kernel_size, const ScalarType *src, size_t src_stride, + ScalarType *dst, size_t dst_stride, Rectangle &rect, size_t channels, + float sigma, FixedBorderType border_type, + SeparableFilterWorkspace *workspace) KLEIDICV_STREAMING_COMPATIBLE { + switch (kernel_size) { + case 3: + return gaussian_blur_fixed_kernel_size<3, IsBinomial>( + src, src_stride, dst, dst_stride, rect, channels, sigma, border_type, + workspace); + case 5: + return gaussian_blur_fixed_kernel_size<5, IsBinomial>( + src, src_stride, dst, dst_stride, rect, channels, sigma, border_type, + workspace); + case 7: + return gaussian_blur_fixed_kernel_size<7, IsBinomial>( + src, src_stride, dst, dst_stride, rect, channels, sigma, border_type, + workspace); + case 15: + return gaussian_blur_fixed_kernel_size<15, IsBinomial>( + src, src_stride, dst, dst_stride, rect, channels, sigma, border_type, + workspace); + default: + return KLEIDICV_ERROR_NOT_IMPLEMENTED; } +} -#define KLEIDICV_GENERATE_GAUSSIAN_BLUR_WRAPPERS(...) \ - KLEIDICV_GAUSSIAN_BLUR_WRAPPER(3, __VA_ARGS__) \ - KLEIDICV_GAUSSIAN_BLUR_WRAPPER(5, __VA_ARGS__) \ - KLEIDICV_GAUSSIAN_BLUR_WRAPPER(7, __VA_ARGS__) \ - KLEIDICV_GAUSSIAN_BLUR_WRAPPER(15, __VA_ARGS__) - -static kleidicv_error_t gaussian_blur_u8_entry( +static kleidicv_error_t gaussian_blur_u8_sc( const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, size_t width, size_t height, size_t channels, size_t kernel_width, size_t kernel_height, float sigma_x, float sigma_y, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) KLEIDICV_STREAMING_COMPATIBLE { + CHECK_POINTERS(context); + auto *workspace = reinterpret_cast(context); + auto fixed_border_type = get_fixed_border_type(border_type); + if (kernel_width != kernel_height) { return KLEIDICV_ERROR_NOT_IMPLEMENTED; } - if (sigma_x != 0.0 || sigma_y != 0.0) { + if (sigma_x != sigma_y) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); + CHECK_IMAGE_SIZE(width, height); + + if (width < kernel_width - 1 || height < kernel_width - 1) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + + if (channels > KLEIDICV_MAXIMUM_CHANNEL_COUNT) { + return KLEIDICV_ERROR_RANGE; + } + + if (workspace->channels() < channels) { + return KLEIDICV_ERROR_CONTEXT_MISMATCH; + } + + Rectangle rect{width, height}; + const Rectangle &context_rect = workspace->image_size(); + if (context_rect.width() < width || context_rect.height() < height) { + return KLEIDICV_ERROR_CONTEXT_MISMATCH; + } + + if (!fixed_border_type) { return KLEIDICV_ERROR_NOT_IMPLEMENTED; } - KLEIDICV_GENERATE_GAUSSIAN_BLUR_WRAPPERS(src, src_stride, dst, dst_stride, - width, height, channels, border_type, - context) + if (sigma_x == 0.0) { + return gaussian_blur(kernel_width, src, src_stride, dst, dst_stride, + rect, channels, sigma_x, *fixed_border_type, + workspace); + } - return KLEIDICV_ERROR_NOT_IMPLEMENTED; + return gaussian_blur(kernel_width, src, src_stride, dst, dst_stride, + rect, channels, sigma_x, *fixed_border_type, + workspace); } } // namespace KLEIDICV_TARGET_NAMESPACE diff --git a/kleidicv/src/filters/gaussian_blur_sme2.cpp b/kleidicv/src/filters/gaussian_blur_sme2.cpp index a95a6c828..64290a5f2 100644 --- a/kleidicv/src/filters/gaussian_blur_sme2.cpp +++ b/kleidicv/src/filters/gaussian_blur_sme2.cpp @@ -14,9 +14,9 @@ gaussian_blur_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, float sigma_x, float sigma_y, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { - return gaussian_blur_u8_entry(src, src_stride, dst, dst_stride, width, height, - channels, kernel_width, kernel_height, sigma_x, - sigma_y, border_type, context); + return gaussian_blur_u8_sc(src, src_stride, dst, dst_stride, width, height, + channels, kernel_width, kernel_height, sigma_x, + sigma_y, border_type, context); } } // namespace kleidicv::sme2 diff --git a/kleidicv/src/filters/gaussian_blur_sve2.cpp b/kleidicv/src/filters/gaussian_blur_sve2.cpp index 764faa00e..7c329e3ef 100644 --- a/kleidicv/src/filters/gaussian_blur_sve2.cpp +++ b/kleidicv/src/filters/gaussian_blur_sve2.cpp @@ -15,9 +15,9 @@ kleidicv_error_t gaussian_blur_u8(const uint8_t *src, size_t src_stride, float sigma_x, float sigma_y, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { - return gaussian_blur_u8_entry(src, src_stride, dst, dst_stride, width, height, - channels, kernel_width, kernel_height, sigma_x, - sigma_y, border_type, context); + return gaussian_blur_u8_sc(src, src_stride, dst, dst_stride, width, height, + channels, kernel_width, kernel_height, sigma_x, + sigma_y, border_type, context); } } // namespace kleidicv::sve2 diff --git a/scripts/benchmark/run_benchmarks_4K.sh b/scripts/benchmark/run_benchmarks_4K.sh index e5bab2d4b..a86519521 100755 --- a/scripts/benchmark/run_benchmarks_4K.sh +++ b/scripts/benchmark/run_benchmarks_4K.sh @@ -40,6 +40,11 @@ RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL GaussianBlur7x7 opencv_perf_imgproc '*gaussianBlur7x7*' '(3840x2160, 8UC1, BORDER_REPLICATE)')") RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL GaussianBlur15x15 opencv_perf_imgproc '*gaussianBlur15x15*' '(3840x2160, 8UC1, BORDER_REPLICATE)')") +RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL GaussianBlur3x3_CustomSigma opencv_perf_imgproc '*gaussianBlur3x3_CustomSigma*' '(3840x2160, 8UC1, BORDER_REPLICATE)')") +RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL GaussianBlur5x5_CustomSigma opencv_perf_imgproc '*gaussianBlur5x5_CustomSigma*' '(3840x2160, 8UC1, BORDER_REPLICATE)')") +RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL GaussianBlur7x7_CustomSigma opencv_perf_imgproc '*gaussianBlur7x7_CustomSigma*' '(3840x2160, 8UC1, BORDER_REPLICATE)')") +RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL GaussianBlur15x15_CustomSigma opencv_perf_imgproc '*gaussianBlur15x15_CustomSigma*' '(3840x2160, 8UC1, BORDER_REPLICATE)')") + RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL Sobel_Gx opencv_perf_imgproc '*Border3x3_sobelFilter*' '(3840x2160, 16SC1, (1, 0), BORDER_REPLICATE)')") RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL Sobel_Gy opencv_perf_imgproc '*Border3x3_sobelFilter*' '(3840x2160, 16SC1, (0, 1), BORDER_REPLICATE)')") diff --git a/scripts/benchmark/run_benchmarks_FHD.sh b/scripts/benchmark/run_benchmarks_FHD.sh index b231fd029..2d8c93fd0 100755 --- a/scripts/benchmark/run_benchmarks_FHD.sh +++ b/scripts/benchmark/run_benchmarks_FHD.sh @@ -40,6 +40,11 @@ RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL GaussianBlur7x7 opencv_perf_imgproc '*gaussianBlur7x7*' '(1920x1080, 8UC1, BORDER_REPLICATE)')") RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL GaussianBlur15x15 opencv_perf_imgproc '*gaussianBlur15x15*' '(1920x1080, 8UC1, BORDER_REPLICATE)')") +RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL GaussianBlur3x3_CustomSigma opencv_perf_imgproc '*gaussianBlur3x3_CustomSigma*' '(1920x1080, 8UC1, BORDER_REPLICATE)')") +RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL GaussianBlur5x5_CustomSigma opencv_perf_imgproc '*gaussianBlur5x5_CustomSigma*' '(1920x1080, 8UC1, BORDER_REPLICATE)')") +RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL GaussianBlur7x7_CustomSigma opencv_perf_imgproc '*gaussianBlur7x7_CustomSigma*' '(1920x1080, 8UC1, BORDER_REPLICATE)')") +RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL GaussianBlur15x15_CustomSigma opencv_perf_imgproc '*gaussianBlur15x15_CustomSigma*' '(1920x1080, 8UC1, BORDER_REPLICATE)')") + RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL Sobel_Gx opencv_perf_imgproc '*Border3x3_sobelFilter*' '(1920x1080, 16SC1, (1, 0), BORDER_REPLICATE)')") RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL Sobel_Gy opencv_perf_imgproc '*Border3x3_sobelFilter*' '(1920x1080, 16SC1, (0, 1), BORDER_REPLICATE)')") diff --git a/test/api/test_gaussian_blur.cpp b/test/api/test_gaussian_blur.cpp index a5bb1465d..db663935d 100644 --- a/test/api/test_gaussian_blur.cpp +++ b/test/api/test_gaussian_blur.cpp @@ -9,6 +9,7 @@ #include "framework/kernel.h" #include "framework/utils.h" #include "kleidicv/kleidicv.h" +#include "kleidicv/sigma.h" #include "test_config.h" #define KLEIDICV_GAUSSIAN_BLUR(type, type_suffix) \ @@ -237,6 +238,226 @@ TYPED_TEST(GaussianBlur, 15x15) { .test(mask); } +TYPED_TEST(GaussianBlur, 3x3_CustomSigma) { + kleidicv_filter_context_t *context = nullptr; + ASSERT_EQ(KLEIDICV_OK, + kleidicv_filter_context_create(&context, 1, 15, 15, 18, 8)); + test::Array2D src{18, 8, test::Options::vector_length()}; + // clang-format off + src.set(0, 0, { 11, 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99}); + src.set(1, 0, { 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11}); + src.set(2, 0, { 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22}); + src.set(3, 0, { 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33}); + src.set(4, 0, { 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44}); + src.set(5, 0, { 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55}); + src.set(6, 0, { 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66}); + src.set(7, 0, { 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77}); + // clang-format on + + test::Array2D dst{18, 8, test::Options::vector_length()}; + EXPECT_EQ(KLEIDICV_OK, + gaussian_blur()(src.data(), src.stride(), dst.data(), + dst.stride(), 18, 8, 1, 3, 3, 4.56, 4.56, + KLEIDICV_BORDER_TYPE_WRAP, context)); + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); + + test::Array2D dst_expected{18, 8, test::Options::vector_length()}; + // clang-format off + dst_expected.set(0, 0, { 51, 51, 73, 74, 73, 62, 73, 84, 107, 118, 96, 63, 40, 51, 62, 73, 73, 62}); + dst_expected.set(1, 0, { 33, 33, 44, 55, 66, 77, 88, 110, 122, 122, 89, 66, 55, 66, 77, 77, 66, 44}); + dst_expected.set(2, 0, { 33, 44, 55, 66, 77, 88, 110, 122, 122, 89, 66, 55, 66, 77, 77, 66, 44, 33}); + dst_expected.set(3, 0, { 44, 55, 66, 77, 88, 110, 122, 122, 89, 66, 55, 66, 77, 77, 66, 44, 33, 33}); + dst_expected.set(4, 0, { 55, 66, 77, 88, 110, 122, 122, 89, 66, 55, 66, 77, 77, 66, 44, 33, 33, 44}); + dst_expected.set(5, 0, { 66, 77, 88, 110, 122, 122, 89, 66, 55, 66, 77, 77, 66, 44, 33, 33, 44, 55}); + dst_expected.set(6, 0, { 77, 88, 110, 122, 122, 89, 66, 55, 66, 77, 77, 66, 44, 33, 33, 44, 55, 66}); + dst_expected.set(7, 0, { 70, 70, 92, 103, 92, 70, 59, 70, 81, 103, 92, 70, 37, 37, 48, 59, 70, 70}); + // clang-format on + EXPECT_EQ_ARRAY2D(dst_expected, dst); +} + +TYPED_TEST(GaussianBlur, 5x5_CustomSigma) { + kleidicv_filter_context_t *context = nullptr; + ASSERT_EQ(KLEIDICV_OK, + kleidicv_filter_context_create(&context, 1, 15, 15, 20, 8)); + test::Array2D src{20, 8, test::Options::vector_length()}; + // clang-format off + src.set(0, 0, { 11, 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22}); + src.set(1, 0, { 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33}); + src.set(2, 0, { 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44}); + src.set(3, 0, { 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55}); + src.set(4, 0, { 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66}); + src.set(5, 0, { 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77}); + src.set(6, 0, { 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77, 88}); + src.set(7, 0, { 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77, 88, 99}); + // clang-format on + + test::Array2D dst{20, 8, test::Options::vector_length()}; + EXPECT_EQ(KLEIDICV_OK, + gaussian_blur()(src.data(), src.stride(), dst.data(), + dst.stride(), 20, 8, 1, 5, 5, 4.56, 4.56, + KLEIDICV_BORDER_TYPE_WRAP, context)); + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); + + test::Array2D dst_expected{20, 8, test::Options::vector_length()}; + // clang-format off + dst_expected.set(0, 0, { 54, 65, 72, 75, 78, 81, 84, 88, 96, 91, 82, 68, 59, 54, 58, 61, 60, 59, 54, 52}); + dst_expected.set(1, 0, { 48, 58, 61, 68, 75, 86, 90, 98, 101, 92, 79, 70, 64, 60, 64, 62, 57, 52, 47, 45}); + dst_expected.set(2, 0, { 42, 48, 55, 66, 81, 92, 100, 103, 102, 89, 80, 74, 70, 66, 65, 59, 51, 45, 40, 39}); + dst_expected.set(3, 0, { 53, 59, 66, 81, 92, 100, 103, 102, 89, 80, 74, 70, 66, 65, 59, 51, 45, 44, 43, 46}); + dst_expected.set(4, 0, { 64, 70, 81, 92, 100, 103, 102, 89, 80, 74, 70, 66, 65, 59, 51, 45, 44, 48, 51, 57}); + dst_expected.set(5, 0, { 75, 85, 92, 100, 103, 102, 89, 80, 74, 70, 66, 65, 59, 51, 45, 44, 48, 55, 62, 68}); + dst_expected.set(6, 0, { 69, 79, 87, 94, 97, 91, 82, 76, 79, 76, 75, 69, 60, 48, 46, 50, 53, 61, 63, 66}); + dst_expected.set(7, 0, { 62, 73, 80, 87, 86, 84, 79, 81, 86, 85, 80, 71, 58, 49, 52, 56, 59, 62, 61, 59}); + // clang-format on + EXPECT_EQ_ARRAY2D(dst_expected, dst); +} + +TYPED_TEST(GaussianBlur, 7x7_CustomSigma) { + kleidicv_filter_context_t *context = nullptr; + ASSERT_EQ(KLEIDICV_OK, + kleidicv_filter_context_create(&context, 1, 15, 15, 23, 8)); + test::Array2D src{23, 8, test::Options::vector_length()}; + // clang-format off + src.set(0, 0, { 11, 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55}); + src.set(1, 0, { 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66}); + src.set(2, 0, { 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77}); + src.set(3, 0, { 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77, 88}); + src.set(4, 0, { 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77, 88, 99}); + src.set(5, 0, { 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77, 88, 99, 111}); + src.set(6, 0, { 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77, 88, 99, 111, 222}); + src.set(7, 0, { 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33}); + // clang-format on + + test::Array2D dst{23, 8, test::Options::vector_length()}; + EXPECT_EQ(KLEIDICV_OK, + gaussian_blur()(src.data(), src.stride(), dst.data(), + dst.stride(), 23, 8, 1, 7, 7, 4.56, 4.56, + KLEIDICV_BORDER_TYPE_WRAP, context)); + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); + + test::Array2D dst_expected{23, 8, test::Options::vector_length()}; + // clang-format off + dst_expected.set(0, 0, { 76, 78, 77, 76, 82, 87, 90, 90, 85, 81, 77, 71, 65, 60, 56, 55, 56, 58, 62, 67, 68, 69, 71}); + dst_expected.set(1, 0, { 73, 75, 73, 75, 83, 88, 91, 91, 87, 83, 77, 72, 66, 61, 56, 55, 55, 56, 60, 65, 65, 66, 69}); + dst_expected.set(2, 0, { 69, 70, 72, 76, 84, 89, 92, 92, 89, 83, 78, 72, 67, 62, 57, 55, 54, 54, 58, 61, 61, 62, 65}); + dst_expected.set(3, 0, { 69, 73, 77, 78, 86, 91, 93, 93, 88, 83, 78, 72, 67, 61, 57, 53, 52, 52, 55, 62, 63, 64, 67}); + dst_expected.set(4, 0, { 82, 85, 85, 86, 91, 93, 93, 88, 83, 78, 72, 67, 61, 57, 53, 52, 52, 55, 62, 70, 72, 76, 78}); + dst_expected.set(5, 0, { 82, 85, 85, 85, 90, 92, 90, 88, 83, 77, 72, 67, 62, 58, 53, 52, 53, 56, 64, 72, 74, 76, 78}); + dst_expected.set(6, 0, { 81, 84, 83, 83, 87, 88, 89, 88, 83, 78, 73, 68, 64, 58, 54, 53, 54, 57, 65, 72, 73, 74, 77}); + dst_expected.set(7, 0, { 78, 81, 80, 80, 83, 87, 89, 88, 84, 79, 74, 70, 64, 59, 54, 54, 55, 58, 65, 70, 70, 72, 74}); + // clang-format on + EXPECT_EQ_ARRAY2D(dst_expected, dst); +} + +TYPED_TEST(GaussianBlur, 15x15_CustomSigma) { + kleidicv_filter_context_t *context = nullptr; + ASSERT_EQ(KLEIDICV_OK, + kleidicv_filter_context_create(&context, 1, 15, 15, 40, 22)); + test::Array2D src{40, 22, test::Options::vector_length()}; + // clang-format off + src.set(0, 0, { 11, 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, + 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44}); + src.set(1, 0, { 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, + 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55}); + src.set(2, 0, { 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, + 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66}); + src.set(3, 0, { 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, + 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77}); + src.set(4, 0, { 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, + 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77, 88}); + src.set(5, 0, { 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77, + 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77, 88, 99}); + src.set(6, 0, { 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77, 88, + 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77, 88, 99, 111}); + src.set(7, 0, { 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77, 88, 99, + 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77, 88, 99, 111, 222}); + src.set(8, 0, { 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77, 88, 99, 111, + 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33}); + src.set(9, 0, { 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, + 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44}); + src.set(10, 0, { 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, + 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55}); + src.set(11, 0, { 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, + 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66}); + src.set(12, 0, { 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, + 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77}); + src.set(13, 0, { 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, + 77, 88, 99, 11, 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88}); + src.set(14, 0, { 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, + 88, 99, 11, 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99}); + src.set(15, 0, { 77, 88, 99, 11, 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, + 99, 11, 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11}); + src.set(16, 0, { 88, 99, 11, 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, + 11, 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22}); + src.set(17, 0, { 99, 11, 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, + 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33}); + src.set(18, 0, { 11, 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, + 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44}); + src.set(19, 0, { 22, 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, + 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55}); + src.set(20, 0, { 33, 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, + 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66}); + src.set(21, 0, { 44, 55, 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, + 66, 77, 88, 99, 111, 222, 33, 44, 55, 66, 77, 88, 99, 11, 22, 33, 44, 55, 66, 77}); + // clang-format on + + test::Array2D dst{40, 22, test::Options::vector_length()}; + EXPECT_EQ(KLEIDICV_OK, + gaussian_blur()( + src.data(), src.stride(), dst.data(), dst.stride(), 40, 22, 1, + 15, 15, 4.56, 4.56, KLEIDICV_BORDER_TYPE_WRAP, context)); + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); + + test::Array2D dst_expected{40, 22, test::Options::vector_length()}; + // clang-format off + dst_expected.set(0, 0, { 60, 63, 67, 71, 74, 77, 79, 81, 81, 81, 79, 77, 73, 70, 66, 64, 62, 61, 63, 65, + 68, 71, 74, 76, 79, 81, 81, 81, 79, 77, 73, 70, 66, 63, 60, 57, 56, 56, 56, 58}); + dst_expected.set(1, 0, { 63, 66, 70, 74, 77, 79, 80, 81, 81, 80, 78, 75, 71, 68, 65, 63, 62, 62, 64, 66, + 69, 73, 75, 78, 79, 81, 81, 80, 78, 75, 71, 68, 65, 62, 59, 58, 57, 58, 59, 60}); + dst_expected.set(2, 0, { 67, 70, 73, 76, 79, 80, 81, 80, 80, 78, 76, 73, 70, 67, 64, 63, 62, 63, 65, 68, + 71, 74, 76, 78, 80, 80, 80, 78, 76, 73, 70, 67, 64, 62, 60, 59, 59, 60, 62, 64}); + dst_expected.set(3, 0, { 71, 74, 76, 79, 80, 80, 80, 79, 78, 76, 74, 71, 68, 66, 64, 63, 63, 65, 67, 70, + 73, 76, 77, 79, 79, 79, 78, 76, 74, 71, 68, 66, 64, 62, 61, 62, 62, 64, 66, 68}); + dst_expected.set(4, 0, { 74, 77, 79, 80, 80, 80, 79, 78, 77, 74, 72, 69, 67, 65, 64, 64, 65, 67, 69, 72, + 75, 77, 78, 79, 79, 78, 77, 74, 72, 69, 67, 65, 64, 63, 63, 64, 66, 68, 70, 72}); + dst_expected.set(5, 0, { 77, 79, 80, 80, 80, 79, 78, 76, 74, 72, 70, 68, 66, 65, 65, 66, 67, 69, 71, 74, + 76, 77, 78, 78, 77, 76, 74, 72, 70, 68, 66, 65, 65, 65, 66, 67, 69, 71, 73, 75}); + dst_expected.set(6, 0, { 79, 80, 81, 80, 79, 78, 76, 74, 72, 70, 68, 67, 66, 66, 66, 67, 69, 71, 73, 75, + 77, 77, 77, 77, 76, 74, 72, 70, 68, 67, 66, 66, 66, 67, 68, 70, 72, 74, 76, 78}); + dst_expected.set(7, 0, { 81, 81, 80, 79, 78, 76, 74, 72, 70, 69, 67, 66, 66, 67, 68, 70, 71, 73, 75, 76, + 77, 77, 76, 75, 74, 72, 70, 69, 67, 66, 66, 67, 68, 69, 71, 73, 75, 77, 79, 80}); + dst_expected.set(8, 0, { 81, 81, 80, 78, 77, 74, 72, 70, 69, 67, 66, 66, 67, 68, 70, 71, 73, 75, 76, 77, + 77, 76, 75, 74, 72, 70, 69, 67, 66, 66, 67, 68, 70, 71, 73, 76, 78, 79, 81, 81}); + dst_expected.set(9, 0, { 81, 80, 78, 76, 74, 72, 70, 69, 67, 66, 66, 67, 68, 70, 71, 73, 75, 76, 77, 77, + 76, 75, 74, 72, 70, 69, 67, 66, 66, 67, 68, 70, 71, 73, 76, 78, 79, 81, 81, 81}); + dst_expected.set(10, 0, { 79, 78, 76, 74, 72, 70, 68, 67, 66, 66, 67, 68, 70, 71, 73, 75, 76, 77, 77, 76, + 75, 74, 72, 70, 69, 67, 66, 66, 67, 68, 70, 71, 73, 75, 77, 79, 80, 81, 81, 80}); + dst_expected.set(11, 0, { 77, 75, 73, 71, 69, 68, 67, 66, 66, 67, 68, 70, 71, 73, 75, 76, 77, 77, 76, 75, + 74, 72, 70, 69, 67, 66, 66, 67, 68, 70, 71, 73, 75, 77, 78, 79, 80, 80, 79, 78}); + dst_expected.set(12, 0, { 73, 71, 70, 68, 67, 66, 66, 66, 67, 68, 70, 71, 73, 75, 76, 77, 77, 76, 75, 74, + 72, 70, 69, 67, 66, 66, 67, 68, 70, 71, 73, 75, 76, 78, 78, 79, 78, 78, 76, 75}); + dst_expected.set(13, 0, { 70, 68, 67, 66, 65, 65, 66, 67, 68, 70, 71, 73, 75, 76, 77, 77, 76, 75, 74, 72, + 70, 69, 67, 66, 66, 67, 68, 70, 71, 73, 75, 76, 77, 78, 78, 77, 76, 75, 73, 72}); + dst_expected.set(14, 0, { 66, 65, 64, 64, 64, 65, 66, 68, 70, 71, 73, 75, 76, 77, 77, 76, 75, 74, 72, 70, + 69, 67, 66, 66, 67, 68, 70, 71, 73, 75, 76, 77, 77, 77, 76, 75, 74, 72, 70, 68}); + dst_expected.set(15, 0, { 63, 62, 62, 62, 63, 65, 67, 69, 71, 73, 75, 77, 78, 78, 77, 76, 74, 72, 70, 68, + 67, 66, 66, 66, 68, 69, 71, 73, 75, 77, 78, 78, 77, 76, 74, 72, 70, 68, 66, 64}); + dst_expected.set(16, 0, { 60, 59, 60, 61, 63, 66, 68, 71, 73, 76, 77, 78, 78, 78, 76, 74, 72, 70, 68, 66, + 66, 65, 66, 67, 69, 71, 73, 76, 77, 78, 78, 78, 76, 74, 72, 69, 67, 64, 62, 60}); + dst_expected.set(17, 0, { 57, 58, 59, 62, 64, 67, 70, 73, 76, 78, 79, 79, 79, 77, 75, 73, 70, 67, 66, 65, + 65, 65, 66, 68, 70, 73, 76, 78, 79, 79, 79, 77, 75, 72, 69, 66, 63, 61, 59, 58}); + dst_expected.set(18, 0, { 56, 57, 59, 62, 66, 69, 72, 75, 78, 79, 80, 80, 78, 76, 74, 71, 68, 65, 64, 64, + 64, 66, 67, 70, 72, 75, 78, 79, 80, 80, 78, 76, 74, 70, 67, 63, 60, 58, 57, 56}); + dst_expected.set(19, 0, { 56, 58, 60, 64, 68, 71, 74, 77, 79, 81, 81, 80, 78, 75, 72, 68, 65, 63, 63, 63, + 64, 66, 69, 71, 74, 77, 79, 81, 81, 80, 78, 75, 72, 68, 64, 61, 58, 56, 55, 55}); + dst_expected.set(20, 0, { 56, 59, 62, 66, 70, 73, 76, 79, 81, 81, 81, 79, 76, 73, 70, 66, 64, 62, 62, 63, + 65, 68, 70, 73, 76, 79, 81, 81, 81, 79, 76, 73, 70, 66, 62, 59, 57, 55, 55, 55}); + dst_expected.set(21, 0, { 58, 60, 64, 68, 72, 75, 78, 80, 81, 81, 80, 78, 75, 72, 68, 65, 63, 62, 62, 64, + 66, 69, 72, 75, 78, 80, 81, 81, 80, 78, 75, 72, 68, 64, 60, 58, 56, 55, 55, 55}); + // clang-format on + EXPECT_EQ_ARRAY2D(dst_expected, dst); +} + TYPED_TEST(GaussianBlur, UnsupportedBorderType3x3) { using KernelTestParams = GaussianBlurKernelTestParams; kleidicv_filter_context_t *context = nullptr; @@ -534,14 +755,29 @@ TYPED_TEST(GaussianBlur, ValidImageSize3x3) { test::Array2D src{validSize, validSize, test::Options::vector_length()}; src.set(0, 0, {1, 2}); - src.set(1, 0, {1, 2}); + src.set(1, 0, {4, 3}); test::Array2D dst{validSize, validSize, test::Options::vector_length()}; + test::Array2D expected{validSize, validSize, + test::Options::vector_length()}; + EXPECT_EQ(KLEIDICV_OK, gaussian_blur()( src.data(), src.stride(), dst.data(), dst.stride(), validSize, validSize, 1, 3, 3, 0.0, 0.0, KLEIDICV_BORDER_TYPE_REVERSE, context)); + expected.set(0, 0, {3, 3}); + expected.set(1, 0, {3, 3}); + EXPECT_EQ_ARRAY2D(expected, dst); + + EXPECT_EQ(KLEIDICV_OK, gaussian_blur()( + src.data(), src.stride(), dst.data(), dst.stride(), + validSize, validSize, 1, 3, 3, 2.25, 2.25, + KLEIDICV_BORDER_TYPE_REVERSE, context)); + expected.set(0, 0, {3, 3}); + expected.set(1, 0, {2, 2}); + EXPECT_EQ_ARRAY2D(expected, dst); + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); } @@ -554,16 +790,35 @@ TYPED_TEST(GaussianBlur, ValidImageSize5x5) { test::Array2D src{validSize, validSize, test::Options::vector_length()}; src.set(0, 0, {1, 2, 3, 4}); - src.set(1, 0, {1, 2, 3, 4}); + src.set(1, 0, {8, 7, 6, 5}); src.set(2, 0, {1, 2, 3, 4}); - src.set(3, 0, {1, 2, 3, 4}); + src.set(3, 0, {16, 27, 38, 49}); test::Array2D dst{validSize, validSize, test::Options::vector_length()}; + test::Array2D expected{validSize, validSize, + test::Options::vector_length()}; + EXPECT_EQ(KLEIDICV_OK, gaussian_blur()( src.data(), src.stride(), dst.data(), dst.stride(), validSize, validSize, 1, 5, 5, 0.0, 0.0, KLEIDICV_BORDER_TYPE_REVERSE, context)); + expected.set(0, 0, {5, 5, 5, 5}); + expected.set(1, 0, {6, 6, 6, 7}); + expected.set(2, 0, {9, 10, 12, 13}); + expected.set(3, 0, {11, 13, 16, 18}); + EXPECT_EQ_ARRAY2D(expected, dst); + + EXPECT_EQ(KLEIDICV_OK, gaussian_blur()( + src.data(), src.stride(), dst.data(), dst.stride(), + validSize, validSize, 1, 5, 5, 2.25, 2.25, + KLEIDICV_BORDER_TYPE_REVERSE, context)); + expected.set(0, 0, {4, 4, 4, 4}); + expected.set(1, 0, {8, 9, 9, 10}); + expected.set(2, 0, {9, 9, 10, 11}); + expected.set(3, 0, {10, 11, 12, 12}); + EXPECT_EQ_ARRAY2D(expected, dst); + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); } @@ -576,18 +831,41 @@ TYPED_TEST(GaussianBlur, ValidImageSize7x7) { test::Array2D src{validSize, validSize, test::Options::vector_length()}; src.set(0, 0, {1, 2, 3, 4, 5, 6}); - src.set(1, 0, {1, 2, 3, 4, 5, 6}); + src.set(1, 0, {12, 11, 10, 9, 8, 7}); src.set(2, 0, {1, 2, 3, 4, 5, 6}); src.set(3, 0, {1, 2, 3, 4, 5, 6}); - src.set(4, 0, {1, 2, 3, 4, 5, 6}); - src.set(5, 0, {1, 2, 3, 4, 5, 6}); + src.set(4, 0, {11, 22, 33, 44, 55, 66}); + src.set(5, 0, {127, 67, 37, 27, 17, 7}); test::Array2D dst{validSize, validSize, test::Options::vector_length()}; + test::Array2D expected{validSize, validSize, + test::Options::vector_length()}; + EXPECT_EQ(KLEIDICV_OK, gaussian_blur()( src.data(), src.stride(), dst.data(), dst.stride(), validSize, validSize, 1, 7, 7, 0.0, 0.0, KLEIDICV_BORDER_TYPE_REVERSE, context)); + expected.set(0, 0, {6, 6, 6, 6, 6, 6}); + expected.set(1, 0, {6, 6, 7, 7, 8, 8}); + expected.set(2, 0, {9, 9, 10, 10, 11, 12}); + expected.set(3, 0, {16, 16, 16, 17, 18, 19}); + expected.set(4, 0, {26, 26, 25, 26, 27, 27}); + expected.set(5, 0, {32, 31, 29, 29, 30, 30}); + EXPECT_EQ_ARRAY2D(expected, dst); + + EXPECT_EQ(KLEIDICV_OK, gaussian_blur()( + src.data(), src.stride(), dst.data(), dst.stride(), + validSize, validSize, 1, 7, 7, 2.25, 2.25, + KLEIDICV_BORDER_TYPE_REVERSE, context)); + expected.set(0, 0, {5, 5, 6, 6, 6, 6}); + expected.set(1, 0, {7, 7, 8, 9, 9, 10}); + expected.set(2, 0, {13, 13, 13, 13, 13, 13}); + expected.set(3, 0, {18, 18, 19, 19, 19, 19}); + expected.set(4, 0, {22, 22, 23, 23, 23, 23}); + expected.set(5, 0, {24, 24, 24, 24, 24, 24}); + EXPECT_EQ_ARRAY2D(expected, dst); + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); } @@ -600,26 +878,67 @@ TYPED_TEST(GaussianBlur, ValidImageSize15x15) { test::Array2D src{validSize, validSize, test::Options::vector_length()}; src.set(0, 0, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}); - src.set(1, 0, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}); + src.set(1, 0, {28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15}); src.set(2, 0, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}); src.set(3, 0, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}); - src.set(4, 0, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}); - src.set(5, 0, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}); + src.set(4, 0, {28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15}); + src.set(5, 0, {28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15}); src.set(6, 0, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}); src.set(7, 0, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}); - src.set(8, 0, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}); + src.set(8, 0, {28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15}); src.set(9, 0, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}); src.set(10, 0, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}); - src.set(11, 0, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}); + src.set(11, 0, {247, 207, 167, 127, 87, 47, 7, 3, 7, 47, 87, 127, 167, 207}); src.set(12, 0, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}); - src.set(13, 0, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}); + src.set( + 13, 0, + {255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242}); test::Array2D dst{validSize, validSize, test::Options::vector_length()}; + test::Array2D expected{validSize, validSize, + test::Options::vector_length()}; + EXPECT_EQ(KLEIDICV_OK, gaussian_blur()( src.data(), src.stride(), dst.data(), dst.stride(), validSize, validSize, 1, 15, 15, 0.0, 0.0, KLEIDICV_BORDER_TYPE_REVERSE, context)); + expected.set(0, 0, {13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14}); + expected.set(1, 0, {13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14}); + expected.set(2, 0, {13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14}); + expected.set(3, 0, {13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14}); + expected.set(4, 0, {14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15}); + expected.set(5, 0, {15, 15, 15, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15}); + expected.set(6, 0, {17, 17, 17, 16, 16, 15, 15, 15, 16, 16, 17, 17, 18, 18}); + expected.set(7, 0, {21, 21, 20, 20, 19, 18, 17, 17, 18, 19, 20, 21, 21, 22}); + expected.set(8, 0, {29, 29, 28, 26, 24, 22, 21, 21, 22, 23, 25, 27, 28, 29}); + expected.set(9, 0, {40, 40, 38, 35, 32, 30, 28, 28, 29, 31, 33, 36, 38, 38}); + expected.set(10, 0, {54, 53, 50, 47, 43, 39, 37, 36, 38, 40, 44, 47, 49, 50}); + expected.set(11, 0, {67, 66, 63, 58, 54, 50, 47, 46, 47, 50, 54, 58, 61, 62}); + expected.set(12, 0, {76, 75, 72, 67, 62, 57, 54, 53, 54, 58, 62, 67, 70, 71}); + expected.set(13, 0, {80, 79, 76, 71, 65, 60, 57, 56, 57, 61, 66, 70, 73, 75}); + EXPECT_EQ_ARRAY2D(expected, dst); + + EXPECT_EQ(KLEIDICV_OK, gaussian_blur()( + src.data(), src.stride(), dst.data(), dst.stride(), + validSize, validSize, 1, 15, 15, 2.25, 2.25, + KLEIDICV_BORDER_TYPE_REVERSE, context)); + expected.set(0, 0, {13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14}); + expected.set(1, 0, {13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14}); + expected.set(2, 0, {13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14}); + expected.set(3, 0, {13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14}); + expected.set(4, 0, {14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14}); + expected.set(5, 0, {15, 15, 15, 15, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15}); + expected.set(6, 0, {15, 15, 15, 15, 14, 14, 14, 14, 14, 15, 15, 16, 16, 16}); + expected.set(7, 0, {19, 19, 19, 18, 17, 16, 16, 16, 16, 17, 18, 19, 20, 20}); + expected.set(8, 0, {26, 26, 25, 23, 21, 19, 18, 18, 19, 20, 22, 24, 26, 26}); + expected.set(9, 0, {38, 37, 35, 32, 29, 26, 24, 24, 25, 27, 30, 34, 36, 37}); + expected.set(10, 0, {55, 54, 51, 46, 42, 37, 35, 34, 35, 39, 43, 47, 51, 52}); + expected.set(11, 0, {71, 70, 66, 61, 55, 49, 46, 45, 47, 51, 56, 61, 65, 67}); + expected.set(12, 0, {85, 83, 79, 73, 66, 60, 57, 55, 57, 61, 67, 73, 77, 79}); + expected.set(13, 0, {89, 88, 83, 77, 71, 65, 61, 60, 62, 66, 72, 77, 82, 83}); + EXPECT_EQ_ARRAY2D(expected, dst); + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); } @@ -732,36 +1051,6 @@ TYPED_TEST(GaussianBlur, OversizeImage) { ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create(&context, 1, 15, 15, 1, 1)); TypeParam src[1], dst[1]; - EXPECT_EQ( - KLEIDICV_ERROR_RANGE, - gaussian_blur()(src, sizeof(TypeParam), dst, sizeof(TypeParam), - KLEIDICV_MAX_IMAGE_PIXELS + 1, 1, 1, 3, 3, 0.0, - 0.0, KLEIDICV_BORDER_TYPE_REFLECT, context)); - EXPECT_EQ(KLEIDICV_ERROR_RANGE, - gaussian_blur()( - src, sizeof(TypeParam), dst, sizeof(TypeParam), - KLEIDICV_MAX_IMAGE_PIXELS, KLEIDICV_MAX_IMAGE_PIXELS, 1, 3, 3, - 0.0, 0.0, KLEIDICV_BORDER_TYPE_REFLECT, context)); - EXPECT_EQ( - KLEIDICV_ERROR_RANGE, - gaussian_blur()(src, sizeof(TypeParam), dst, sizeof(TypeParam), - KLEIDICV_MAX_IMAGE_PIXELS + 1, 1, 1, 5, 5, 0.0, - 0.0, KLEIDICV_BORDER_TYPE_REFLECT, context)); - EXPECT_EQ(KLEIDICV_ERROR_RANGE, - gaussian_blur()( - src, sizeof(TypeParam), dst, sizeof(TypeParam), - KLEIDICV_MAX_IMAGE_PIXELS, KLEIDICV_MAX_IMAGE_PIXELS, 1, 5, 5, - 0.0, 0.0, KLEIDICV_BORDER_TYPE_REFLECT, context)); - EXPECT_EQ( - KLEIDICV_ERROR_RANGE, - gaussian_blur()(src, sizeof(TypeParam), dst, sizeof(TypeParam), - KLEIDICV_MAX_IMAGE_PIXELS + 1, 1, 1, 7, 7, 0.0, - 0.0, KLEIDICV_BORDER_TYPE_REFLECT, context)); - EXPECT_EQ(KLEIDICV_ERROR_RANGE, - gaussian_blur()( - src, sizeof(TypeParam), dst, sizeof(TypeParam), - KLEIDICV_MAX_IMAGE_PIXELS, KLEIDICV_MAX_IMAGE_PIXELS, 1, 7, 7, - 0.0, 0.0, KLEIDICV_BORDER_TYPE_REFLECT, context)); EXPECT_EQ(KLEIDICV_ERROR_RANGE, gaussian_blur()( src, sizeof(TypeParam), dst, sizeof(TypeParam), @@ -783,24 +1072,6 @@ TYPED_TEST(GaussianBlur, ChannelNumber) { ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create(&context, 1, 15, 15, validSize, validSize)); TypeParam src[1], dst[1]; - EXPECT_EQ(KLEIDICV_ERROR_RANGE, - gaussian_blur()( - src, sizeof(TypeParam), dst, sizeof(TypeParam), validSize, - validSize, KLEIDICV_MAXIMUM_CHANNEL_COUNT + 1, 3, 3, 0.0, 0.0, - KLEIDICV_BORDER_TYPE_REFLECT, context)); - - EXPECT_EQ(KLEIDICV_ERROR_RANGE, - gaussian_blur()( - src, sizeof(TypeParam), dst, sizeof(TypeParam), validSize, - validSize, KLEIDICV_MAXIMUM_CHANNEL_COUNT + 1, 5, 5, 0.0, 0.0, - KLEIDICV_BORDER_TYPE_REFLECT, context)); - - EXPECT_EQ(KLEIDICV_ERROR_RANGE, - gaussian_blur()( - src, sizeof(TypeParam), dst, sizeof(TypeParam), validSize, - validSize, KLEIDICV_MAXIMUM_CHANNEL_COUNT + 1, 7, 7, 0.0, 0.0, - KLEIDICV_BORDER_TYPE_REFLECT, context)); - EXPECT_EQ(KLEIDICV_ERROR_RANGE, gaussian_blur()( src, sizeof(TypeParam), dst, sizeof(TypeParam), validSize, @@ -809,47 +1080,6 @@ TYPED_TEST(GaussianBlur, ChannelNumber) { EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); } -TYPED_TEST(GaussianBlur, InvalidContextKernelSize) { - using KernelTestParams15x15 = GaussianBlurKernelTestParams; - kleidicv_filter_context_t *context = nullptr; - size_t validSize = KernelTestParams15x15::kKernelSize - 1; - - ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create(&context, 1, 5, 5, - validSize, validSize)); - TypeParam src[256], dst[256]; - EXPECT_EQ(KLEIDICV_OK, gaussian_blur()( - src, sizeof(TypeParam), dst, sizeof(TypeParam), - validSize, validSize, 1, 3, 3, 0.0, 0.0, - KLEIDICV_BORDER_TYPE_REFLECT, context)); - EXPECT_EQ(KLEIDICV_OK, gaussian_blur()( - src, sizeof(TypeParam), dst, sizeof(TypeParam), - validSize, validSize, 1, 5, 5, 0.0, 0.0, - KLEIDICV_BORDER_TYPE_REFLECT, context)); - EXPECT_EQ(KLEIDICV_OK, gaussian_blur()( - src, sizeof(TypeParam), dst, sizeof(TypeParam), - validSize, validSize, 1, 7, 7, 0.0, 0.0, - KLEIDICV_BORDER_TYPE_REFLECT, context)); - EXPECT_EQ( - KLEIDICV_ERROR_CONTEXT_MISMATCH, - gaussian_blur()(src, sizeof(TypeParam), dst, sizeof(TypeParam), - validSize, validSize, 1, 15, 15, 0.0, 0.0, - KLEIDICV_BORDER_TYPE_REFLECT, context)); - - EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); - ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create(&context, 1, 15, 15, - validSize, validSize)); - - EXPECT_EQ(KLEIDICV_OK, gaussian_blur()( - src, sizeof(TypeParam), dst, sizeof(TypeParam), - validSize, validSize, 1, 3, 3, 0.0, 0.0, - KLEIDICV_BORDER_TYPE_REFLECT, context)); - EXPECT_EQ(KLEIDICV_OK, gaussian_blur()( - src, sizeof(TypeParam), dst, sizeof(TypeParam), - validSize, validSize, 1, 15, 15, 0.0, 0.0, - KLEIDICV_BORDER_TYPE_REFLECT, context)); - EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); -} - TYPED_TEST(GaussianBlur, InvalidContextMaxChannels) { using KernelTestParams15x15 = GaussianBlurKernelTestParams; kleidicv_filter_context_t *context = nullptr; @@ -858,24 +1088,6 @@ TYPED_TEST(GaussianBlur, InvalidContextMaxChannels) { ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create(&context, 1, 15, 15, validSize, validSize)); TypeParam src[1], dst[1]; - EXPECT_EQ( - KLEIDICV_ERROR_CONTEXT_MISMATCH, - gaussian_blur()(src, sizeof(TypeParam), dst, sizeof(TypeParam), - validSize, validSize, 2, 3, 3, 0.0, 0.0, - KLEIDICV_BORDER_TYPE_REFLECT, context)); - - EXPECT_EQ( - KLEIDICV_ERROR_CONTEXT_MISMATCH, - gaussian_blur()(src, sizeof(TypeParam), dst, sizeof(TypeParam), - validSize, validSize, 2, 5, 5, 0.0, 0.0, - KLEIDICV_BORDER_TYPE_REFLECT, context)); - - EXPECT_EQ( - KLEIDICV_ERROR_CONTEXT_MISMATCH, - gaussian_blur()(src, sizeof(TypeParam), dst, sizeof(TypeParam), - validSize, validSize, 2, 7, 7, 0.0, 0.0, - KLEIDICV_BORDER_TYPE_REFLECT, context)); - EXPECT_EQ( KLEIDICV_ERROR_CONTEXT_MISMATCH, gaussian_blur()(src, sizeof(TypeParam), dst, sizeof(TypeParam), @@ -892,54 +1104,6 @@ TYPED_TEST(GaussianBlur, InvalidContextImageSize) { ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create(&context, 1, 15, 15, validSize, validSize)); TypeParam src[1], dst[1]; - EXPECT_EQ( - KLEIDICV_ERROR_CONTEXT_MISMATCH, - gaussian_blur()(src, sizeof(TypeParam), dst, sizeof(TypeParam), - validSize + 1, validSize, 1, 3, 3, 0.0, 0.0, - KLEIDICV_BORDER_TYPE_REFLECT, context)); - EXPECT_EQ( - KLEIDICV_ERROR_CONTEXT_MISMATCH, - gaussian_blur()(src, sizeof(TypeParam), dst, sizeof(TypeParam), - validSize, validSize + 1, 1, 3, 3, 0.0, 0.0, - KLEIDICV_BORDER_TYPE_REFLECT, context)); - EXPECT_EQ( - KLEIDICV_ERROR_CONTEXT_MISMATCH, - gaussian_blur()(src, sizeof(TypeParam), dst, sizeof(TypeParam), - validSize + 1, validSize + 1, 1, 3, 3, 0.0, - 0.0, KLEIDICV_BORDER_TYPE_REFLECT, context)); - - EXPECT_EQ( - KLEIDICV_ERROR_CONTEXT_MISMATCH, - gaussian_blur()(src, sizeof(TypeParam), dst, sizeof(TypeParam), - validSize + 1, validSize, 1, 5, 5, 0.0, 0.0, - KLEIDICV_BORDER_TYPE_REFLECT, context)); - EXPECT_EQ( - KLEIDICV_ERROR_CONTEXT_MISMATCH, - gaussian_blur()(src, sizeof(TypeParam), dst, sizeof(TypeParam), - validSize, validSize + 1, 1, 5, 5, 0.0, 0.0, - KLEIDICV_BORDER_TYPE_REFLECT, context)); - EXPECT_EQ( - KLEIDICV_ERROR_CONTEXT_MISMATCH, - gaussian_blur()(src, sizeof(TypeParam), dst, sizeof(TypeParam), - validSize + 1, validSize + 1, 1, 5, 5, 0.0, - 0.0, KLEIDICV_BORDER_TYPE_REFLECT, context)); - - EXPECT_EQ( - KLEIDICV_ERROR_CONTEXT_MISMATCH, - gaussian_blur()(src, sizeof(TypeParam), dst, sizeof(TypeParam), - validSize + 1, validSize, 1, 7, 7, 0.0, 0.0, - KLEIDICV_BORDER_TYPE_REFLECT, context)); - EXPECT_EQ( - KLEIDICV_ERROR_CONTEXT_MISMATCH, - gaussian_blur()(src, sizeof(TypeParam), dst, sizeof(TypeParam), - validSize, validSize + 1, 1, 7, 7, 0.0, 0.0, - KLEIDICV_BORDER_TYPE_REFLECT, context)); - EXPECT_EQ( - KLEIDICV_ERROR_CONTEXT_MISMATCH, - gaussian_blur()(src, sizeof(TypeParam), dst, sizeof(TypeParam), - validSize + 1, validSize + 1, 1, 7, 7, 0.0, - 0.0, KLEIDICV_BORDER_TYPE_REFLECT, context)); - EXPECT_EQ( KLEIDICV_ERROR_CONTEXT_MISMATCH, gaussian_blur()(src, sizeof(TypeParam), dst, sizeof(TypeParam), @@ -959,6 +1123,86 @@ TYPED_TEST(GaussianBlur, InvalidContextImageSize) { EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); } +TYPED_TEST(GaussianBlur, InvalidUnimplementedKernelSize) { + kleidicv_filter_context_t *context = nullptr; + size_t kernel_size = 17; + + ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create( + &context, 1, 15, 15, kernel_size, kernel_size)); + TypeParam src[1], dst[1]; + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + gaussian_blur()( + src, sizeof(TypeParam), dst, sizeof(TypeParam), kernel_size, + kernel_size, 1, kernel_size, kernel_size, 0.0, 0.0, + KLEIDICV_BORDER_TYPE_REFLECT, context)); + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + gaussian_blur()( + src, sizeof(TypeParam), dst, sizeof(TypeParam), kernel_size, + kernel_size, 1, kernel_size, kernel_size, 1.0, 1.0, + KLEIDICV_BORDER_TYPE_REFLECT, context)); + + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); +} + +template +static std::array generate_reference_kernel(float sigma) { + std::array float_kernel{}; + + for (size_t i = 0; i < Size; ++i) { + float_kernel[i] = std::exp(-1 * std::pow(i, 2) / (2 * std::pow(sigma, 2))); + } + + float sum = 0; + for (auto val : float_kernel) { + sum += val; + } + + // Sum needs to be corrected to contain all kernel values + sum = (sum * 2) - 1; + + for (auto &val : float_kernel) { + val = val / sum; + // Multiplication is needed as the results are fixed point values on + // uint16_t type + val *= 256; + } + + std::array kernel_to_return{}; + // Conversion with rounding error diffusion + float last_rounding_error = 0.0; + for (size_t i = 0; i < Size; ++i) { + float corrected_value = float_kernel[Size - 1 - i] - last_rounding_error; + float rounded_value = std::round(corrected_value); + last_rounding_error = rounded_value - corrected_value; + kernel_to_return[i] = rounded_value; + } + + return kernel_to_return; +} +template +void test_sigma() { + const std::array expected_half_kernel = + generate_reference_kernel(3.0); + const std::array actual_half_kernel = + kleidicv::generate_gaussian_half_kernel(3.0); + + EXPECT_EQ(expected_half_kernel, actual_half_kernel); + + const std::array expected_half_kernel1 = + generate_reference_kernel(((Size * 2) - 1) * 0.15 + 0.35); + const std::array actual_half_kernel1 = + kleidicv::generate_gaussian_half_kernel(0.0); + + EXPECT_EQ(expected_half_kernel1, actual_half_kernel1); +} + +TYPED_TEST(GaussianBlur, KernelGenerationFromSigma) { + test_sigma<2>(); + test_sigma<3>(); + test_sigma<4>(); + test_sigma<8>(); +} + #ifdef KLEIDICV_ALLOCATION_TESTS TEST(FilterCreate, CannotAllocateFilter) { MockMallocToFail::enable(); -- GitLab