From db3c196e5510c5d3234d65777ba69852c9a00b5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Mon, 17 Jun 2024 15:34:39 +0200 Subject: [PATCH] Add custom kernel support for Gaussian blur NEON --- adapters/opencv/kleidicv_hal.cpp | 76 ++++++ adapters/opencv/kleidicv_hal.h | 23 ++ .../include/kleidicv/filters/gaussian_blur.h | 60 +++++ kleidicv/include/kleidicv/kleidicv.h | 79 ++++++- kleidicv/src/filters/gaussian_blur_api.cpp | 24 ++ kleidicv/src/filters/gaussian_blur_neon.cpp | 220 +++++++++++++++++- kleidicv/src/filters/gaussian_blur_sc.h | 2 +- kleidicv/src/filters/gaussian_blur_sme2.cpp | 52 ++++- kleidicv/src/filters/gaussian_blur_sve2.cpp | 48 +++- 9 files changed, 565 insertions(+), 19 deletions(-) diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp index 4a65e5e20..ae263aa5e 100644 --- a/adapters/opencv/kleidicv_hal.cpp +++ b/adapters/opencv/kleidicv_hal.cpp @@ -306,6 +306,82 @@ int gaussian_blur_binomial(const uchar *src_data, size_t src_step, return convert_error(blur_err ? 
blur_err : release_err); } +int gaussian_blur(const uchar *src_data, size_t src_step, uchar *dst_data, + size_t dst_step, int width, int height, int depth, int cn, + size_t margin_left, size_t margin_top, size_t margin_right, + size_t margin_bottom, size_t kernel_width, + size_t kernel_height, double sigma_x, double sigma_y, + int border_type) { + if (sigma_x != sigma_y || sigma_x <= 0.0 || sigma_y <= 0.0) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + if (kernel_width != kernel_height) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + if (src_data == dst_data) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + if ((margin_left != 0) || (margin_top != 0) || (margin_right != 0) || + (margin_bottom != 0)) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + switch (depth) { + case CV_8U: + break; + + default: + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + kleidicv_border_type_t kleidicv_border_type; + if (from_opencv(border_type, kleidicv_border_type)) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + decltype(kleidicv_gaussian_blur_sigma_3x3_u8) impl{nullptr}; + if ((kernel_width == 3) && (width >= 3) && (height >= 3)) { + impl = kleidicv_gaussian_blur_sigma_3x3_u8; + } else if ((kernel_width == 5) && (width >= 5) && (height >= 5)) { + impl = kleidicv_gaussian_blur_sigma_5x5_u8; + } else if ((kernel_width == 7) && (width >= 7) && (height >= 7)) { + impl = kleidicv_gaussian_blur_sigma_7x7_u8; + } else if ((kernel_width == 15) && (width >= 15) && (height >= 15)) { + impl = kleidicv_gaussian_blur_sigma_15x15_u8; + } else { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + kleidicv_filter_context_t *context; + size_t type_size = get_type_size(depth); + if (type_size == SIZE_MAX) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + // widening + size_t intermediate_size = 4 * type_size; + + kleidicv_rectangle_t image = { + .width = static_cast(width), + .height = static_cast(height)}; + if (kleidicv_error_t create_err = + kleidicv_filter_create(&context, cn, intermediate_size, 
image)) { + return convert_error(create_err); + } + + kleidicv_error_t blur_err = + impl(reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, width, height, cn, + sigma_x, kleidicv_border_type, context); + + kleidicv_error_t release_err = kleidicv_filter_release(context); + + return convert_error(blur_err ? blur_err : release_err); +} + struct MorphologyParams { kleidicv_morphology_context_t *context; decltype(kleidicv_dilate_u8) impl; diff --git a/adapters/opencv/kleidicv_hal.h b/adapters/opencv/kleidicv_hal.h index 5878c0115..f703b6ef0 100644 --- a/adapters/opencv/kleidicv_hal.h +++ b/adapters/opencv/kleidicv_hal.h @@ -52,6 +52,13 @@ int gaussian_blur_binomial(const uchar *src_data, size_t src_step, size_t margin_bottom, size_t kernel_size, int border_type); +int gaussian_blur(const uchar *src_data, size_t src_step, uchar *dst_data, + size_t dst_step, int width, int height, int depth, int cn, + size_t margin_left, size_t margin_top, size_t margin_right, + size_t margin_bottom, size_t kernel_width, + size_t kernel_height, double sigma_x, double sigma_y, + int border_type); + int morphology_init(cvhalFilter2D **context, int operation, int src_type, int dst_type, int max_width, int max_height, int kernel_type, uchar *kernel_data, size_t kernel_step, @@ -206,6 +213,22 @@ static inline int kleidicv_gaussian_blur_binomial_with_fallback( #define cv_hal_gaussianBlurBinomial \ kleidicv_gaussian_blur_binomial_with_fallback +// gaussian_blur +static inline int kleidicv_gaussian_blur_with_fallback( + const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, + int width, int height, int depth, int cn, size_t margin_left, + size_t margin_top, size_t margin_right, size_t margin_bottom, + size_t kernel_width, size_t kernel_height, double sigma_x, double sigma_y, + int border_type) { + return KLEIDICV_HAL_FALLBACK_FORWARD( + gaussian_blur, cv_hal_gaussianBlur, src_data, src_step, dst_data, + dst_step, width, height, depth, cn, margin_left, 
margin_top, margin_right, + margin_bottom, kernel_width, kernel_height, sigma_x, sigma_y, + border_type); +} +#undef cv_hal_gaussianBlur +#define cv_hal_gaussianBlur kleidicv_gaussian_blur_with_fallback + // morphology_init static inline int kleidicv_morphology_init_with_fallback( cvhalFilter2D **context, int operation, int src_type, int dst_type, diff --git a/kleidicv/include/kleidicv/filters/gaussian_blur.h b/kleidicv/include/kleidicv/filters/gaussian_blur.h index d14f75e43..9862be909 100644 --- a/kleidicv/include/kleidicv/filters/gaussian_blur.h +++ b/kleidicv/include/kleidicv/filters/gaussian_blur.h @@ -40,6 +40,26 @@ kleidicv_error_t gaussian_blur_15x15_u8(const uint8_t *src, size_t src_stride, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); +kleidicv_error_t gaussian_blur_sigma_3x3_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, double sigma, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); + +kleidicv_error_t gaussian_blur_sigma_5x5_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, double sigma, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); + +kleidicv_error_t gaussian_blur_sigma_7x7_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, double sigma, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); + +kleidicv_error_t gaussian_blur_sigma_15x15_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, double sigma, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); + } // namespace neon namespace sve2 { @@ -72,6 +92,26 @@ kleidicv_error_t gaussian_blur_15x15_u8(const uint8_t *src, size_t src_stride, kleidicv_border_type_t border_type, 
kleidicv_filter_context_t *context); +kleidicv_error_t gaussian_blur_sigma_3x3_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, double sigma, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); + +kleidicv_error_t gaussian_blur_sigma_5x5_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, double sigma, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); + +kleidicv_error_t gaussian_blur_sigma_7x7_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, double sigma, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); + +kleidicv_error_t gaussian_blur_sigma_15x15_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, double sigma, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); + } // namespace sve2 namespace sme2 { @@ -104,6 +144,26 @@ kleidicv_error_t gaussian_blur_15x15_u8(const uint8_t *src, size_t src_stride, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); +kleidicv_error_t gaussian_blur_sigma_3x3_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, double sigma, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); + +kleidicv_error_t gaussian_blur_sigma_5x5_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, double sigma, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); + +kleidicv_error_t gaussian_blur_sigma_7x7_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, double sigma, + 
kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); + +kleidicv_error_t gaussian_blur_sigma_15x15_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, double sigma, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); + } // namespace sme2 } // namespace kleidicv diff --git a/kleidicv/include/kleidicv/kleidicv.h b/kleidicv/include/kleidicv/kleidicv.h index b74a3aa2f..ceba5e0d0 100644 --- a/kleidicv/include/kleidicv/kleidicv.h +++ b/kleidicv/include/kleidicv/kleidicv.h @@ -1234,8 +1234,9 @@ kleidicv_error_t kleidicv_filter_release(kleidicv_filter_context_t *context); /// Before using this function, a context must be created using /// kleidicv_filter_create, and after finished, it has to be released /// using kleidicv_filter_release. The context must be created with the same -/// image dimensions as width and height parameters, with sizeof(uint8) as -/// size_type, and with the channel number of the data as channels. \n +/// image dimensions as width and height parameters, with the intermediate_size +/// based on (Intermediate)BufferType in the DiscreteGaussianBlur class, and +/// with the channel number of the data as channels. \n /// Note, from the border types only these are supported: \n /// - @ref KLEIDICV_BORDER_TYPE_REPLICATE \n /// - @ref KLEIDICV_BORDER_TYPE_REFLECT \n @@ -1290,6 +1291,80 @@ KLEIDICV_API_DECLARATION(kleidicv_gaussian_blur_15x15_u8, const uint8_t *src, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); +/// Generates a Gaussian kernel while taking into account the sigma value +/// provided, then convolves the source image with it. +/// In-place filtering is not supported. +/// +/// Width and height are the same for the source and for the destination. Number +/// of elements is limited to @ref KLEIDICV_MAX_IMAGE_PIXELS. 
+/// +/// Usage: \n +/// Before using this function, a context must be created using +/// kleidicv_filter_create, and once finished, it has to be released +/// using kleidicv_filter_release. The context must be created with the same +/// image dimensions as width and height parameters, with the intermediate_size +/// based on (Intermediate)BufferType in the DiscreteGaussianBlurSigma class, +/// and with the channel number of the data as channels. \n Note: only the +/// following border types are supported: \n +/// - @ref KLEIDICV_BORDER_TYPE_REPLICATE \n +/// - @ref KLEIDICV_BORDER_TYPE_REFLECT \n +/// - @ref KLEIDICV_BORDER_TYPE_WRAP \n +/// - @ref KLEIDICV_BORDER_TYPE_REVERSE +/// +/// @param src Pointer to the source data. Must be non-null. +/// @param src_stride Distance in bytes from the start of one row to the +/// start of the next row in the source data. Must be a +/// multiple of sizeof(type) and no less than width * +/// sizeof(type) * channels, except for single-row images. +/// @param dst Pointer to the destination data. Must be non-null. +/// @param dst_stride Distance in bytes from the start of one row to the +/// start of the next row in the destination data. Must be a +/// multiple of sizeof(type) and no less than width * +/// sizeof(type) * channels, except for single-row images. +/// @param width Number of columns in the data. (One column consists of +/// 'channels' number of elements.) +/// @param height Number of rows in the data. +/// @param channels Number of channels in the data. Must not be more than +/// @ref KLEIDICV_MAXIMUM_CHANNEL_COUNT. +/// @param sigma The sigma (standard deviation) value to be used during +/// the creation of the Gaussian kernel. +/// @param border_type Way of handling the border. +/// @param context Pointer to filter context.
+/// +KLEIDICV_API_DECLARATION(kleidicv_gaussian_blur_sigma_3x3_u8, + const uint8_t *src, size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, size_t height, + size_t channels, double sigma, + kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context); + +/// @copydoc kleidicv_gaussian_blur_sigma_3x3_u8 +/// +KLEIDICV_API_DECLARATION(kleidicv_gaussian_blur_sigma_5x5_u8, + const uint8_t *src, size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, size_t height, + size_t channels, double sigma, + kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context); + +/// @copydoc kleidicv_gaussian_blur_sigma_3x3_u8 +/// +KLEIDICV_API_DECLARATION(kleidicv_gaussian_blur_sigma_7x7_u8, + const uint8_t *src, size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, size_t height, + size_t channels, double sigma, + kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context); + +/// @copydoc kleidicv_gaussian_blur_sigma_3x3_u8 +/// +KLEIDICV_API_DECLARATION(kleidicv_gaussian_blur_sigma_15x15_u8, + const uint8_t *src, size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, size_t height, + size_t channels, double sigma, + kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context); + /// Splits a multi channel source stream into separate 1-channel streams. Width /// and height are the same for the source stream and for all the destination /// streams. Number of pixels is limited to @ref KLEIDICV_MAX_IMAGE_PIXELS. 
diff --git a/kleidicv/src/filters/gaussian_blur_api.cpp b/kleidicv/src/filters/gaussian_blur_api.cpp index 6732e1d29..f90886e06 100644 --- a/kleidicv/src/filters/gaussian_blur_api.cpp +++ b/kleidicv/src/filters/gaussian_blur_api.cpp @@ -71,3 +71,27 @@ KLEIDICV_MULTIVERSION_C_API( kleidicv_gaussian_blur_15x15_u8, &kleidicv::neon::gaussian_blur_15x15_u8, KLEIDICV_SVE2_IMPL_IF(kleidicv::sve2::gaussian_blur_15x15_u8), &kleidicv::sme2::gaussian_blur_15x15_u8); + +KLEIDICV_MULTIVERSION_C_API( + kleidicv_gaussian_blur_sigma_3x3_u8, + &kleidicv::neon::gaussian_blur_sigma_3x3_u8, + KLEIDICV_SVE2_IMPL_IF(kleidicv::sve2::gaussian_blur_sigma_3x3_u8), + &kleidicv::sme2::gaussian_blur_sigma_3x3_u8); + +KLEIDICV_MULTIVERSION_C_API( + kleidicv_gaussian_blur_sigma_5x5_u8, + &kleidicv::neon::gaussian_blur_sigma_5x5_u8, + KLEIDICV_SVE2_IMPL_IF(kleidicv::sve2::gaussian_blur_sigma_5x5_u8), + &kleidicv::sme2::gaussian_blur_sigma_5x5_u8); + +KLEIDICV_MULTIVERSION_C_API( + kleidicv_gaussian_blur_sigma_7x7_u8, + &kleidicv::neon::gaussian_blur_sigma_7x7_u8, + KLEIDICV_SVE2_IMPL_IF(kleidicv::sve2::gaussian_blur_sigma_7x7_u8), + &kleidicv::sme2::gaussian_blur_sigma_7x7_u8); + +KLEIDICV_MULTIVERSION_C_API( + kleidicv_gaussian_blur_sigma_15x15_u8, + &kleidicv::neon::gaussian_blur_sigma_15x15_u8, + KLEIDICV_SVE2_IMPL_IF(kleidicv::sve2::gaussian_blur_sigma_15x15_u8), + &kleidicv::sme2::gaussian_blur_sigma_15x15_u8); diff --git a/kleidicv/src/filters/gaussian_blur_neon.cpp b/kleidicv/src/filters/gaussian_blur_neon.cpp index e54bb4665..dcfc15de6 100644 --- a/kleidicv/src/filters/gaussian_blur_neon.cpp +++ b/kleidicv/src/filters/gaussian_blur_neon.cpp @@ -2,6 +2,8 @@ // // SPDX-License-Identifier: Apache-2.0 +#include + #include "kleidicv/filters/gaussian_blur.h" #include "kleidicv/kleidicv.h" #include "kleidicv/neon.h" @@ -16,6 +18,9 @@ namespace kleidicv::neon { template class DiscreteGaussianBlur; +template +class DiscreteGaussianBlurSigma; + // Template for 3x3 Gaussian Blur approximation 
filters. // // [ 1, 2, 1 ] [ 1 ] @@ -507,14 +512,126 @@ class DiscreteGaussianBlur { uint32x4_t const_158_u32_; }; // end of class DiscreteGaussianBlur +template +class DiscreteGaussianBlurSigma { + public: + using SourceType = uint8_t; + using BufferType = float; + using DestinationType = uint8_t; + + DiscreteGaussianBlurSigma(float32x4_t *kernel_vector, float *kernel_scalar) + : kernel_vector_(kernel_vector), kernel_scalar_(kernel_scalar) {} + + void vertical_vector_path(uint8x16_t src[KernelSize], BufferType *dst) const { + uint16x8_t acc_last_l = vmovl_u8(vget_low_u8(src[KernelSize >> 1])); + uint16x8_t acc_last_h = vmovl_u8(vget_high_u8(src[KernelSize >> 1])); + + float32x4_t acc_l_l = vcvtq_f32_u32(vmovl_u16(vget_low_u16(acc_last_l))); + float32x4_t acc_l_h = vcvtq_f32_u32(vmovl_u16(vget_high_u16(acc_last_l))); + float32x4_t acc_h_l = vcvtq_f32_u32(vmovl_u16(vget_low_u16(acc_last_h))); + float32x4_t acc_h_h = vcvtq_f32_u32(vmovl_u16(vget_high_u16(acc_last_h))); + + acc_l_l = vmulq_f32(acc_l_l, kernel_vector_[KernelSize >> 1]); + acc_l_h = vmulq_f32(acc_l_h, kernel_vector_[KernelSize >> 1]); + acc_h_l = vmulq_f32(acc_h_l, kernel_vector_[KernelSize >> 1]); + acc_h_h = vmulq_f32(acc_h_h, kernel_vector_[KernelSize >> 1]); + + KLEIDICV_FORCE_LOOP_UNROLL + for (size_t i = 0; i < (KernelSize >> 1); i++) { + size_t j = KernelSize - i - 1; + uint16x8_t acc_l = vaddl_u8(vget_low_u8(src[i]), vget_low_u8(src[j])); + uint16x8_t acc_h = vaddl_u8(vget_high_u8(src[i]), vget_high_u8(src[j])); + + float32x4_t acc_inner_l_l = vcvtq_f32_u32(vmovl_u16(vget_low_u16(acc_l))); + float32x4_t acc_inner_l_h = + vcvtq_f32_u32(vmovl_u16(vget_high_u16(acc_l))); + float32x4_t acc_inner_h_l = vcvtq_f32_u32(vmovl_u16(vget_low_u16(acc_h))); + float32x4_t acc_inner_h_h = + vcvtq_f32_u32(vmovl_u16(vget_high_u16(acc_h))); + + acc_l_l = vfmaq_f32(acc_l_l, acc_inner_l_l, kernel_vector_[i]); + acc_l_h = vfmaq_f32(acc_l_h, acc_inner_l_h, kernel_vector_[i]); + acc_h_l = vfmaq_f32(acc_h_l, 
acc_inner_h_l, kernel_vector_[i]); + acc_h_h = vfmaq_f32(acc_h_h, acc_inner_h_h, kernel_vector_[i]); + } + + float32x4x4_t result = {acc_l_l, acc_l_h, acc_h_l, acc_h_h}; + + vst1q_f32_x4(&dst[0], result); + } + + void vertical_scalar_path(const SourceType src[KernelSize], + BufferType *dst) const { + float acc = static_cast(src[0]) * kernel_scalar_[0]; + + KLEIDICV_FORCE_LOOP_UNROLL + for (size_t i = 1; i <= (KernelSize >> 1); i++) { + acc += static_cast(src[i]) * kernel_scalar_[i]; + } + + KLEIDICV_FORCE_LOOP_UNROLL + for (size_t i = (KernelSize >> 1) + 1; i < KernelSize; i++) { + size_t j = KernelSize - i - 1; + acc += static_cast(src[i]) * kernel_scalar_[j]; + } + + dst[0] = acc; + } + + void horizontal_vector_path(float32x4_t src[KernelSize], + DestinationType *dst) const { + float32x4_t acc = + vmulq_f32(src[KernelSize >> 1], kernel_vector_[KernelSize >> 1]); + + KLEIDICV_FORCE_LOOP_UNROLL + for (size_t i = 0; i < (KernelSize >> 1); i++) { + size_t j = KernelSize - i - 1; + float32x4_t acc_inner = vaddq_f32(src[i], src[j]); + acc = vfmaq_f32(acc, acc_inner, kernel_vector_[i]); + } + + uint32x4_t acc_u32 = vcvtq_u32_f32(vrndnq_f32(acc)); + uint16x4_t narrowed = vmovn_u32(acc_u32); + uint8x8_t interleaved = + vuzp1_u8(vreinterpret_u8_u16(narrowed), vreinterpret_u8_u16(narrowed)); + uint32_t result = vget_lane_u32(vreinterpret_u32_u8(interleaved), 0); + memcpy(&dst[0], &result, sizeof(result)); + } + + void horizontal_scalar_path(const BufferType src[KernelSize], + DestinationType *dst) const { + float acc = src[0] * kernel_scalar_[0]; + + KLEIDICV_FORCE_LOOP_UNROLL + for (size_t i = 1; i <= (KernelSize >> 1); i++) { + acc += src[i] * kernel_scalar_[i]; + } + + KLEIDICV_FORCE_LOOP_UNROLL + for (size_t i = (KernelSize >> 1) + 1; i < KernelSize; i++) { + size_t j = KernelSize - i - 1; + acc += src[i] * kernel_scalar_[j]; + } + + dst[0] = static_cast(std::round(acc)); + } + + private: + float32x4_t *kernel_vector_; + float *kernel_scalar_; +}; // end of class 
DiscreteGaussianBlurSigma + template kleidicv_error_t discrete_gaussian_blur(const ScalarType *src, size_t src_stride, ScalarType *dst, size_t dst_stride, size_t width, size_t height, size_t channels, + double sigma, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { using GaussianBlurFilterType = DiscreteGaussianBlur; + using GaussianBlurFilterTypeSigma = + DiscreteGaussianBlurSigma; CHECK_POINTERS(context); CHECK_POINTER_AND_STRIDE(src, src_stride, height); @@ -535,11 +652,18 @@ kleidicv_error_t discrete_gaussian_blur(const ScalarType *src, auto *workspace = reinterpret_cast(context); + if (sigma != 0.0 && + workspace->intermediate_size() != 4 * sizeof(ScalarType)) { + return KLEIDICV_ERROR_CONTEXT_MISMATCH; + } + if constexpr (KernelSize == 15) { - if (workspace->intermediate_size() != 4 * sizeof(ScalarType)) { + if (sigma == 0.0 && + workspace->intermediate_size() != 4 * sizeof(ScalarType)) { return KLEIDICV_ERROR_CONTEXT_MISMATCH; } - } else if (workspace->intermediate_size() != 2 * sizeof(ScalarType)) { + } else if (sigma == 0.0 && + workspace->intermediate_size() != 2 * sizeof(ScalarType)) { return KLEIDICV_ERROR_CONTEXT_MISMATCH; } @@ -557,8 +681,48 @@ kleidicv_error_t discrete_gaussian_blur(const ScalarType *src, return KLEIDICV_ERROR_NOT_IMPLEMENTED; } - GaussianBlurFilterType blur; - SeparableFilter filter{blur}; + if (sigma == 0.0) { + GaussianBlurFilterType blur; + SeparableFilter filter{blur}; + workspace->process(rect, src_rows, dst_rows, channels, *fixed_border_type, + filter); + return KLEIDICV_OK; + } + + // Custom sigma has been given, so begin kernel calculation. + const size_t kMid = KernelSize >> 1; + float kernel[kMid + 1]; + + // Prepare the sigma values for later multiplication inside a loop. 
+ float coefficient = + 1 / -(2 * static_cast(sigma) * static_cast(sigma)); + + float sum = 0.0; + + int j = static_cast(kMid); + for (size_t i = 0; i < kMid; i++, j--) { + kernel[i] = + std::exp(static_cast(j) * static_cast(j) * coefficient); + sum += kernel[i]; + } + + // Transform the sum to represent the full range of the kernel's + // values (including "1" at the mid point), not just its half. + // Then, calculate its inverse for later multiplication inside a loop. + float multiplier = 1 / (sum * 2 + 1); + + for (size_t i = 0; i < kMid; i++) { + kernel[i] *= multiplier; + } + kernel[kMid] = multiplier; + + float32x4_t vec[kMid + 1]; + for (size_t i = 0; i <= kMid; i++) { + vec[i] = vdupq_n_f32(kernel[i]); + } + + GaussianBlurFilterTypeSigma blur{vec, kernel}; + SeparableFilter filter{blur}; workspace->process(rect, src_rows, dst_rows, channels, *fixed_border_type, filter); return KLEIDICV_OK; @@ -572,7 +736,7 @@ kleidicv_error_t gaussian_blur_3x3_u8(const uint8_t *src, size_t src_stride, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { return discrete_gaussian_blur(src, src_stride, dst, dst_stride, - width, height, channels, + width, height, channels, 0.0, border_type, context); } @@ -584,7 +748,7 @@ kleidicv_error_t gaussian_blur_5x5_u8(const uint8_t *src, size_t src_stride, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { return discrete_gaussian_blur(src, src_stride, dst, dst_stride, - width, height, channels, + width, height, channels, 0.0, border_type, context); } @@ -596,7 +760,7 @@ kleidicv_error_t gaussian_blur_7x7_u8(const uint8_t *src, size_t src_stride, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { return discrete_gaussian_blur(src, src_stride, dst, dst_stride, - width, height, channels, + width, height, channels, 0.0, border_type, context); } @@ -608,7 +772,47 @@ kleidicv_error_t gaussian_blur_15x15_u8(const uint8_t *src, size_t src_stride, kleidicv_border_type_t 
border_type, kleidicv_filter_context_t *context) { return discrete_gaussian_blur(src, src_stride, dst, dst_stride, - width, height, channels, + width, height, channels, 0.0, + border_type, context); +} + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t gaussian_blur_sigma_3x3_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, double sigma, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { + return discrete_gaussian_blur(src, src_stride, dst, dst_stride, + width, height, channels, sigma, + border_type, context); +} + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t gaussian_blur_sigma_5x5_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, double sigma, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { + return discrete_gaussian_blur(src, src_stride, dst, dst_stride, + width, height, channels, sigma, + border_type, context); +} + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t gaussian_blur_sigma_7x7_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, double sigma, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { + return discrete_gaussian_blur(src, src_stride, dst, dst_stride, + width, height, channels, sigma, + border_type, context); +} + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t gaussian_blur_sigma_15x15_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, double sigma, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { + return discrete_gaussian_blur(src, src_stride, dst, dst_stride, + width, height, channels, sigma, border_type, context); } diff --git a/kleidicv/src/filters/gaussian_blur_sc.h b/kleidicv/src/filters/gaussian_blur_sc.h index 96b8c5dc7..2dd653aca 100644 --- 
a/kleidicv/src/filters/gaussian_blur_sc.h +++ b/kleidicv/src/filters/gaussian_blur_sc.h @@ -434,7 +434,7 @@ template kleidicv_error_t discrete_gaussian_blur( const ScalarType *src, size_t src_stride, ScalarType *dst, size_t dst_stride, size_t width, size_t height, size_t channels, - kleidicv_border_type_t border_type, + double sigma, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) KLEIDICV_STREAMING_COMPATIBLE { using GaussianBlurFilterType = DiscreteGaussianBlur; diff --git a/kleidicv/src/filters/gaussian_blur_sme2.cpp b/kleidicv/src/filters/gaussian_blur_sme2.cpp index ec9c6700e..9955fd5f1 100644 --- a/kleidicv/src/filters/gaussian_blur_sme2.cpp +++ b/kleidicv/src/filters/gaussian_blur_sme2.cpp @@ -13,7 +13,7 @@ gaussian_blur_3x3_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, size_t channels, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { return discrete_gaussian_blur(src, src_stride, dst, dst_stride, - width, height, channels, + width, height, channels, 0.0, border_type, context); } @@ -23,7 +23,7 @@ gaussian_blur_5x5_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, size_t channels, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { return discrete_gaussian_blur(src, src_stride, dst, dst_stride, - width, height, channels, + width, height, channels, 0.0, border_type, context); } @@ -33,7 +33,7 @@ gaussian_blur_7x7_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, size_t channels, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { return discrete_gaussian_blur(src, src_stride, dst, dst_stride, - width, height, channels, + width, height, channels, 0.0, border_type, context); } @@ -43,7 +43,51 @@ gaussian_blur_15x15_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, size_t channels, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { return discrete_gaussian_blur(src, src_stride, dst, dst_stride, - width, height, channels, 
+ width, height, channels, 0.0, + border_type, context); +} + +KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t +gaussian_blur_sigma_3x3_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, size_t height, + size_t channels, double sigma, + kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context) { + return discrete_gaussian_blur(src, src_stride, dst, dst_stride, + width, height, channels, sigma, + border_type, context); +} + +KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t +gaussian_blur_sigma_5x5_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, size_t height, + size_t channels, double sigma, + kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context) { + return discrete_gaussian_blur(src, src_stride, dst, dst_stride, + width, height, channels, sigma, + border_type, context); +} + +KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t +gaussian_blur_sigma_7x7_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, size_t height, + size_t channels, double sigma, + kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context) { + return discrete_gaussian_blur(src, src_stride, dst, dst_stride, + width, height, channels, sigma, + border_type, context); +} + +KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t +gaussian_blur_sigma_15x15_u8(const uint8_t *src, size_t src_stride, + uint8_t *dst, size_t dst_stride, size_t width, + size_t height, size_t channels, double sigma, + kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context) { + return discrete_gaussian_blur(src, src_stride, dst, dst_stride, + width, height, channels, sigma, border_type, context); } diff --git a/kleidicv/src/filters/gaussian_blur_sve2.cpp b/kleidicv/src/filters/gaussian_blur_sve2.cpp index 1e872e2f4..58d151365 100644 --- 
a/kleidicv/src/filters/gaussian_blur_sve2.cpp +++ b/kleidicv/src/filters/gaussian_blur_sve2.cpp @@ -15,7 +15,7 @@ kleidicv_error_t gaussian_blur_3x3_u8(const uint8_t *src, size_t src_stride, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { return discrete_gaussian_blur(src, src_stride, dst, dst_stride, - width, height, channels, + width, height, channels, 0.0, border_type, context); } @@ -27,7 +27,7 @@ kleidicv_error_t gaussian_blur_5x5_u8(const uint8_t *src, size_t src_stride, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { return discrete_gaussian_blur(src, src_stride, dst, dst_stride, - width, height, channels, + width, height, channels, 0.0, border_type, context); } @@ -39,7 +39,7 @@ kleidicv_error_t gaussian_blur_7x7_u8(const uint8_t *src, size_t src_stride, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { return discrete_gaussian_blur(src, src_stride, dst, dst_stride, - width, height, channels, + width, height, channels, 0.0, border_type, context); } @@ -51,7 +51,47 @@ kleidicv_error_t gaussian_blur_15x15_u8(const uint8_t *src, size_t src_stride, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { return discrete_gaussian_blur(src, src_stride, dst, dst_stride, - width, height, channels, + width, height, channels, 0.0, + border_type, context); +} + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t gaussian_blur_sigma_3x3_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, double sigma, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { + return discrete_gaussian_blur(src, src_stride, dst, dst_stride, + width, height, channels, sigma, + border_type, context); +} + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t gaussian_blur_sigma_5x5_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, double sigma, + 
kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { + return discrete_gaussian_blur(src, src_stride, dst, dst_stride, + width, height, channels, sigma, + border_type, context); +} + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t gaussian_blur_sigma_7x7_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, double sigma, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { + return discrete_gaussian_blur(src, src_stride, dst, dst_stride, + width, height, channels, sigma, + border_type, context); +} + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t gaussian_blur_sigma_15x15_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, double sigma, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { + return discrete_gaussian_blur(src, src_stride, dst, dst_stride, + width, height, channels, sigma, border_type, context); } -- GitLab