From 75f2ed6f1580348d49e8ce293992507a0e0db6e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Thu, 11 Apr 2024 17:09:39 +0200 Subject: [PATCH 1/2] Add SVE2/SME2 support for 3x3 Gaussian blur --- .../include/kleidicv/filters/gaussian_blur.h | 14 +++++ kleidicv/src/filters/gaussian_blur_api.cpp | 7 ++- kleidicv/src/filters/gaussian_blur_sc.h | 55 +++++++++++++++++++ kleidicv/src/filters/gaussian_blur_sme2.cpp | 10 ++++ kleidicv/src/filters/gaussian_blur_sve2.cpp | 12 ++++ 5 files changed, 95 insertions(+), 3 deletions(-) diff --git a/kleidicv/include/kleidicv/filters/gaussian_blur.h b/kleidicv/include/kleidicv/filters/gaussian_blur.h index 570c245a7..9d460027a 100644 --- a/kleidicv/include/kleidicv/filters/gaussian_blur.h +++ b/kleidicv/include/kleidicv/filters/gaussian_blur.h @@ -30,6 +30,13 @@ kleidicv_error_t gaussian_blur_5x5_u8(const uint8_t *src, size_t src_stride, namespace sve2 { +kleidicv_error_t gaussian_blur_3x3_u8(const uint8_t *src, size_t src_stride, + uint8_t *dst, size_t dst_stride, + size_t width, size_t height, + size_t channels, + kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context); + kleidicv_error_t gaussian_blur_5x5_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, size_t width, size_t height, @@ -41,6 +48,13 @@ kleidicv_error_t gaussian_blur_5x5_u8(const uint8_t *src, size_t src_stride, namespace sme2 { +kleidicv_error_t gaussian_blur_3x3_u8(const uint8_t *src, size_t src_stride, + uint8_t *dst, size_t dst_stride, + size_t width, size_t height, + size_t channels, + kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context); + kleidicv_error_t gaussian_blur_5x5_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, size_t width, size_t height, diff --git a/kleidicv/src/filters/gaussian_blur_api.cpp b/kleidicv/src/filters/gaussian_blur_api.cpp index 17b4ff543..eeb6d7f74 100644 --- a/kleidicv/src/filters/gaussian_blur_api.cpp +++ b/kleidicv/src/filters/gaussian_blur_api.cpp @@ -51,9 +51,10 @@ kleidicv_error_t kleidicv_filter_release(kleidicv_filter_context_t *context) { } // extern "C" -KLEIDICV_MULTIVERSION_C_API(kleidicv_gaussian_blur_3x3_u8, - &kleidicv::neon::gaussian_blur_3x3_u8, nullptr, - nullptr); +KLEIDICV_MULTIVERSION_C_API( + kleidicv_gaussian_blur_3x3_u8, &kleidicv::neon::gaussian_blur_3x3_u8, + KLEIDICV_SVE2_IMPL_IF(kleidicv::sve2::gaussian_blur_3x3_u8), + &kleidicv::sme2::gaussian_blur_3x3_u8); KLEIDICV_MULTIVERSION_C_API( kleidicv_gaussian_blur_5x5_u8, &kleidicv::neon::gaussian_blur_5x5_u8, diff --git a/kleidicv/src/filters/gaussian_blur_sc.h b/kleidicv/src/filters/gaussian_blur_sc.h index 4c08577db..491bd454e 100644 --- a/kleidicv/src/filters/gaussian_blur_sc.h +++ b/kleidicv/src/filters/gaussian_blur_sc.h @@ -16,6 +16,61 @@ namespace KLEIDICV_TARGET_NAMESPACE { template class DiscreteGaussianBlur; +// Template for 3x3 Gaussian Blur approximation filters. +// +// [ 1, 2, 1 ] [ 1 ] +// F = 1/16 * [ 2, 4, 2 ] = 1/16 * [ 2 ] * [ 1, 2, 1 ] +// [ 1, 2, 1 ] [ 1 ] +template <> +class DiscreteGaussianBlur { + public: + using SourceType = uint8_t; + using BufferType = uint16_t; + using DestinationType = uint8_t; + + // Applies vertical filtering vector using SIMD operations. + // + // DST = [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T + void vertical_vector_path(svbool_t pg, svuint8_t src_0, svuint8_t src_1, + svuint8_t src_2, BufferType *dst) const + KLEIDICV_STREAMING_COMPATIBLE { + svuint16_t acc_0_2_b = svaddlb_u16(src_0, src_2); + svuint16_t acc_0_2_t = svaddlt_u16(src_0, src_2); + + svuint16_t acc_1_b = svshllb_n_u16(src_1, 1); + svuint16_t acc_1_t = svshllt_n_u16(src_1, 1); + + svuint16_t acc_u16_b = svadd_u16_x(pg, acc_0_2_b, acc_1_b); + svuint16_t acc_u16_t = svadd_u16_x(pg, acc_0_2_t, acc_1_t); + + svuint16x2_t interleaved = svcreate2(acc_u16_b, acc_u16_t); + svst2(pg, &dst[0], interleaved); + } + + // Applies horizontal filtering vector using SIMD operations. + // + // DST = 1/16 * [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T + void horizontal_vector_path(svbool_t pg, svuint16_t src_0, svuint16_t src_1, + svuint16_t src_2, DestinationType *dst) const + KLEIDICV_STREAMING_COMPATIBLE { + svuint16_t acc_0_2 = svhadd_u16_x(pg, src_0, src_2); + + svuint16_t acc = svadd_u16_x(pg, acc_0_2, src_1); + acc = svrshr_x(pg, acc, 3); + + svst1b(pg, &dst[0], acc); + } + + // Applies horizontal filtering vector using scalar operations. + // + // DST = 1/16 * [ SRC0, SRC1, SRC2 ] * [ 1, 2, 1 ]T + void horizontal_scalar_path(const BufferType src[3], DestinationType *dst) + const KLEIDICV_STREAMING_COMPATIBLE { + auto acc = src[0] + 2 * src[1] + src[2]; + dst[0] = rounding_shift_right(acc, 4); + } +}; // end of class DiscreteGaussianBlur + // Template for 5x5 Gaussian Blur approximation filters. // // [ 1, 4, 6, 4, 1 ] [ 1 ] diff --git a/kleidicv/src/filters/gaussian_blur_sme2.cpp b/kleidicv/src/filters/gaussian_blur_sme2.cpp index ed69922fe..9f692b3c0 100644 --- a/kleidicv/src/filters/gaussian_blur_sme2.cpp +++ b/kleidicv/src/filters/gaussian_blur_sme2.cpp @@ -7,6 +7,16 @@ namespace kleidicv::sme2 { +KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t +gaussian_blur_3x3_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, size_t height, + size_t channels, kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context) { + return discrete_gaussian_blur(src, src_stride, dst, dst_stride, + width, height, channels, + border_type, context); +} + KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t gaussian_blur_5x5_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, size_t width, size_t height, diff --git a/kleidicv/src/filters/gaussian_blur_sve2.cpp b/kleidicv/src/filters/gaussian_blur_sve2.cpp index 7e82cfbf6..12c7176de 100644 --- a/kleidicv/src/filters/gaussian_blur_sve2.cpp +++ b/kleidicv/src/filters/gaussian_blur_sve2.cpp @@ -7,6 +7,18 @@ namespace kleidicv::sve2 { +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t gaussian_blur_3x3_u8(const uint8_t *src, size_t src_stride, + uint8_t *dst, size_t dst_stride, + size_t width, size_t height, + size_t channels, + kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context) { + return discrete_gaussian_blur(src, src_stride, dst, dst_stride, + width, height, channels, + border_type, context); +} + KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t gaussian_blur_5x5_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, -- GitLab From 5c613ec5bfa386fc18328600e93af7c39229a16c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Mon, 15 Apr 2024 17:02:15 +0200 Subject: [PATCH 2/2] Fix typos --- kleidicv/include/kleidicv/types.h | 4 ++-- kleidicv/include/kleidicv/workspace/borders.h | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/kleidicv/include/kleidicv/types.h b/kleidicv/include/kleidicv/types.h index 717c7d478..ffa30bbf8 100644 --- a/kleidicv/include/kleidicv/types.h +++ b/kleidicv/include/kleidicv/types.h @@ -453,12 +453,12 @@ class DoubleBufferedIndirectRows final : public IndirectRows { std::swap(db_ptr_storage_[0], db_ptr_storage_[1]); } - // Retuns indirect rows where write is allowed. + // Returns indirect rows where write is allowed. [[nodiscard]] IndirectRows write_at() KLEIDICV_STREAMING_COMPATIBLE { return IndirectRows{db_ptr_storage_[0], stride(), channels()}; } - // Retuns indirect rows where read is allowed. + // Returns indirect rows where read is allowed. [[nodiscard]] IndirectRows read_at() KLEIDICV_STREAMING_COMPATIBLE { return IndirectRows{db_ptr_storage_[1], stride(), channels()}; } diff --git a/kleidicv/include/kleidicv/workspace/borders.h b/kleidicv/include/kleidicv/workspace/borders.h index a697f0121..5e5c18be2 100644 --- a/kleidicv/include/kleidicv/workspace/borders.h +++ b/kleidicv/include/kleidicv/workspace/borders.h @@ -60,10 +60,10 @@ class FixedBorderInfo final { FixedBorderInfo(size_t height, FixedBorderType border_type) : height_(height), border_type_(border_type) {} - // Retuns offsets without the influence of any border. + // Returns offsets without the influence of any border. Offsets offsets_without_border() const { return get(-1, 0, 1); } - // Retuns offsets for columns affected by left border. + // Returns offsets for columns affected by left border. Offsets offsets_with_left_border(size_t /* column_index */) const KLEIDICV_STREAMING_COMPATIBLE { switch (border_type_) { @@ -85,7 +85,7 @@ class FixedBorderInfo final { return Offsets{}; // GCOVR_EXCL_LINE } - // Retuns offsets for columns affected by right border. + // Returns offsets for columns affected by right border. Offsets offsets_with_right_border(size_t /* column_index */) const KLEIDICV_STREAMING_COMPATIBLE { switch (border_type_) { @@ -107,7 +107,7 @@ class FixedBorderInfo final { return Offsets{}; // GCOVR_EXCL_LINE } - // Retuns offsets for rows or columns affected by any border. + // Returns offsets for rows or columns affected by any border. Offsets offsets_with_border(size_t row_or_column_index) const KLEIDICV_STREAMING_COMPATIBLE { if (row_or_column_index == 0U) { @@ -158,12 +158,12 @@ class FixedBorderInfo final { FixedBorderInfo(size_t height, FixedBorderType border_type) : height_(height), border_type_(border_type) {} - // Retuns offsets without the influence of any border. + // Returns offsets without the influence of any border. Offsets offsets_without_border() const KLEIDICV_STREAMING_COMPATIBLE { return get(-2, -1, 0, 1, 2); } - // Retuns offsets for columns affected by left border. + // Returns offsets for columns affected by left border. Offsets offsets_with_left_border(size_t column_index) const KLEIDICV_STREAMING_COMPATIBLE { switch (border_type_) { @@ -204,7 +204,7 @@ class FixedBorderInfo final { return Offsets{}; // GCOVR_EXCL_LINE } - // Retuns offsets for columns affected by right border. + // Returns offsets for columns affected by right border. Offsets offsets_with_right_border(size_t column_index) const KLEIDICV_STREAMING_COMPATIBLE { switch (border_type_) { @@ -245,7 +245,7 @@ class FixedBorderInfo final { return Offsets{}; // GCOVR_EXCL_LINE } - // Retuns offsets for rows or columns affected by any border. + // Returns offsets for rows or columns affected by any border. Offsets offsets_with_border(size_t row_or_column_index) const KLEIDICV_STREAMING_COMPATIBLE { if (row_or_column_index <= 1U) { -- GitLab