From f6eed446f003f1e0b83b5f18161fc68f32301a32 Mon Sep 17 00:00:00 2001 From: Ioana Ghiban Date: Wed, 28 Aug 2024 14:27:12 +0200 Subject: [PATCH] Implement float Separable Filter 2D with NEON --- adapters/opencv/kleidicv_hal.cpp | 14 +- benchmark/benchmark.cpp | 10 ++ .../opencv/test_separable_filter_2d.cpp | 36 ++++- conformity/opencv/utils.h | 2 + doc/opencv.md | 2 +- .../kleidicv/filters/separable_filter_2d.h | 18 +++ kleidicv/include/kleidicv/kleidicv.h | 6 + .../src/filters/separable_filter_2d_api.cpp | 14 ++ .../src/filters/separable_filter_2d_neon.cpp | 140 ++++++++++++------ .../include/kleidicv_thread/kleidicv_thread.h | 7 + kleidicv_thread/src/kleidicv_thread.cpp | 17 +++ test/api/test_float_conv.cpp | 17 +-- test/api/test_in_range.cpp | 17 +-- test/api/test_separable_filter_2d.cpp | 140 +++++++++++++++++- test/api/test_thread.cpp | 5 + test/framework/array.h | 55 +++++++ test/framework/border.cpp | 4 + test/framework/generator.h | 18 +++ test/framework/kernel.h | 32 +++- test/framework/utils.cpp | 7 + test/framework/utils.h | 7 + 21 files changed, 482 insertions(+), 86 deletions(-) diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp index 14afc0e3b..712371740 100644 --- a/adapters/opencv/kleidicv_hal.cpp +++ b/adapters/opencv/kleidicv_hal.cpp @@ -47,6 +47,7 @@ static size_t get_type_size(int depth) { case CV_16S: return 2; case CV_32S: + case CV_32F: return 4; default: return SIZE_MAX; @@ -327,7 +328,8 @@ int separable_filter_2d_init(cvhalFilter2D **context, int src_type, } int operation_depth = CV_MAT_DEPTH(src_type); - if (operation_depth != CV_8U && operation_depth != CV_16U) { + if (operation_depth != CV_8U && operation_depth != CV_16U && + operation_depth != CV_32F) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } @@ -459,6 +461,16 @@ int separable_filter_2d_operation(cvhalFilter2D *context, uchar *src_data, reinterpret_cast(params->kernel_y), params->kernel_height, params->border_type, filter_context, mt); break; + case CV_32F: + filter_err = kleidicv_thread_separable_filter_2d_f32( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, + static_cast(width), static_cast(height), + params->channels, reinterpret_cast(params->kernel_x), + params->kernel_width, + reinterpret_cast(params->kernel_y), + params->kernel_height, params->border_type, filter_context, mt); + break; default: return CV_HAL_ERROR_NOT_IMPLEMENTED; } diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 06839d728..37bfa1a29 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -290,6 +290,16 @@ static void separable_filter_2d_u16_5x5_3ch(benchmark::State& state) { } BENCHMARK(separable_filter_2d_u16_5x5_3ch); +static void separable_filter_2d_f32_5x5_1ch(benchmark::State& state) { + separable_filter_2d(state, kleidicv_separable_filter_2d_f32); +} +BENCHMARK(separable_filter_2d_f32_5x5_1ch); + +static void separable_filter_2d_f32_5x5_3ch(benchmark::State& state) { + separable_filter_2d(state, kleidicv_separable_filter_2d_f32); +} +BENCHMARK(separable_filter_2d_f32_5x5_3ch); + template static void gaussian_blur(benchmark::State& state) { kleidicv_filter_context_t* context; diff --git a/conformity/opencv/test_separable_filter_2d.cpp b/conformity/opencv/test_separable_filter_2d.cpp index 893493692..d5cdce213 100644 --- a/conformity/opencv/test_separable_filter_2d.cpp +++ b/conformity/opencv/test_separable_filter_2d.cpp @@ -92,17 +92,20 @@ bool test_separable_filter_2d(int index, RecreatedMessageQueue& request_queue, // One extra line allocated to be sure the kernel seed can be placed next // to the real input cv::Mat input(y + 1, x, get_opencv_matrix_type()); - // use the minimum value 1 for the input in order to properly work around - // the potential OpenCV bug (mentioned lower) - rng.fill(input, cv::RNG::UNIFORM, 1, - std::numeric_limits::max()); - uint32_t kernel_seed = rng.next(); + if constexpr (std::is_same_v) { + rng.fill(input, cv::RNG::NORMAL, 0.0, 1.0e10); + } else { + // use the minimum value 1 for the input in order to properly work + // around the potential OpenCV bug (mentioned lower) + rng.fill(input, cv::RNG::UNIFORM, 1, + std::numeric_limits::max()); + } // kernel seed is embedded into the input matrix + uint32_t kernel_seed = rng.next(); *reinterpret_cast(&input.at(input.rows - 1, 0)) = kernel_seed; - cv::Mat actual = exec_separable_filter_2d(input); cv::Mat expected = get_expected_from_subordinate(index, request_queue, @@ -118,7 +121,11 @@ bool test_separable_filter_2d(int index, RecreatedMessageQueue& request_queue, } } - if (are_matrices_different(0, actual, expected)) { + bool success = + (std::is_same_v && + !are_float_matrices_different(0.3, actual, expected)) || + (!are_matrices_different(0, actual, expected)); + if (!success) { fail_print_matrices(y, x, input, actual, expected); return true; } @@ -166,6 +173,21 @@ std::vector& separable_filter_2d_tests_get() { TEST("Separable Filter 2D 5x5 (u16), BORDER_REPLICATE, 2 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), TEST("Separable Filter 2D 5x5 (u16), BORDER_REPLICATE, 3 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), TEST("Separable Filter 2D 5x5 (u16), BORDER_REPLICATE, 4 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + + TEST("Separable Filter 2D 5x5 (f32), BORDER_REFLECT_101, 1 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + TEST("Separable Filter 2D 5x5 (f32), BORDER_REFLECT_101, 2 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + TEST("Separable Filter 2D 5x5 (f32), BORDER_REFLECT_101, 3 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + TEST("Separable Filter 2D 5x5 (f32), BORDER_REFLECT_101, 4 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + + TEST("Separable Filter 2D 5x5 (f32), BORDER_REFLECT, 1 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + TEST("Separable Filter 2D 5x5 (f32), BORDER_REFLECT, 2 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + TEST("Separable Filter 2D 5x5 (f32), BORDER_REFLECT, 3 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + TEST("Separable Filter 2D 5x5 (f32), BORDER_REFLECT, 4 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + + TEST("Separable Filter 2D 5x5 (f32), BORDER_REPLICATE, 1 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + TEST("Separable Filter 2D 5x5 (f32), BORDER_REPLICATE, 2 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + TEST("Separable Filter 2D 5x5 (f32), BORDER_REPLICATE, 3 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), + TEST("Separable Filter 2D 5x5 (f32), BORDER_REPLICATE, 4 channel", (test_separable_filter_2d), (exec_separable_filter_2d)), }; // clang-format on return tests; diff --git a/conformity/opencv/utils.h b/conformity/opencv/utils.h index 321a7e590..89e884764 100644 --- a/conformity/opencv/utils.h +++ b/conformity/opencv/utils.h @@ -17,6 +17,8 @@ constexpr int get_opencv_matrix_type() { return CV_8UC(Channels); } else if constexpr (std::is_same_v) { return CV_16UC(Channels); + } else if constexpr (std::is_same_v) { + return CV_32FC(Channels); } } diff --git a/doc/opencv.md b/doc/opencv.md index 431a8e21a..7372f30f4 100644 --- a/doc/opencv.md +++ b/doc/opencv.md @@ -99,7 +99,7 @@ Notes on parameters: ### [`cv::sepFilter2D()`](https://docs.opencv.org/4.10.0/d4/d86/group__imgproc__filter.html#ga910e29ff7d7b105057d1625a4bf6318d) Applies a separable linear filter to an image.\ -Currently only the 5x5 kernel size is supported with `CV_8U` and `CV_16U` source, destination and kernel depths.\ +Currently only the 5x5 kernel size is supported with `CV_8U`, `CV_16U` and `CV_32F` source, destination and kernel depths.\ In-place filtering not supported. Notes on parameters: diff --git a/kleidicv/include/kleidicv/filters/separable_filter_2d.h b/kleidicv/include/kleidicv/filters/separable_filter_2d.h index cad5539f9..b633c4382 100644 --- a/kleidicv/include/kleidicv/filters/separable_filter_2d.h +++ b/kleidicv/include/kleidicv/filters/separable_filter_2d.h @@ -32,6 +32,17 @@ KLEIDICV_API_DECLARATION(kleidicv_separable_filter_2d_stripe_u16, const uint16_t *kernel_y, size_t kernel_height, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); +// For internal use only. See instead kleidicv_separable_filter_2d_f32. +// Filter a horizontal stripe across an image. The stripe is defined by the +// range (y_begin, y_end]. +KLEIDICV_API_DECLARATION(kleidicv_separable_filter_2d_stripe_f32, + const float *src, size_t src_stride, float *dst, + size_t dst_stride, size_t width, size_t height, + size_t y_begin, size_t y_end, size_t channels, + const float *kernel_x, size_t kernel_width, + const float *kernel_y, size_t kernel_height, + kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context); } namespace kleidicv { @@ -52,6 +63,13 @@ kleidicv_error_t separable_filter_2d_stripe_u16( size_t kernel_height, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); +kleidicv_error_t separable_filter_2d_stripe_f32( + const float *src, size_t src_stride, float *dst, size_t dst_stride, + size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, + const float *kernel_x, size_t kernel_width, const float *kernel_y, + size_t kernel_height, kleidicv_border_type_t border_type, + kleidicv_filter_context_t *context); + } // namespace neon namespace sve2 { diff --git a/kleidicv/include/kleidicv/kleidicv.h b/kleidicv/include/kleidicv/kleidicv.h index e18175929..ba64a50ee 100644 --- a/kleidicv/include/kleidicv/kleidicv.h +++ b/kleidicv/include/kleidicv/kleidicv.h @@ -1277,6 +1277,12 @@ kleidicv_error_t kleidicv_separable_filter_2d_u16( size_t width, size_t height, size_t channels, const uint16_t *kernel_x, size_t kernel_width, const uint16_t *kernel_y, size_t kernel_height, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); +/// @copydoc kleidicv_separable_filter_2d_u8 +kleidicv_error_t kleidicv_separable_filter_2d_f32( + const float *src, size_t src_stride, float *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, const float *kernel_x, + size_t kernel_width, const float *kernel_y, size_t kernel_height, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); /// Applies Gaussian blur to the source image using the specified parameters. /// In-place filtering is not supported. diff --git a/kleidicv/src/filters/separable_filter_2d_api.cpp b/kleidicv/src/filters/separable_filter_2d_api.cpp index e4adf6cea..371c41147 100644 --- a/kleidicv/src/filters/separable_filter_2d_api.cpp +++ b/kleidicv/src/filters/separable_filter_2d_api.cpp @@ -19,6 +19,10 @@ KLEIDICV_MULTIVERSION_C_API( KLEIDICV_SVE2_IMPL_IF(kleidicv::sve2::separable_filter_2d_stripe_u16), &kleidicv::sme2::separable_filter_2d_stripe_u16); +KLEIDICV_MULTIVERSION_C_API(kleidicv_separable_filter_2d_stripe_f32, + &kleidicv::neon::separable_filter_2d_stripe_f32, + nullptr, nullptr); + extern "C" { using KLEIDICV_TARGET_NAMESPACE::Rectangle; @@ -88,4 +92,14 @@ kleidicv_error_t kleidicv_separable_filter_2d_u16( kernel_x, kernel_width, kernel_y, kernel_height, border_type, context); } +kleidicv_error_t kleidicv_separable_filter_2d_f32( + const float *src, size_t src_stride, float *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, const float *kernel_x, + size_t kernel_width, const float *kernel_y, size_t kernel_height, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { + return kleidicv_separable_filter_2d_stripe_f32( + src, src_stride, dst, dst_stride, width, height, 0, height, channels, + kernel_x, kernel_width, kernel_y, kernel_height, border_type, context); +} + } // extern "C" diff --git a/kleidicv/src/filters/separable_filter_2d_neon.cpp b/kleidicv/src/filters/separable_filter_2d_neon.cpp index da1009549..aadca93e2 100644 --- a/kleidicv/src/filters/separable_filter_2d_neon.cpp +++ b/kleidicv/src/filters/separable_filter_2d_neon.cpp @@ -2,6 +2,7 @@ // // SPDX-License-Identifier: Apache-2.0 +#include #include #include "kleidicv/ctypes.h" @@ -219,6 +220,79 @@ class SeparableFilter2D { SourceVectorType kernel_y_u16_[5]; }; // end of class SeparableFilter2D +template <> +class SeparableFilter2D { + public: + using SourceType = float; + using SourceVectorType = typename VecTraits::VectorType; + using BufferType = float; + using BufferVectorType = typename VecTraits::VectorType; + using DestinationType = float; + + // Ignored because vectors are initialized in the constructor body. + // NOLINTNEXTLINE - hicpp-member-init + SeparableFilter2D(const SourceType *kernel_x, const SourceType *kernel_y) + : kernel_x_(kernel_x), kernel_y_(kernel_y) { + for (size_t i = 0; i < 5; i++) { + kernel_x_f32_[i] = vdupq_n_f32(kernel_x[i]); + kernel_y_f32_[i] = vdupq_n_f32(kernel_y[i]); + } + } + + void vertical_vector_path(SourceVectorType src[5], + DestinationType *dst) const { + auto acc = vmulq_f32(src[0], kernel_y_f32_[0]); + + // Optimization to avoid unnecessary branching in vector code. + KLEIDICV_FORCE_LOOP_UNROLL + for (size_t i = 1; i < 5; i++) { + acc = vmlaq_f32(acc, src[i], kernel_y_f32_[i]); + } + + vst1q_f32(&dst[0], acc); + } + + void vertical_scalar_path(const SourceType src[5], + DestinationType *dst) const { + SourceType acc = src[0] * kernel_y_[0]; + for (size_t i = 1; i < 5; i++) { + acc += src[i] * kernel_y_[i]; + } + + dst[0] = acc; + } + + void horizontal_vector_path(SourceVectorType src[5], + DestinationType *dst) const { + SourceVectorType acc = vmulq_f32(src[0], kernel_x_f32_[0]); + + // Optimization to avoid unnecessary branching in vector code. + KLEIDICV_FORCE_LOOP_UNROLL + for (size_t i = 1; i < 5; i++) { + acc = vmlaq_f32(acc, src[i], kernel_x_f32_[i]); + } + vst1q_f32(&dst[0], acc); + } + + void horizontal_scalar_path(const SourceType src[5], + DestinationType *dst) const { + SourceType acc = src[0] * kernel_x_[0]; + + for (size_t i = 1; i < 5; i++) { + acc += src[i] * kernel_x_[i]; + } + + dst[0] = acc; + } + + private: + const SourceType *kernel_x_; + const SourceType *kernel_y_; + + SourceVectorType kernel_x_f32_[5]; + SourceVectorType kernel_y_f32_[5]; +}; // end of class SeparableFilter2D + template static kleidicv_error_t separable_filter_2d_checks( const T *src, size_t src_stride, T *dst, size_t dst_stride, size_t width, @@ -255,11 +329,11 @@ static kleidicv_error_t separable_filter_2d_checks( return KLEIDICV_OK; } -KLEIDICV_TARGET_FN_ATTRS -kleidicv_error_t separable_filter_2d_stripe_u8( - const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, - size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, - const uint8_t *kernel_x, size_t kernel_width, const uint8_t *kernel_y, +template +kleidicv_error_t separable_filter_2d_stripe_entry( + const T *src, size_t src_stride, T *dst, size_t dst_stride, size_t width, + size_t height, size_t y_begin, size_t y_end, size_t channels, + const T *kernel_x, size_t kernel_width, const T *kernel_y, size_t kernel_height, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { auto *workspace = reinterpret_cast(context); @@ -280,55 +354,35 @@ kleidicv_error_t separable_filter_2d_stripe_u8( Rectangle rect{width, height}; - using SeparableFilterClass = SeparableFilter2D; + using SeparableFilterClass = SeparableFilter2D; SeparableFilterClass filterClass{kernel_x, kernel_y}; SeparableFilter filter{filterClass}; - Rows src_rows{src, src_stride, channels}; - Rows dst_rows{dst, dst_stride, channels}; + Rows src_rows{src, src_stride, channels}; + Rows dst_rows{dst, dst_stride, channels}; workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels, *fixed_border_type, filter); return KLEIDICV_OK; } -KLEIDICV_TARGET_FN_ATTRS -kleidicv_error_t separable_filter_2d_stripe_u16( - const uint16_t *src, size_t src_stride, uint16_t *dst, size_t dst_stride, - size_t width, size_t height, size_t y_begin, size_t y_end, size_t channels, - const uint16_t *kernel_x, size_t kernel_width, const uint16_t *kernel_y, - size_t kernel_height, kleidicv_border_type_t border_type, - kleidicv_filter_context_t *context) { - auto *workspace = reinterpret_cast(context); - kleidicv_error_t checks_result = separable_filter_2d_checks( - src, src_stride, dst, dst_stride, width, height, channels, kernel_x, - kernel_width, kernel_y, kernel_height, workspace); - - if (checks_result != KLEIDICV_OK) { - return checks_result; - } - - auto fixed_border_type = get_fixed_border_type(border_type); - // if the std::optional is empty, that means that the border type is not - // supported, so there's no need to check for specific types - if (!fixed_border_type) { - return KLEIDICV_ERROR_NOT_IMPLEMENTED; +#define KLEIDICV_INSTANTIATE_WRAPPER(type, type_suffix) \ + kleidicv_error_t separable_filter_2d_stripe_##type_suffix( \ + const type *src, size_t src_stride, type *dst, size_t dst_stride, \ + size_t width, size_t height, size_t y_begin, size_t y_end, \ + size_t channels, const type *kernel_x, size_t kernel_width, \ + const type *kernel_y, size_t kernel_height, \ + kleidicv_border_type_t border_type, \ + kleidicv_filter_context_t *context) { \ + return separable_filter_2d_stripe_entry( \ + src, src_stride, dst, dst_stride, width, height, y_begin, y_end, \ + channels, kernel_x, kernel_width, kernel_y, kernel_height, \ + border_type, context); \ } - Rectangle rect{width, height}; - - using SeparableFilterClass = SeparableFilter2D; - - SeparableFilterClass filterClass{kernel_x, kernel_y}; - SeparableFilter filter{filterClass}; - - Rows src_rows{src, src_stride, channels}; - Rows dst_rows{dst, dst_stride, channels}; - workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels, - *fixed_border_type, filter); - - return KLEIDICV_OK; -} +KLEIDICV_INSTANTIATE_WRAPPER(uint8_t, u8); +KLEIDICV_INSTANTIATE_WRAPPER(uint16_t, u16); +KLEIDICV_INSTANTIATE_WRAPPER(float, f32); } // namespace kleidicv::neon diff --git a/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h b/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h index 0267fa272..f4d2f0bcb 100644 --- a/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h +++ b/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h @@ -259,6 +259,13 @@ kleidicv_error_t kleidicv_thread_separable_filter_2d_u16( kleidicv_border_type_t border_type, kleidicv_filter_context_t *context, kleidicv_thread_multithreading); +kleidicv_error_t kleidicv_thread_separable_filter_2d_f32( + const float *src, size_t src_stride, float *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, const float *kernel_x, + size_t kernel_width, const float *kernel_y, size_t kernel_height, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context, + kleidicv_thread_multithreading); + kleidicv_error_t kleidicv_thread_sobel_3x3_horizontal_s16_u8( const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, size_t width, size_t height, size_t channels, diff --git a/kleidicv_thread/src/kleidicv_thread.cpp b/kleidicv_thread/src/kleidicv_thread.cpp index e01c06028..021f6cec9 100644 --- a/kleidicv_thread/src/kleidicv_thread.cpp +++ b/kleidicv_thread/src/kleidicv_thread.cpp @@ -460,6 +460,23 @@ kleidicv_error_t kleidicv_thread_separable_filter_2d_u16( kernel_height, context, mt); } +kleidicv_error_t kleidicv_thread_separable_filter_2d_f32( + const float *src, size_t src_stride, float *dst, size_t dst_stride, + size_t width, size_t height, size_t channels, const float *kernel_x, + size_t kernel_width, const float *kernel_y, size_t kernel_height, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context, + kleidicv_thread_multithreading mt) { + auto callback = [=](size_t y_begin, size_t y_end, + kleidicv_filter_context_t *thread_context) { + return kleidicv_separable_filter_2d_stripe_f32( + src, src_stride, dst, dst_stride, width, height, y_begin, y_end, + channels, kernel_x, kernel_width, kernel_y, kernel_height, border_type, + thread_context); + }; + return kleidicv_thread_filter(callback, width, height, channels, kernel_width, + kernel_height, context, mt); +} + kleidicv_error_t kleidicv_thread_sobel_3x3_horizontal_s16_u8( const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, size_t width, size_t height, size_t channels, diff --git a/test/api/test_float_conv.cpp b/test/api/test_float_conv.cpp index 5f3b0dda3..a3a861843 100644 --- a/test/api/test_float_conv.cpp +++ b/test/api/test_float_conv.cpp @@ -55,32 +55,25 @@ class FloatConversionTest final { OutputType expected; }; - static float floatval(uint32_t v) { - float result; // Avoid cppcoreguidelines-init-variables. NOLINT - static_assert(sizeof(result) == sizeof(v)); - memcpy(&result, &v, sizeof(result)); - return result; - } - const float quietNaN = std::numeric_limits::quiet_NaN(); const float signalingNaN = std::numeric_limits::signaling_NaN(); const float posInfinity = std::numeric_limits::infinity(); const float negInfinity = -std::numeric_limits::infinity(); - const float minusNaN = floatval(0xFF800001); - const float plusNaN = floatval(0x7F800001); + const float minusNaN = test::floatval(0xFF800001); + const float plusNaN = test::floatval(0x7F800001); const float plusZero = 0.0F; const float minusZero = -0.0F; - const float oneNaN = floatval(0x7FC00001); + const float oneNaN = test::floatval(0x7FC00001); const float zeroDivZero = -std::numeric_limits::quiet_NaN(); const float floatMin = std::numeric_limits::min(); const float floatMax = std::numeric_limits::max(); const float posSubnormalMin = std::numeric_limits::denorm_min(); - const float posSubnormalMax = floatval(0x007FFFFF); + const float posSubnormalMax = test::floatval(0x007FFFFF); const float negSubnormalMin = -std::numeric_limits::denorm_min(); - const float negSubnormalMax = floatval(0x807FFFFF); + const float negSubnormalMax = test::floatval(0x807FFFFF); template , bool> = true, diff --git a/test/api/test_in_range.cpp b/test/api/test_in_range.cpp index b22ee5cd9..4e40daa43 100644 --- a/test/api/test_in_range.cpp +++ b/test/api/test_in_range.cpp @@ -48,32 +48,25 @@ class InRangeTest final { expected_rows(std::move(_expected_rows)) {} }; - static float floatval(uint32_t v) { - float result; // Avoid cppcoreguidelines-init-variables. NOLINT - static_assert(sizeof(result) == sizeof(v)); - memcpy(&result, &v, sizeof(result)); - return result; - } - const float quietNaN = std::numeric_limits::quiet_NaN(); const float signalingNaN = std::numeric_limits::signaling_NaN(); const float posInfinity = std::numeric_limits::infinity(); const float negInfinity = -std::numeric_limits::infinity(); - const float minusNaN = floatval(0xFF800001); - const float plusNaN = floatval(0x7F800001); + const float minusNaN = test::floatval(0xFF800001); + const float plusNaN = test::floatval(0x7F800001); const float plusZero = 0.0F; const float minusZero = -0.0F; - const float oneNaN = floatval(0x7FC00001); + const float oneNaN = test::floatval(0x7FC00001); const float zeroDivZero = -std::numeric_limits::quiet_NaN(); const float floatMin = std::numeric_limits::min(); const float floatMax = std::numeric_limits::max(); const float posSubnormalMin = std::numeric_limits::denorm_min(); - const float posSubnormalMax = floatval(0x007FFFFF); + const float posSubnormalMax = test::floatval(0x007FFFFF); const float negSubnormalMin = -std::numeric_limits::denorm_min(); - const float negSubnormalMax = floatval(0x807FFFFF); + const float negSubnormalMax = test::floatval(0x807FFFFF); void calculate_expected(const test::Array2D& source, test::Array2D& expected, diff --git a/test/api/test_separable_filter_2d.cpp b/test/api/test_separable_filter_2d.cpp index 16ad58f91..7b92916e8 100644 --- a/test/api/test_separable_filter_2d.cpp +++ b/test/api/test_separable_filter_2d.cpp @@ -13,6 +13,7 @@ KLEIDICV_API(separable_filter_2d, kleidicv_separable_filter_2d_u8, uint8_t) KLEIDICV_API(separable_filter_2d, kleidicv_separable_filter_2d_u16, uint16_t) +KLEIDICV_API(separable_filter_2d, kleidicv_separable_filter_2d_f32, float) // Implements KernelTestParams for SeparableFilter2D operators. template @@ -36,6 +37,15 @@ struct SeparableFilter2DKernelTestParams { static constexpr size_t kKernelSize = KernelSize; }; // end of struct SeparableFilter2DKernelTestParams +template +struct SeparableFilter2DKernelTestParams { + using InputType = float; + using IntermediateType = float; + using OutputType = float; + + static constexpr size_t kKernelSize = KernelSize; +}; // end of struct SeparableFilter2DKernelTestParams + static constexpr std::array kDefaultBorder = { KLEIDICV_BORDER_TYPE_REPLICATE}; @@ -92,10 +102,17 @@ class SeparableFilter2DTest : public test::KernelTest { auto kSupportedBorderValues = test::default_border_values(); // Create generators and execute test. test::SequenceGenerator tested_border_values{kSupportedBorderValues}; - test::PseudoRandomNumberGeneratorIntRange element_generator{ - 0, max_value}; - Base::test(kernel, *array_layout_generator_, *border_type_generator_, - tested_border_values, element_generator); + if constexpr (std::is_integral_v) { + test::PseudoRandomNumberGeneratorIntRange element_generator{ + 0, max_value}; + Base::test(kernel, *array_layout_generator_, *border_type_generator_, + tested_border_values, element_generator); + } else { + test::PseudoRandomNumberGeneratorFloatRange element_generator{ + 0, max_value}; + Base::test(kernel, *array_layout_generator_, *border_type_generator_, + tested_border_values, element_generator); + } } protected: @@ -133,7 +150,7 @@ class SeparableFilter2DTest : public test::KernelTest { } }; // end of class SeparableFilter2DTest -using ElementTypes = ::testing::Types; +using ElementTypes = ::testing::Types; template class SeparableFilter2D : public testing::Test {}; @@ -502,6 +519,119 @@ TEST(SeparableFilter2D, 5x5_U16OverflowVector) { EXPECT_EQ_ARRAY2D(dst_expected, dst); } +TEST(SeparableFilter2D, 5x5_F32MaxInf) { + using TypeParam = float; + + kleidicv_filter_context_t *context = nullptr; + ASSERT_EQ(KLEIDICV_OK, + kleidicv_filter_context_create(&context, 1, 5, 5, 5, 7)); + + const float posInfinity = std::numeric_limits::infinity(); + const float floatMax = std::numeric_limits::max(); + + test::Array2D src{5, 7, test::Options::vector_length()}; + test::Array2D kernel_x{5, 1}; + test::Array2D kernel_y{5, 1}; + test::Array2D dst{5, 7, test::Options::vector_length()}; + test::Array2D dst_expected{5, 7, test::Options::vector_length()}; + + // clang-format off + src.set(0, 0, { 1, 1, 1, 1, 1}); + src.set(1, 0, { 1, 1, 1, 1, 1}); + src.set(2, 0, { 1, 1, 1, 1, 1}); + src.set(3, 0, { 1, 1, 1, 1, 1}); + src.set(4, 0, { 1, 1, 1, 1, 1}); + src.set(5, 0, { 1, 1, 1, 1, 1}); + src.set(6, 0, { 1, 1, 1, 1, 1}); + // clang-format on + + kernel_x.set(0, 0, {floatMax, 1, 1, 1, 1}); + kernel_y.set(0, 0, {1, 1, 1, 1, 1}); + + EXPECT_EQ(KLEIDICV_OK, separable_filter_2d()( + src.data(), src.stride(), dst.data(), dst.stride(), + 5, 7, 1, kernel_x.data(), 5, kernel_y.data(), 5, + KLEIDICV_BORDER_TYPE_REPLICATE, context)); + + // clang-format off + dst_expected.set(0, 0, { posInfinity, posInfinity, posInfinity, posInfinity, posInfinity}); + dst_expected.set(1, 0, { posInfinity, posInfinity, posInfinity, posInfinity, posInfinity}); + dst_expected.set(2, 0, { posInfinity, posInfinity, posInfinity, posInfinity, posInfinity}); + dst_expected.set(3, 0, { posInfinity, posInfinity, posInfinity, posInfinity, posInfinity}); + dst_expected.set(4, 0, { posInfinity, posInfinity, posInfinity, posInfinity, posInfinity}); + dst_expected.set(5, 0, { posInfinity, posInfinity, posInfinity, posInfinity, posInfinity}); + dst_expected.set(6, 0, { posInfinity, posInfinity, posInfinity, posInfinity, posInfinity}); + // clang-format on + + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); + EXPECT_EQ_ARRAY2D(dst_expected, dst); +} + +TEST(SeparableFilter2D, 5x5_F32SpecialValues) { + using TypeParam = float; + + kleidicv_filter_context_t *context = nullptr; + ASSERT_EQ(KLEIDICV_OK, + kleidicv_filter_context_create(&context, 1, 5, 5, 5, 7)); + test::Array2D src{5, 7, test::Options::vector_length()}; + test::Array2D kernel_x{5, 1}; + test::Array2D kernel_y{5, 1}; + test::Array2D dst{5, 7, test::Options::vector_length()}; + test::Array2D dst_expected{5, 7, test::Options::vector_length()}; + + const float quietMinusNaN = test::floatval(0xFFC00001); + const float quietPlusNaN = std::numeric_limits::quiet_NaN(); + const float signalingNaN = std::numeric_limits::signaling_NaN(); + const float negInfinity = -std::numeric_limits::infinity(); + const float posInfinity = std::numeric_limits::infinity(); + + const float minusNaN = test::floatval(0xFF800001); + const float plusNaN = test::floatval(0x7F800001); + const float minusZero = -0.0F; + const float plusZero = 0.0F; + + const float oneNaN = test::floatval(0x7FC00001); + const float zeroDivZero = -std::numeric_limits::quiet_NaN(); + const float floatMin = std::numeric_limits::min(); + const float floatMax = std::numeric_limits::max(); + + const float posSubnormalMin = std::numeric_limits::denorm_min(); + const float posSubnormalMax = test::floatval(0x007FFFFF); + const float negSubnormalMin = -std::numeric_limits::denorm_min(); + const float negSubnormalMax = test::floatval(0x807FFFFF); + + // clang-format off + src.set(0, 0, { quietPlusNaN, signalingNaN, negInfinity, posInfinity, zeroDivZero }); + src.set(1, 0, { minusNaN, plusNaN, oneNaN, minusZero, plusZero }); + src.set(2, 0, { 1111.11, -1112.22, 113.33, floatMin, floatMax }); + src.set(3, 0, { 114.44, posSubnormalMin, posSubnormalMax, negSubnormalMin, negSubnormalMax }); + src.set(4, 0, { 111.51, 112.62, 113.73, 114.84, 114.83 }); + src.set(5, 0, { 126.66, 127.11, 128.66, 129.11, 129.1 }); + src.set(6, 0, { 11.5, 12.5, -11.5, -12.5, -12.51 }); + // clang-format on + + kernel_x.set(0, 0, {38, 0, 38, 0, 38}); + kernel_y.set(0, 0, {38, 0, 38, 0, 38}); + + EXPECT_EQ(KLEIDICV_OK, separable_filter_2d()( + src.data(), src.stride(), dst.data(), dst.stride(), + 5, 7, 1, kernel_x.data(), 5, kernel_y.data(), 5, + KLEIDICV_BORDER_TYPE_REPLICATE, context)); + + // clang-format off + dst_expected.set(0, 0, {quietMinusNaN, quietMinusNaN, quietMinusNaN, oneNaN, quietPlusNaN}); + dst_expected.set(1, 0, {quietMinusNaN, quietMinusNaN, quietMinusNaN, oneNaN, quietPlusNaN}); + dst_expected.set(2, 0, {quietMinusNaN, quietMinusNaN, quietMinusNaN, oneNaN, oneNaN}); + dst_expected.set(3, 0, {quietMinusNaN, quietMinusNaN, quietMinusNaN, oneNaN, oneNaN}); + dst_expected.set(4, 0, {3875407.25, 504475.8125, posInfinity, quietPlusNaN, quietPlusNaN}); + dst_expected.set(5, 0, {898687.9375, 734736.125, 702289.5, 538337.625, 505890.96875}); + dst_expected.set(6, 0, {519479, 522684.6875, 454932.1875, 458137.875, 390385.375}); + // clang-format on + + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); + EXPECT_APPROX_EQ_ARRAY2D(1e-4, dst_expected, dst); +} + TYPED_TEST(SeparableFilter2D, NullPointer) { using KernelTestParams = SeparableFilter2DKernelTestParams; kleidicv_filter_context_t *context = nullptr; diff --git a/test/api/test_thread.cpp b/test/api/test_thread.cpp index 1abc6e92e..c70726294 100644 --- a/test/api/test_thread.cpp +++ b/test/api/test_thread.cpp @@ -205,6 +205,11 @@ TEST_P(Thread, separable_filter_2d_u16) { kleidicv_thread_separable_filter_2d_u16); } +TEST_P(Thread, separable_filter_2d_f32) { + check_separable_filter_2d(kleidicv_separable_filter_2d_f32, + kleidicv_thread_separable_filter_2d_f32); +} + TEST_P(Thread, SobelHorizontal1Channel) { check_unary_op(kleidicv_sobel_3x3_horizontal_s16_u8, kleidicv_thread_sobel_3x3_horizontal_s16_u8, diff --git a/test/framework/array.h b/test/framework/array.h index 8de46cf34..50bc15783 100644 --- a/test/framework/array.h +++ b/test/framework/array.h @@ -7,6 +7,7 @@ #include +#include #include #include #include @@ -214,6 +215,41 @@ class Array2D : public TwoDimensional { return std::nullopt; } + // Compares two instances for approximate equality considering only element + // bytes. Returns the location of the first mismatch, if any. + std::optional> compare_approx_to( + ElementType threshold_percent, const Array2D &other) const { + for (size_t row = 0; row < height(); ++row) { + for (size_t column = 0; column < width(); ++column) { + const ElementType *lhs = at(row, column); + const ElementType *rhs = other.at(row, column); + if (lhs != 0) { + ElementType diff = abs_diff(*lhs, *rhs); + ElementType diff_percentage = (diff / std::abs(*lhs)) * 100; + if (!lhs || !rhs || diff_percentage > threshold_percent) { + return std::make_tuple(row, column); + } + // Special handling to avoid division by 0. + } else { + // Seems like clang-tidy does not understand what the fill member + // function of test::Array2D does, so these exceptions are required + // NOLINTBEGIN(clang-analyzer-core.uninitialized.Assign) + float expected = *(lhs); + // NOLINTEND(clang-analyzer-core.uninitialized.Assign) + + float actual = *(rhs); + // Error of 1 ULP means that actual is either same as expected, or + // the next float value in negative of positive direction + if (!lhs || !rhs || (std::nextafterf(actual, expected) != expected)) { + return std::make_tuple(row, column); + } + } + } + } + + return std::nullopt; + } + // Returns a pointer to the first element. ElementType *data() { return data_; } @@ -385,6 +421,25 @@ class Array2D : public TwoDimensional { } \ } while (0 != 0) +// Compares two Array2D objects for approximate equality. +#define EXPECT_APPROX_EQ_ARRAY2D(threshold_percent, lhs, rhs) \ + do { \ + ASSERT_EQ((lhs).width(), (rhs).width()) \ + << "Mismatch in width." << std::endl; \ + ASSERT_EQ((lhs).height(), (rhs).height()) \ + << "Mismatch in height." << std::endl; \ + ASSERT_EQ((lhs).channels(), (rhs).channels()) \ + << "Mismatch in channels." << std::endl; \ + auto mismatch = (lhs).compare_approx_to((threshold_percent), (rhs)); \ + if (mismatch) { \ + auto [row, col] = *mismatch; \ + GTEST_FAIL() << "Mismatch at (row=" << row << ", col=" << col \ + << "): " << std::hex << std::showbase \ + << +(lhs).at(row, col)[0] << " vs " \ + << +(rhs).at(row, col)[0] << "." << std::endl; \ + } \ + } while (0 != 0) + // Compares two Array2D objects for inequality. #define EXPECT_NE_ARRAY2D(lhs, rhs) \ do { \ diff --git a/test/framework/border.cpp b/test/framework/border.cpp index 5a3c6688f..5761c09ec 100644 --- a/test/framework/border.cpp +++ b/test/framework/border.cpp @@ -318,4 +318,8 @@ template void prepare_borders(kleidicv_border_type_t, const Bordered *, TwoDimensional *); +template void prepare_borders(kleidicv_border_type_t, + kleidicv_border_values_t, const Bordered *, + TwoDimensional *); + } // namespace test diff --git a/test/framework/generator.h b/test/framework/generator.h index 3870737e3..54a812fe3 100644 --- a/test/framework/generator.h +++ b/test/framework/generator.h @@ -61,6 +61,24 @@ class PseudoRandomNumberGeneratorIntRange std::uniform_int_distribution dist_; }; // end of class PseudoRandomNumberGeneratorIntRange +// Generates pseudo-random floats within the range [min, max]. +template , bool> = true> +class PseudoRandomNumberGeneratorFloatRange + : public PseudoRandomNumberGenerator { + public: + PseudoRandomNumberGeneratorFloatRange(ElementType min, ElementType max) + : PseudoRandomNumberGenerator(), dist_(min, max) {} + + // Yields the next value or std::nullopt. + std::optional next() override { + return static_cast(dist_(this->rng_)); + } + + protected: + std::uniform_real_distribution dist_; +}; // end of class PseudoRandomNumberGeneratorFloatRange + // Generator which yields values of an iterable container. template class SequenceGenerator : public Generator { diff --git a/test/framework/kernel.h b/test/framework/kernel.h index 94ce83439..3c3314950 100644 --- a/test/framework/kernel.h +++ b/test/framework/kernel.h @@ -151,8 +151,13 @@ class KernelTest { prepare_source(element_generator); prepare_expected(kernel, array_layout, border_type, border_values); prepare_actual(); - check_results( - this->call_api(&input_, &actual_, border_type, border_values)); + if constexpr (std::is_same_v) { + check_approx_results( + this->call_api(&input_, &actual_, border_type, border_values)); + } else { + check_results( + this->call_api(&input_, &actual_, border_type, border_values)); + } } protected: @@ -187,9 +192,14 @@ class KernelTest { IntermediateType coefficient = kernel.at(height, width)[0]; InputType value = source.at(row + height, column + width * source.channels())[0]; - result = saturating_add( - result, - saturating_mul(coefficient, static_cast(value))); + IntermediateType product{0}; + if constexpr (std::is_integral_v) { + product = + saturating_mul(coefficient, static_cast(value)); + result = saturating_add(result, product); + } else { + result += (coefficient * value); + } } } @@ -277,6 +287,18 @@ class KernelTest { EXPECT_EQ_ARRAY2D(expected_, actual_); } + void check_approx_results(kleidicv_error_t err) { + if (debug_) { + std::cout << "[actual]" << std::endl; + dump(&actual_); + } + + EXPECT_EQ(KLEIDICV_OK, err); + + // Check that the actual result matches the expectation with less precision. + EXPECT_APPROX_EQ_ARRAY2D(1e-4, expected_, actual_); + } + // Input operand for the operation. Array2D input_; // Input operand with borders, used to calculate expected values. diff --git a/test/framework/utils.cpp b/test/framework/utils.cpp index 77d00e149..eaedb2f6f 100644 --- a/test/framework/utils.cpp +++ b/test/framework/utils.cpp @@ -20,6 +20,13 @@ bool test::Options::are_long_running_tests_skipped_ = true; namespace test { +float floatval(uint32_t v) { + float result; // Avoid cppcoreguidelines-init-variables. NOLINT + static_assert(sizeof(result) == sizeof(v)); + memcpy(&result, &v, sizeof(result)); + return result; +} + template void dump(const TwoDimensional *elements) { if (!elements) { diff --git a/test/framework/utils.h b/test/framework/utils.h index 622dc527a..d645a3937 100644 --- a/test/framework/utils.h +++ b/test/framework/utils.h @@ -109,6 +109,8 @@ class Options { static bool are_long_running_tests_skipped_; }; // end of class Options +float floatval(uint32_t v); + // Prints all the elements in a two-dimensional space. template void dump(const TwoDimensional *elements); @@ -206,6 +208,11 @@ T saturating_mul(T a, T b) { return result; } +template +auto abs_diff(T a, T b) { + return a > b ? a - b : b - a; +} + } // namespace test #endif // KLEIDICV_TEST_FRAMEWORK_UTILS_H_ -- GitLab