diff --git a/CHANGELOG.md b/CHANGELOG.md index 6db386e0ad6a9fdb38044235cbddadebdc19c647..c453a0b10efffada3a2faa07739217ead76d04f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,9 @@ This changelog aims to follow the guiding principles of ### Added - Implementation of Rotate 90 degrees clockwise. +### Changed +- Increased precision of sum for 32 bit floats and expose it to OpenCV HAL. + ### Fixed - Handling of cv::erode and cv::dilate non-default constant borders. diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp index 17e83e68615d3df1b2bb3f6c97c8c9bab4fab4f3..3912cee534c86694ea14d4f0c999ca7e74ea95f7 100644 --- a/adapters/opencv/kleidicv_hal.cpp +++ b/adapters/opencv/kleidicv_hal.cpp @@ -1004,6 +1004,27 @@ int transpose(const uchar *src_data, size_t src_step, uchar *dst_data, static_cast(element_size))); } +int sum(const uchar *src_data, size_t src_step, int src_type, size_t width, + size_t height, double *result) { + size_t channels = (src_type >> CV_CN_SHIFT) + 1; + + if (channels != 1) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + switch (CV_MAT_DEPTH(src_type)) { + case CV_32F: + float result_float = 0; + kleidicv_error_t err = + kleidicv_sum_f32(reinterpret_cast(src_data), src_step, + width, height, &result_float); + *result = result_float; + return convert_error(err); + } + + return CV_HAL_ERROR_NOT_IMPLEMENTED; +} + int rotate(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int angle) { int element_size = CV_ELEM_SIZE(src_type); diff --git a/adapters/opencv/kleidicv_hal.h b/adapters/opencv/kleidicv_hal.h index e184cecdcb369dd28abb5796c4ab89e881d40b86..35851694c46e1b06d53a8e20becc7abfd3279cc6 100644 --- a/adapters/opencv/kleidicv_hal.h +++ b/adapters/opencv/kleidicv_hal.h @@ -122,6 +122,9 @@ int pyrdown(const uchar *src_data, size_t src_step, int src_width, int transpose(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int src_width, int src_height, int element_size); +int sum(const uchar *src_data, size_t src_step, int src_type, size_t width, + size_t height, double *result); + int rotate(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int angle); @@ -467,6 +470,21 @@ static inline int kleidicv_transpose_with_fallback( #undef cv_hal_transpose2d #define cv_hal_transpose2d kleidicv_transpose_with_fallback +#if KLEIDICV_ENABLE_ALL_OPENCV_HAL +// sum +#ifdef cv_hal_sum +static inline int kleidicv_sum_with_fallback(const uchar *src_data, + size_t src_step, int src_type, + size_t width, size_t height, + double *result) { + return KLEIDICV_HAL_FALLBACK_FORWARD(sum, cv_hal_sum, src_data, src_step, + src_type, width, height, result); +} +#undef cv_hal_sum +#define cv_hal_sum kleidicv_sum_with_fallback +#endif // cv_hal_sum +#endif // KLEIDICV_ENABLE_ALL_OPENCV_HAL + // rotate static inline int kleidicv_rotate_with_fallback(int src_type, const uchar *src_data, diff --git a/adapters/opencv/opencv-4.10.patch b/adapters/opencv/opencv-4.10.patch index 06fd4ef89ce78584fa6c03c1643b40c2f75ef52c..5283cbb8f61cf2fa1e03050595a196be5ee010e7 100644 --- a/adapters/opencv/opencv-4.10.patch +++ b/adapters/opencv/opencv-4.10.patch @@ -29,13 +29,25 @@ index 2b4035285f..729cd1dd43 100644 double scale[] = {alpha, beta}; CV_Assert( func != 0 ); diff --git a/modules/core/src/hal_replacement.hpp b/modules/core/src/hal_replacement.hpp -index f78608dbad..299b5e54bd 100644 +index f78608dbad..a9384588ec 100644 --- a/modules/core/src/hal_replacement.hpp +++ b/modules/core/src/hal_replacement.hpp -@@ -953,6 +953,41 @@ inline int hal_ni_transpose2d(const uchar* src_data, size_t src_step, uchar* dst +@@ -953,6 +953,53 @@ inline int hal_ni_transpose2d(const uchar* src_data, size_t src_step, uchar* dst #define cv_hal_transpose2d hal_ni_transpose2d //! @endcond ++/** ++ @brief sum ++ @param src_data,src_step,src_type Source image ++ @param width,height Source image dimensions ++ @param result Pointer to save the sum result to. ++*/ ++inline int hal_ni_sum(const uchar *src_data, size_t src_step, int src_type, size_t width, size_t height, double *result) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } ++ ++//! @cond IGNORED ++#define cv_hal_sum hal_ni_sum ++//! @endcond ++ +/** + @brief convertTo + @param src_data,src_step,src_depth Source image @@ -90,6 +102,21 @@ index 8c6d8ad9a9..47eb6fdb66 100644 return; } else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED) +diff --git a/modules/core/src/sum.dispatch.cpp b/modules/core/src/sum.dispatch.cpp +index fade948336..17b40ca0e8 100644 +--- a/modules/core/src/sum.dispatch.cpp ++++ b/modules/core/src/sum.dispatch.cpp +@@ -199,6 +199,10 @@ Scalar sum(InputArray _src) + CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_sum(src, _res), _res); + + int k, cn = src.channels(), depth = src.depth(); ++ ++ double result = 0; ++ CALL_HAL_RET(sum, cv_hal_sum, result, src.data, src.step, src.type(), src.cols, src.rows); ++ + SumFunc func = getSumFunc(depth); + CV_Assert( cn <= 4 && func != 0 ); + diff --git a/modules/imgproc/src/hal_replacement.hpp b/modules/imgproc/src/hal_replacement.hpp index 773fed9b48..b74ff70f99 100644 --- a/modules/imgproc/src/hal_replacement.hpp diff --git a/conformity/opencv/test_sum.cpp b/conformity/opencv/test_sum.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a90691ee5f904f871a69198d2b4f771d127deb2d --- /dev/null +++ b/conformity/opencv/test_sum.cpp @@ -0,0 +1,49 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include "tests.h" + +template +cv::Mat exec_sum(cv::Mat& input) { + // If sum is implemented for multi channels, these dimensions must be modified + cv::Mat result(1, 1, Format, cv::sum(input)); + return result; +} + +#if MANAGER +template +bool test_sum(int index, RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue) { + cv::RNG rng(0); + + for (size_t height = 2; height <= 128; height *= 2) { + for (size_t width = 2; width <= 128; width *= 2) { + cv::Mat input(height, width, Format); + rng.fill(input, cv::RNG::UNIFORM, -10, 100); + cv::Mat actual = exec_sum(input); + cv::Mat expected = get_expected_from_subordinate(index, request_queue, + reply_queue, input); + + if (are_float_matrices_different(0.001, actual, expected)) { + fail_print_matrices(height, width, input, actual, expected); + return true; + } + } + } + + return false; +} +#endif + +std::vector& sum_tests_get() { + // clang-format off + static std::vector tests = { + TEST("sum_f32", (test_sum), (exec_sum)), + }; + // clang-format on + return tests; +} diff --git a/conformity/opencv/tests.cpp b/conformity/opencv/tests.cpp index afb0cbeb21777fb3ae903fe77b3e443345b00895..614b66cb1467d3edcc3e838d6dbba25b983c6f88 100644 --- a/conformity/opencv/tests.cpp +++ b/conformity/opencv/tests.cpp @@ -37,6 +37,7 @@ std::vector all_tests = merge_tests({ float_conversion_tests_get, resize_tests_get, scale_tests_get, + sum_tests_get, min_max_tests_get, in_range_tests_get, remap_tests_get, diff --git a/conformity/opencv/tests.h b/conformity/opencv/tests.h index c5b8c4f75829a21b5fd92bdeb6e5289d0e6d063e..6087026f2c333bb24165e4a844cd5e26d0f35a3c 100644 --- a/conformity/opencv/tests.h +++ b/conformity/opencv/tests.h @@ -17,6 +17,7 @@ std::vector& gaussian_blur_tests_get(); std::vector& rgb2yuv_tests_get(); std::vector& yuv2rgb_tests_get(); std::vector& sobel_tests_get(); +std::vector& sum_tests_get(); std::vector& exp_tests_get(); std::vector& float_conversion_tests_get(); std::vector& resize_tests_get(); diff --git a/doc/opencv.md b/doc/opencv.md index 2650f8015a08975ee82dac4e5bee4c8287bbe511..59246297ab19f680f648077039322e8cf8e24f04 100644 --- a/doc/opencv.md +++ b/doc/opencv.md @@ -32,6 +32,13 @@ Notes on parameters: ### [`cv::bitwise_and()`](https://docs.opencv.org/4.10.0/d2/de8/group__core__array.html#ga60b4d04b251ba5eb1392c34425497e14) Bitwise conjunction of two arrays. +### [`cv::sum()`](https://docs.opencv.org/4.10.0/d2/de8/group__core__array.html#ga716e10a2dd9e228e4d3c95818f106722) +Calculates the sum of array elements. + +Notes on parameters: +* `src.depth()` - only supports `CV_32F` depth. +* `src.channels()` - only supports 1 channel. + ### [`cv::cvtColor()`](https://docs.opencv.org/4.10.0/d8/d01/group__imgproc__color__conversions.html#ga397ae87e1288a81d2363b61574eb8cab) Converts the color space of an image. diff --git a/kleidicv/include/kleidicv/neon_intrinsics.h b/kleidicv/include/kleidicv/neon_intrinsics.h index cee298b942a24181a527ef8a09863a8c3d5778ee..420a3733a2c265638c13ac672011787111d2ee94 100644 --- a/kleidicv/include/kleidicv/neon_intrinsics.h +++ b/kleidicv/include/kleidicv/neon_intrinsics.h @@ -160,6 +160,9 @@ static inline int32x2_t vget_high(int32x4_t vec) { return vget_high_s32(vec); static inline uint32x2_t vget_high(uint32x4_t vec) { return vget_high_u32(vec); } static inline int64x1_t vget_high(int64x2_t vec) { return vget_high_s64(vec); } static inline uint64x1_t vget_high(uint64x2_t vec) { return vget_high_u64(vec); } +static inline float16x4_t vget_high(float16x8_t vec) { return vget_high_f16(vec); } +static inline float32x2_t vget_high(float32x4_t vec) { return vget_high_f32(vec); } +static inline float64x1_t vget_high(float64x2_t vec) { return vget_high_f64(vec); } // ----------------------------------------------------------------------------- // vcgeq* @@ -179,6 +182,9 @@ static inline int32x2_t vget_low(int32x4_t vec) { return vget_low_s32(vec); } static inline uint32x2_t vget_low(uint32x4_t vec) { return vget_low_u32(vec); } static inline int64x1_t vget_low(int64x2_t vec) { return vget_low_s64(vec); } static inline uint64x1_t vget_low(uint64x2_t vec) { return vget_low_u64(vec); } +static inline float16x4_t vget_low(float16x8_t vec) { return vget_low_f16(vec); } +static inline float32x2_t vget_low(float32x4_t vec) { return vget_low_f32(vec); } +static inline float64x1_t vget_low(float64x2_t vec) { return vget_low_f64(vec); } // ----------------------------------------------------------------------------- // vminq* @@ -335,7 +341,9 @@ static inline int32x4_t vld1q(const int32_t *src) { return vld1q_s32(src); } static inline uint32x4_t vld1q(const uint32_t *src) { return vld1q_u32(src); } static inline int64x2_t vld1q(const int64_t *src) { return vld1q_s64(src); } static inline uint64x2_t vld1q(const uint64_t *src) { return vld1q_u64(src); } +static inline float16x8_t vld1q(const float16_t *src) { return vld1q_f16(src); } static inline float32x4_t vld1q(const float32_t *src) { return vld1q_f32(src); } +static inline float64x2_t vld1q(const float64_t *src) { return vld1q_f64(src); } static inline int8x16x2_t vld2q(const int8_t *src) { return vld2q_s8(src); } static inline uint8x16x2_t vld2q(const uint8_t *src) { return vld2q_u8(src); } @@ -518,6 +526,12 @@ static inline uint32x4_t vrev64q(uint32x4_t src) { return vrev64q_u32(src); } static inline int64x2_t vrev64q(int64x2_t src) { return src; } static inline uint64x2_t vrev64q(uint64x2_t src) { return src; } +// ----------------------------------------------------------------------------- +// vcvt* +// ----------------------------------------------------------------------------- + +static inline float64x2_t vcvt_f64(float32x2_t vec) { return vcvt_f64_f32(vec); } + // clang-format on } // namespace kleidicv::neon diff --git a/kleidicv/src/arithmetics/sum_api.cpp b/kleidicv/src/arithmetics/sum_api.cpp index 2959587e80acf002a41c5427a4aa4f23f1e36683..a24cb802179ecafa2a86f38e4dcb1e1287281e22 100644 --- a/kleidicv/src/arithmetics/sum_api.cpp +++ b/kleidicv/src/arithmetics/sum_api.cpp @@ -9,7 +9,7 @@ namespace kleidicv { namespace neon { -template +template kleidicv_error_t sum(const T *src, size_t src_stride, size_t width, size_t height, T *sum); @@ -17,7 +17,7 @@ kleidicv_error_t sum(const T *src, size_t src_stride, size_t width, namespace sve2 { -template +template kleidicv_error_t sum(const T *src, size_t src_stride, size_t width, size_t height, T *sum); @@ -25,7 +25,7 @@ kleidicv_error_t sum(const T *src, size_t src_stride, size_t width, namespace sme2 { -template +template kleidicv_error_t sum(const T *src, size_t src_stride, size_t width, size_t height, T *sum); @@ -33,6 +33,7 @@ kleidicv_error_t sum(const T *src, size_t src_stride, size_t width, } // namespace kleidicv -KLEIDICV_MULTIVERSION_C_API(kleidicv_sum_f32, &kleidicv::neon::sum, - KLEIDICV_SVE2_IMPL_IF(&kleidicv::sve2::sum), - KLEIDICV_SME2_IMPL_IF(&kleidicv::sme2::sum)); +KLEIDICV_MULTIVERSION_C_API( + kleidicv_sum_f32, (&kleidicv::neon::sum), + KLEIDICV_SVE2_IMPL_IF((&kleidicv::sve2::sum)), + (&kleidicv::sme2::sum)); diff --git a/kleidicv/src/arithmetics/sum_neon.cpp b/kleidicv/src/arithmetics/sum_neon.cpp index 9021d0bac0ae2bdbe9b502f594ab99b33cc2b187..b4bc8f49111307d0c6d7251421b98ba9f3f3430c 100644 --- a/kleidicv/src/arithmetics/sum_neon.cpp +++ b/kleidicv/src/arithmetics/sum_neon.cpp @@ -8,33 +8,51 @@ namespace kleidicv::neon { -template -class Sum final : public UnrollTwice { +template +class Sum; + +template <> +class Sum final : public UnrollTwice { public: + using ScalarType = float; + using ScalarTypeInternal = double; using VecTraits = neon::VecTraits; using VectorType = typename VecTraits::VectorType; - VectorType vector_sum; - ScalarType scalar_sum; + using VecTraitsInternal = + KLEIDICV_TARGET_NAMESPACE::VecTraits; + using VectorTypeInternal = typename VecTraitsInternal::VectorType; + + VectorTypeInternal vector_sum; + ScalarTypeInternal scalar_sum; - Sum() : vector_sum(VectorType{0}), scalar_sum(0) {} + Sum() : vector_sum(VectorTypeInternal{0}), scalar_sum(0) {} - void vector_path(VectorType src) { vector_sum = vaddq(src, vector_sum); } + void vector_path(VectorType src) { + VectorTypeInternal src_low = vcvt_f64(vget_low(src)); + VectorTypeInternal src_high = vcvt_f64(vget_high(src)); + vector_sum = vaddq(vector_sum, vaddq(src_low, src_high)); + } - void scalar_path(ScalarType src) { scalar_sum += src; } + void scalar_path(ScalarType src) { + scalar_sum += static_cast(src); + } - ScalarType get_sum() { return vaddvq(vector_sum) + scalar_sum; } + ScalarType get_sum() const { + ScalarTypeInternal sum = vaddvq(vector_sum) + scalar_sum; + return static_cast(sum); + } }; -template -kleidicv_error_t sum(const ScalarType *src, size_t src_stride, size_t width, - size_t height, ScalarType *sum) { +template +kleidicv_error_t sum(const T *src, size_t src_stride, size_t width, + size_t height, T *sum) { CHECK_POINTERS(sum); CHECK_POINTER_AND_STRIDE(src, src_stride, height); CHECK_IMAGE_SIZE(width, height); Rectangle rect{width, height}; - Rows src_rows{src, src_stride}; - Sum operation; + Rows src_rows{src, src_stride}; + Sum operation; apply_operation_by_rows(operation, rect, src_rows); *sum = operation.get_sum(); @@ -42,11 +60,11 @@ kleidicv_error_t sum(const ScalarType *src, size_t src_stride, size_t width, return KLEIDICV_OK; } -#define KLEIDICV_INSTANTIATE_TEMPLATE(type) \ - template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t sum( \ - const type *src, size_t src_stride, size_t width, size_t height, \ +#define KLEIDICV_INSTANTIATE_TEMPLATE(type, type_internal) \ + template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t sum( \ + const type *src, size_t src_stride, size_t width, size_t height, \ type *sum) -KLEIDICV_INSTANTIATE_TEMPLATE(float); +KLEIDICV_INSTANTIATE_TEMPLATE(float, double); } // namespace kleidicv::neon diff --git a/kleidicv/src/arithmetics/sum_sc.h b/kleidicv/src/arithmetics/sum_sc.h index 8109665cb2a7f1a69a19ec1e900f20b086b8d2c5..71c5d831ee9aa75f56fd03cf7d88e399a70e86cd 100644 --- a/kleidicv/src/arithmetics/sum_sc.h +++ b/kleidicv/src/arithmetics/sum_sc.h @@ -11,54 +11,68 @@ namespace KLEIDICV_TARGET_NAMESPACE { -template -class Sum final : public UnrollTwice { +template +class Sum; + +template <> +class Sum final : public UnrollTwice { public: + using ScalarType = float; + using ScalarTypeInternal = double; using ContextType = Context; using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits; using VectorType = typename VecTraits::VectorType; + using VecTraitsInternal = + KLEIDICV_TARGET_NAMESPACE::VecTraits; + using VectorTypeInternal = typename VecTraitsInternal::VectorType; - explicit Sum(VectorType &accumulator) KLEIDICV_STREAMING_COMPATIBLE + explicit Sum(VectorTypeInternal &accumulator) KLEIDICV_STREAMING_COMPATIBLE : accumulator_{accumulator} { - accumulator_ = VecTraits::svdup(0); + accumulator_ = VecTraitsInternal::svdup(0); } void vector_path(ContextType ctx, VectorType src) KLEIDICV_STREAMING_COMPATIBLE { - accumulator_ = svadd_m(ctx.predicate(), accumulator_, src); + VectorTypeInternal src_widened_evens = + svcvt_f64_f32_x(VecTraits::svptrue(), src); + VectorTypeInternal src_widened_odds = + svcvtlt_f64_f32_x(VecTraits::svptrue(), src); + accumulator_ = + svadd_m(ctx.predicate(), accumulator_, + svadd_m(ctx.predicate(), src_widened_evens, src_widened_odds)); } - ScalarType get_sum() KLEIDICV_STREAMING_COMPATIBLE { - ScalarType accumulator_final[VecTraits::max_num_lanes()] = {0}; - svst1(VecTraits::svptrue(), accumulator_final, accumulator_); + ScalarType get_sum() const KLEIDICV_STREAMING_COMPATIBLE { + ScalarTypeInternal accumulator_final[VecTraitsInternal::max_num_lanes()] = { + 0}; + svst1(VecTraitsInternal::svptrue(), accumulator_final, accumulator_); - ScalarType sum = 0; - for (size_t i = 0; i != VecTraits::num_lanes(); ++i) { + ScalarTypeInternal sum = 0; + for (size_t i = 0; i != VecTraitsInternal::num_lanes(); ++i) { sum += accumulator_final[i]; } - return sum; + return static_cast(sum); } private: - VectorType &accumulator_; + VectorTypeInternal &accumulator_; }; -template -kleidicv_error_t sum_sc(const ScalarType *src, size_t src_stride, size_t width, - size_t height, - ScalarType *sum) KLEIDICV_STREAMING_COMPATIBLE { - using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits; - using VectorType = typename VecTraits::VectorType; +template +kleidicv_error_t sum_sc(const T *src, size_t src_stride, size_t width, + size_t height, T *sum) KLEIDICV_STREAMING_COMPATIBLE { + using VecTraitsInternal = KLEIDICV_TARGET_NAMESPACE::VecTraits; + using VectorTypeInternal = typename VecTraitsInternal::VectorType; CHECK_POINTERS(sum); CHECK_POINTER_AND_STRIDE(src, src_stride, height); CHECK_IMAGE_SIZE(width, height); Rectangle rect{width, height}; - Rows src_rows{src, src_stride}; + Rows src_rows{src, src_stride}; - VectorType accumulator; - Sum operation{accumulator}; + VectorTypeInternal accumulator; + Sum operation{accumulator}; apply_operation_by_rows(operation, rect, src_rows); diff --git a/kleidicv/src/arithmetics/sum_sme2.cpp b/kleidicv/src/arithmetics/sum_sme2.cpp index 51904798659043e1c44de55b478ff5c4f1f44030..9a6b5d217eace339da773649830542aa6920b1e8 100644 --- a/kleidicv/src/arithmetics/sum_sme2.cpp +++ b/kleidicv/src/arithmetics/sum_sme2.cpp @@ -6,17 +6,17 @@ namespace kleidicv::sme2 { -template +template KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t sum(const T *src, size_t src_stride, size_t width, size_t height, T *sum) { - return sum_sc(src, src_stride, width, height, sum); + return sum_sc(src, src_stride, width, height, sum); } -#define KLEIDICV_INSTANTIATE_TEMPLATE(type) \ - template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t sum( \ - const type *src, size_t src_stride, size_t width, size_t height, \ +#define KLEIDICV_INSTANTIATE_TEMPLATE(type, type_internal) \ + template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t sum( \ + const type *src, size_t src_stride, size_t width, size_t height, \ type *sum) -KLEIDICV_INSTANTIATE_TEMPLATE(float); +KLEIDICV_INSTANTIATE_TEMPLATE(float, double); } // namespace kleidicv::sme2 diff --git a/kleidicv/src/arithmetics/sum_sve2.cpp b/kleidicv/src/arithmetics/sum_sve2.cpp index a1a0a5cf5626dcf1c94cd452a5120e0a9f34f20b..d6764506a0e53a2e14b285cb661512bb02c2e9d7 100644 --- a/kleidicv/src/arithmetics/sum_sve2.cpp +++ b/kleidicv/src/arithmetics/sum_sve2.cpp @@ -6,18 +6,18 @@ namespace kleidicv::sve2 { -template +template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t sum(const T *src, size_t src_stride, size_t width, size_t height, T *sum) { - return sum_sc(src, src_stride, width, height, sum); + return sum_sc(src, src_stride, width, height, sum); } -#define KLEIDICV_INSTANTIATE_TEMPLATE(type) \ - template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t sum( \ - const type *src, size_t src_stride, size_t width, size_t height, \ +#define KLEIDICV_INSTANTIATE_TEMPLATE(type, type_internal) \ + template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t sum( \ + const type *src, size_t src_stride, size_t width, size_t height, \ type *sum) -KLEIDICV_INSTANTIATE_TEMPLATE(float); +KLEIDICV_INSTANTIATE_TEMPLATE(float, double); } // namespace kleidicv::sve2 diff --git a/scripts/benchmark/benchmarks.txt b/scripts/benchmark/benchmarks.txt index 48f3ffbabea4108d911063829ebd389554429885..48060a76266f8893d109f012e74f74706b6ead9e 100755 --- a/scripts/benchmark/benchmarks.txt +++ b/scripts/benchmark/benchmarks.txt @@ -66,6 +66,8 @@ MinMax_F32: opencv_perf_core '*minMaxVals/*' '($PIXEL_FORMAT, 32FC1)' MinMaxLoc_U8: opencv_perf_core '*minMaxLoc/*' '($PIXEL_FORMAT, 8UC1)' +Sum_F32: opencv_perf_core '*sum/*' '($PIXEL_FORMAT, 32FC1)' + FloatToInt: opencv_perf_core '*convertTo/*' '($PIXEL_FORMAT, 32FC1, 8SC1, 1, 1, 0)' FloatToUint: opencv_perf_core '*convertTo/*' '($PIXEL_FORMAT, 32FC1, 8UC1, 1, 1, 0)' IntToFloat: opencv_perf_core '*convertTo/*' '($PIXEL_FORMAT, 8SC1, 32FC1, 1, 1, 0)' diff --git a/scripts/ci-opencv.sh b/scripts/ci-opencv.sh index e071edcf0a3f156d72e2d8666f773a9a8a11d21c..9db4b03037458475bc3ca7b6d186d48dc5f781cb 100755 --- a/scripts/ci-opencv.sh +++ b/scripts/ci-opencv.sh @@ -121,6 +121,7 @@ CORE_TEST_PATTERNS=( '*MinMaxLoc*' '*Core_ConvertScale*' '*Core_Exp*' + '*Core_Sum*' '*Core_MinMaxIdx*' '*Core_minMaxIdx*' '*Core_Array*' diff --git a/test/api/test_sum.cpp b/test/api/test_sum.cpp index e2002f4c6409b0289c3eb33766ca256c2550fd9d..ff62b618c408b11715aa2aedb0eeee651a22dd84 100644 --- a/test/api/test_sum.cpp +++ b/test/api/test_sum.cpp @@ -6,6 +6,8 @@ #include +#include "framework/array.h" +#include "framework/generator.h" #include "kleidicv/kleidicv.h" TEST(Sum, Ones) { @@ -56,6 +58,29 @@ TEST(Sum, Zeroes) { EXPECT_FLOAT_EQ(0, sum); } +TEST(Sum, Random) { + test::PseudoRandomNumberGeneratorFloatRange random_generator{-99999, + 99999}; + test::Array2D src(32, 32, 0, 1); + src.fill(random_generator); + + float sum = std::numeric_limits::max(); + + EXPECT_EQ(KLEIDICV_OK, kleidicv_sum_f32(src.data(), src.stride(), src.width(), + src.height(), &sum)); + + double expected_sum = 0; + for (size_t row = 0; row < src.height(); ++row) { + for (size_t column = 0; column < src.width() / src.channels(); ++column) { + for (size_t ch = 0; ch < src.channels(); ++ch) { + expected_sum += *src.at(row, column * src.channels() + ch); + } + } + } + + EXPECT_FLOAT_EQ(expected_sum, sum); +} + TEST(Sum, Single) { // clang-format off float src[] = { diff --git a/test/framework/generator.h b/test/framework/generator.h index 4d87e97e805b0256f2bd4b2ea6a2f91aae890830..b2e3e0e7148cb6d87f6821b4606b6041aa04ffd6 100644 --- a/test/framework/generator.h +++ b/test/framework/generator.h @@ -8,6 +8,7 @@ #include #include #include +#include #include "framework/abstract.h" #include "framework/utils.h" @@ -83,6 +84,23 @@ class PseudoRandomNumberGeneratorIntRange std::uniform_int_distribution dist_; }; // end of class PseudoRandomNumberGeneratorIntRange +template , bool> = true> +class PseudoRandomNumberGeneratorFloatRange + : public PseudoRandomNumberGenerator { + public: + PseudoRandomNumberGeneratorFloatRange(ElementType min, ElementType max) + : PseudoRandomNumberGenerator(), dist_(min, max) {} + + // Yields the next value or std::nullopt. + std::optional next() override { + return static_cast(dist_(this->rng_)); + } + + protected: + std::uniform_real_distribution dist_; +}; // end of class PseudoRandomNumberGeneratorFloatRange + // Generator which yields values of an iterable container. template class SequenceGenerator : public Generator {