diff --git a/CHANGELOG.md b/CHANGELOG.md
index cb86f428d22704687b8c7d2cf6d287ff744f9685..1f24b367ab074f5c0fb3ec4efebb223e369638b2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,6 +20,7 @@ This changelog aims to follow the guiding principles of
   - 2-channel s16 and s16+u16 fixed-point coordinates and 1-channel u8 input.
   - 2-channel s16 + 5+5 bits' fractions fixed-point coordinates and 1-channel u8 input.
 - Implementation for cv::pyrDown in the OpenCV HAL.
+- Sum function for 1 channel float.
 
 ## 0.2.0 - 2024-09-30
 
diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp
index 01b33c8b00c5b6918bb1d340d6539c73fd72edaf..cb3f491b54603a731f8d8cce491af3d19b037568 100644
--- a/adapters/opencv/kleidicv_hal.cpp
+++ b/adapters/opencv/kleidicv_hal.cpp
@@ -962,6 +962,38 @@ int transpose(const uchar *src_data, size_t src_step, uchar *dst_data,
       static_cast(element_size)));
 }
 
+// OpenCV HAL entry point for summing the elements of an image.
+//
+// Only single-channel CV_32F data is handled; any other channel count or
+// depth yields CV_HAL_ERROR_NOT_IMPLEMENTED. For CV_32F the work is delegated
+// to kleidicv_sum_f32(), its float output is widened to double and stored in
+// *result, and the KleidiCV status is returned through convert_error().
+int sum(const uchar *src_data, size_t src_step, int src_type, size_t width,
+        size_t height, double *result) {
+  size_t channels = (src_type >> CV_CN_SHIFT) + 1;
+
+  if (channels != 1) {
+    return CV_HAL_ERROR_NOT_IMPLEMENTED;
+  }
+
+  switch (CV_MAT_DEPTH(src_type)) {
+    // The braces scope the initialized locals to this case, so another case
+    // or default label can never jump across their initialization.
+    case CV_32F: {
+      float result_float = 0;
+      kleidicv_error_t err =
+          kleidicv_sum_f32(reinterpret_cast<const float *>(src_data), src_step,
+                           width, height, &result_float);
+      *result = result_float;
+      return convert_error(err);
+    }
+    default:
+      break;
+  }
+
+  return CV_HAL_ERROR_NOT_IMPLEMENTED;
+}
+
 template
 kleidicv_error_t call_min_max(SingleThreadFunc min_max_func_st,
                               MultithreadFunc min_max_func_mt,
diff --git a/adapters/opencv/kleidicv_hal.h b/adapters/opencv/kleidicv_hal.h
index 70f92d20f98de4eda01ee8ef5dcab251a429d9b3..e628acd42f824457d77460bce7234b024b64c00b 100644
--- a/adapters/opencv/kleidicv_hal.h
+++ b/adapters/opencv/kleidicv_hal.h
@@ -123,6 +123,9 @@ int pyrdown(const uchar *src_data, size_t src_step, int src_width,
 
 int transpose(const uchar *src_data, size_t src_step, uchar *dst_data,
               size_t dst_step, int src_width, int src_height, int 
element_size);
+int sum(const uchar *src_data, size_t src_step, int src_type, size_t width,
+        size_t height, double *result);
+
 int min_max_idx(const uchar *src_data, size_t src_stride, int width, int height,
                 int depth, double *min_value, double *max_value,
                 int *min_index, int *max_index, uchar *mask);
@@ -430,6 +433,19 @@ static inline int kleidicv_transpose_with_fallback(
 #undef cv_hal_transpose2d
 #define cv_hal_transpose2d kleidicv_transpose_with_fallback
 
+// sum (wrapped only when the OpenCV build already defines the cv_hal_sum hook)
+#ifdef cv_hal_sum
+static inline int kleidicv_sum_with_fallback(const uchar *src_data,
+                                             size_t src_step, int src_type,
+                                             size_t width, size_t height,
+                                             double *result) {
+  return KLEIDICV_HAL_FALLBACK_FORWARD(sum, cv_hal_sum, src_data, src_step,
+                                       src_type, width, height, result);
+}
+#undef cv_hal_sum
+#define cv_hal_sum kleidicv_sum_with_fallback  // hook now names the wrapper
+#endif  // cv_hal_sum
+
 // min_max_idx
 static inline int kleidicv_min_max_idx_with_fallback(
     const uchar *src_data, size_t src_stride, int width, int height, int depth,
diff --git a/adapters/opencv/opencv-4.10.patch b/adapters/opencv/opencv-4.10.patch
index 23d1dd320c4c50f1ef2454d7c16426c2be67251c..211a472ab18c8fbc96178a1d7ff571dde66a5ce7 100644
--- a/adapters/opencv/opencv-4.10.patch
+++ b/adapters/opencv/opencv-4.10.patch
@@ -19,7 +19,7 @@ index 2b4035285f..729cd1dd43 100644
 @@ -281,6 +281,11 @@ void Mat::convertTo(OutputArray dst, int type_, double alpha, double beta) const
      dst.create(dims, size, dtype);
      Mat dstMat = dst.getMat();
- 
+
 +    if( dims <= 2 ) {
 +        int width_in_elements = src.cols * cn;
 +        CALL_HAL(convertTo, cv_hal_convertTo, src.data, src.step, src.depth(), dstMat.data, dstMat.step, dstMat.depth(), width_in_elements, src.rows, alpha, beta);
@@ -29,13 +29,25 @@ index 2b4035285f..729cd1dd43 100644
      double scale[] = {alpha, beta};
      CV_Assert( func != 0 );
 diff --git a/modules/core/src/hal_replacement.hpp b/modules/core/src/hal_replacement.hpp
-index f78608dbad..299b5e54bd 100644
+index f78608dbad..a9384588ec 100644
  --- 
a/modules/core/src/hal_replacement.hpp
 +++ b/modules/core/src/hal_replacement.hpp
-@@ -953,6 +953,41 @@ inline int hal_ni_transpose2d(const uchar* src_data, size_t src_step, uchar* dst
+@@ -953,6 +953,53 @@ inline int hal_ni_transpose2d(const uchar* src_data, size_t src_step, uchar* dst
  #define cv_hal_transpose2d hal_ni_transpose2d
  //! @endcond
- 
+
 +/**
++   @brief sum
++   @param src_data,src_step,src_type Source image
++   @param width,height Source image dimensions
++   @param result Pointer to save the sum result to.
++*/
++inline int hal_ni_sum(const uchar *src_data, size_t src_step, int src_type, size_t width, size_t height, double *result) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
++
++//! @cond IGNORED
++#define cv_hal_sum hal_ni_sum
++//! @endcond
++
 +/**
 +   @brief convertTo
 +   @param src_data,src_step,src_depth Source image
@@ -72,8 +84,8 @@ index f78608dbad..299b5e54bd 100644
 +//! @endcond
 +
  //! @}
- 
- 
+
+
 diff --git a/modules/core/src/minmax.cpp b/modules/core/src/minmax.cpp
 index 8c6d8ad9a9..47eb6fdb66 100644
 --- a/modules/core/src/minmax.cpp
@@ -90,14 +102,34 @@ index 8c6d8ad9a9..47eb6fdb66 100644
          return;
      }
      else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED)
+diff --git a/modules/core/src/sum.dispatch.cpp b/modules/core/src/sum.dispatch.cpp
+index fade948336..17b40ca0e8 100644
+--- a/modules/core/src/sum.dispatch.cpp
++++ b/modules/core/src/sum.dispatch.cpp
+@@ -199,6 +199,15 @@ Scalar sum(InputArray _src)
+     CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_sum(src, _res), _res);
+
+     int k, cn = src.channels(), depth = src.depth();
++
++    // Mat::rows and Mat::cols are -1 when dims > 2, so only 2D matrices are
++    // handed to the HAL; everything else takes the generic path below.
++    if (src.dims <= 2)
++    {
++        double result = 0;
++        CALL_HAL_RET(sum, cv_hal_sum, result, src.data, src.step, src.type(), src.cols, src.rows);
++    }
++
+     SumFunc func = getSumFunc(depth);
+     CV_Assert( cn <= 4 && func != 0 );
+
 diff --git a/modules/imgproc/src/hal_replacement.hpp b/modules/imgproc/src/hal_replacement.hpp
-index 773fed9b48..145d653f5d 100644
+index 773fed9b48..b74ff70f99 100644
  --- a/modules/imgproc/src/hal_replacement.hpp
  +++ 
b/modules/imgproc/src/hal_replacement.hpp
 @@ -328,6 +328,60 @@ inline int hal_ni_remap32f(int src_type, const uchar *src_data, size_t src_step,
  #define cv_hal_remap32f hal_ni_remap32f
  //! @endcond
- 
+
 +/**
 +   @brief hal_remap with a short integer map
 +   @param src_type source and destination image type
@@ -172,7 +199,7 @@ index d7c9c64c3c..348208b72d 100644
 +        CALL_HAL(remap16s16u, cv_hal_remap16s16u, src.type(), src.data, src.step, src.cols, src.rows, dst.data, dst.step, dst.cols, dst.rows,
 +                 map1.ptr(), map1.step, map2.ptr(), map2.step, borderType, borderValue.val);
      }
- 
+
      interpolation &= ~WARP_RELATIVE_MAP;
 diff --git a/modules/imgproc/src/smooth.dispatch.cpp b/modules/imgproc/src/smooth.dispatch.cpp
 index d0f50a73bb..1c308887dc 100644
@@ -181,7 +208,7 @@ index d0f50a73bb..1c308887dc 100644
 @@ -654,6 +654,25 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
                 ocl_GaussianBlur_8UC1(_src, _dst, ksize, CV_MAT_DEPTH(type), kx, ky, borderType)
      );
- 
+
 +    {
 +        Mat src = _src.getMat();
 +        Mat dst = _dst.getMat();
diff --git a/conformity/opencv/test_sum.cpp b/conformity/opencv/test_sum.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a90691ee5f904f871a69198d2b4f771d127deb2d
--- /dev/null
+++ b/conformity/opencv/test_sum.cpp
@@ -0,0 +1,49 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include
+#include
+
+#include "tests.h"
+
+template
+cv::Mat exec_sum(cv::Mat& input) {
+  // Stores the cv::sum() result in a 1x1 matrix; enlarge it if multi-channel sum is added
+  cv::Mat result(1, 1, Format, cv::sum(input));
+  return result;
+}
+
+#if MANAGER
+template
+bool test_sum(int index, RecreatedMessageQueue& request_queue,
+              RecreatedMessageQueue& reply_queue) {
+  cv::RNG rng(0);  // constant initial state, so every run sees the same inputs
+
+  for (size_t height = 2; height <= 128; height *= 2) {
+    for (size_t width = 2; width <= 128; width *= 2) {
+      cv::Mat input(height, width, Format);
+      rng.fill(input, cv::RNG::UNIFORM, -10, 100);
+      cv::Mat actual = exec_sum(input);
+      cv::Mat expected = get_expected_from_subordinate(index, request_queue,
+                                                       reply_queue, input);
+
+      if (are_float_matrices_different(0.001, actual, expected)) {
+        fail_print_matrices(height, width, input, actual, expected);
+        return true;  // a mismatch was detected for this size
+      }
+    }
+  }
+
+  return false;
+}
+#endif  // MANAGER
+
+std::vector& sum_tests_get() {
+  // clang-format off
+  static std::vector tests = {
+    TEST("sum_f32", (test_sum), (exec_sum)),
+  };
+  // clang-format on
+  return tests;
+}
diff --git a/conformity/opencv/tests.cpp b/conformity/opencv/tests.cpp
index 14ccc52c691d59f115cb22a9d822c26f9d9dbf85..f405106a448dd91a7d86527076a3cdaad0926ba0 100644
--- a/conformity/opencv/tests.cpp
+++ b/conformity/opencv/tests.cpp
@@ -37,6 +37,7 @@ std::vector all_tests = merge_tests({
   float_conversion_tests_get,
   resize_tests_get,
   scale_tests_get,
+  sum_tests_get,
   min_max_tests_get,
   in_range_tests_get,
   remap_tests_get,
diff --git a/conformity/opencv/tests.h b/conformity/opencv/tests.h
index c1b3c8a4dc3b9e208b421df485a7341d2a28f34a..6b01dcc26472fabd0e0dfa00e5c2e2aa11f81d3e 100644
--- a/conformity/opencv/tests.h
+++ b/conformity/opencv/tests.h
@@ -16,6 +16,7 @@ std::vector& gaussian_blur_tests_get();
 std::vector& rgb2yuv_tests_get();
 std::vector& yuv2rgb_tests_get();
 std::vector& sobel_tests_get();
+std::vector& sum_tests_get();
 std::vector& exp_tests_get();
 std::vector& float_conversion_tests_get();
 std::vector& resize_tests_get();
diff --git a/doc/opencv.md b/doc/opencv.md
index d56d7dedd6c9b60689d5c9e34119b5c5d5477756..52ca6429cc6c296f10d7f843a2acff0695b2314e 100644
--- a/doc/opencv.md
+++ b/doc/opencv.md
@@ -32,6 +32,13 @@ Notes on parameters:
 ### [`cv::bitwise_and()`](https://docs.opencv.org/4.10.0/d2/de8/group__core__array.html#ga60b4d04b251ba5eb1392c34425497e14)
 Bitwise conjunction of two arrays.
 
+### [`cv::sum()`](https://docs.opencv.org/4.10.0/d2/de8/group__core__array.html#ga716e10a2dd9e228e4d3c95818f106722)
+Calculates the sum of array elements.
+ +Notes on parameters: +* `src.depth()` - only supports `CV_32F` depth. +* `src.channels()` - only supports 1 channel. + ### [`cv::cvtColor()`](https://docs.opencv.org/4.10.0/d8/d01/group__imgproc__color__conversions.html#ga397ae87e1288a81d2363b61574eb8cab) Converts the color space of an image. diff --git a/scripts/benchmark/benchmarks.txt b/scripts/benchmark/benchmarks.txt index d40d2dd9c008d784ede509085990aa5d495a28ef..beb49332cb2aacd1e4a94976efe312192d0f262e 100755 --- a/scripts/benchmark/benchmarks.txt +++ b/scripts/benchmark/benchmarks.txt @@ -66,6 +66,8 @@ MinMax_F32: opencv_perf_core '*minMaxVals/*' '($PIXEL_FORMAT, 32FC1)' MinMaxLoc_U8: opencv_perf_core '*minMaxLoc/*' '($PIXEL_FORMAT, 8UC1)' +Sum_F32: opencv_perf_core '*sum/*' '($PIXEL_FORMAT, 32FC1)' + FloatToInt: opencv_perf_core '*convertTo/*' '($PIXEL_FORMAT, 32FC1, 8SC1, 1, 1, 0)' FloatToUint: opencv_perf_core '*convertTo/*' '($PIXEL_FORMAT, 32FC1, 8UC1, 1, 1, 0)' IntToFloat: opencv_perf_core '*convertTo/*' '($PIXEL_FORMAT, 8SC1, 32FC1, 1, 1, 0)' diff --git a/scripts/ci-opencv.sh b/scripts/ci-opencv.sh index f46892aedc132ec3d6561493d2e14d5aa4e9c871..931c5c0fa006cba53446b8a712a094996b4941e3 100755 --- a/scripts/ci-opencv.sh +++ b/scripts/ci-opencv.sh @@ -82,6 +82,7 @@ CORE_TEST_PATTERNS=( '*MinMaxLoc*' '*Core_ConvertScale*' '*Core_Exp*' + '*Core_Sum*' '*Core_MinMaxIdx*' '*Core_minMaxIdx*' '*Core_Array*'