diff --git a/CHANGELOG.md b/CHANGELOG.md index cb245debf99fccd295500b7da496a9088ea8a25f..8a694fd60bd67949d793a395eb36f7688aa28f5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ This changelog aims to follow the guiding principles of - Filter context creation API specification. - Gaussian Blur API specification. - In the OpenCV HAL, cvtColor YUV2RGB_NV21 is multithreaded. +- In the OpenCV HAL, minMaxIdx is multithreaded when index is not requested. ### Removed diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp index 8e8e59859cbd3ca0277d1d26b28369541aa20a8b..221b791ffda5c47fec5a490c337eeeef514c73ed 100644 --- a/adapters/opencv/kleidicv_hal.cpp +++ b/adapters/opencv/kleidicv_hal.cpp @@ -830,14 +830,15 @@ int transpose(const uchar *src_data, size_t src_step, uchar *dst_data, template kleidicv_error_t call_min_max(FunctionType min_max_func, const uchar *src_data, size_t src_stride, int width, int height, - double *min_value, double *max_value) { + double *min_value, double *max_value, + kleidicv_thread_multithreading mt) { T tmp_min_value, tmp_max_value; T *p_min_value = min_value ? &tmp_min_value : nullptr; T *p_max_value = max_value ? 
&tmp_max_value : nullptr; kleidicv_error_t err = min_max_func(reinterpret_cast(src_data), src_stride, static_cast(width), static_cast(height), - p_min_value, p_max_value); + p_min_value, p_max_value, mt); if (min_value) { *min_value = static_cast(tmp_min_value); } @@ -896,29 +897,29 @@ int min_max_idx(const uchar *src_data, size_t src_step, int width, int height, switch (depth) { case CV_8S: - return convert_error(call_min_max(kleidicv_min_max_s8, src_data, - src_step, width, height, minVal, - maxVal)); + return convert_error(call_min_max( + kleidicv_thread_min_max_s8, src_data, src_step, width, height, minVal, + maxVal, get_multithreading())); case CV_8U: - return convert_error(call_min_max(kleidicv_min_max_u8, src_data, - src_step, width, height, - minVal, maxVal)); + return convert_error(call_min_max( + kleidicv_thread_min_max_u8, src_data, src_step, width, height, minVal, + maxVal, get_multithreading())); case CV_16S: - return convert_error(call_min_max(kleidicv_min_max_s16, src_data, - src_step, width, height, - minVal, maxVal)); + return convert_error(call_min_max( + kleidicv_thread_min_max_s16, src_data, src_step, width, height, + minVal, maxVal, get_multithreading())); case CV_16U: - return convert_error(call_min_max(kleidicv_min_max_u16, - src_data, src_step, width, - height, minVal, maxVal)); + return convert_error(call_min_max( + kleidicv_thread_min_max_u16, src_data, src_step, width, height, + minVal, maxVal, get_multithreading())); case CV_32S: - return convert_error(call_min_max(kleidicv_min_max_s32, src_data, - src_step, width, height, - minVal, maxVal)); + return convert_error(call_min_max( + kleidicv_thread_min_max_s32, src_data, src_step, width, height, + minVal, maxVal, get_multithreading())); case CV_32F: - return convert_error(call_min_max(kleidicv_min_max_f32, src_data, - src_step, width, height, minVal, - maxVal)); + return convert_error(call_min_max( + kleidicv_thread_min_max_f32, src_data, src_step, width, height, + minVal, maxVal, 
get_multithreading())); default: return CV_HAL_ERROR_NOT_IMPLEMENTED; } diff --git a/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h b/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h index b57fe5092a556df3c7e090052a2614b73bad1d1d..3071e4568891d532c12bad0c059856e4122bb3d9 100644 --- a/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h +++ b/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h @@ -55,6 +55,49 @@ kleidicv_error_t kleidicv_thread_yuv_sp_to_rgb_u8( size_t src_uv_stride, uint8_t *dst, size_t dst_stride, size_t width, size_t height, bool is_nv21, kleidicv_thread_multithreading); +/// Multithreaded implementation of kleidicv_min_max_u8 - see the +/// documentation of that function for more details. +kleidicv_error_t kleidicv_thread_min_max_u8(const uint8_t *src, + size_t src_stride, size_t width, + size_t height, uint8_t *min_value, + uint8_t *max_value, + kleidicv_thread_multithreading); +/// Multithreaded implementation of kleidicv_min_max_s8 - see the +/// documentation of that function for more details. +kleidicv_error_t kleidicv_thread_min_max_s8(const int8_t *src, + size_t src_stride, size_t width, + size_t height, int8_t *min_value, + int8_t *max_value, + kleidicv_thread_multithreading); +/// Multithreaded implementation of kleidicv_min_max_u16 - see the +/// documentation of that function for more details. +kleidicv_error_t kleidicv_thread_min_max_u16(const uint16_t *src, + size_t src_stride, size_t width, + size_t height, uint16_t *min_value, + uint16_t *max_value, + kleidicv_thread_multithreading); +/// Multithreaded implementation of kleidicv_min_max_s16 - see the +/// documentation of that function for more details. 
+kleidicv_error_t kleidicv_thread_min_max_s16(const int16_t *src, + size_t src_stride, size_t width, + size_t height, int16_t *min_value, + int16_t *max_value, + kleidicv_thread_multithreading); +/// Multithreaded implementation of kleidicv_min_max_s32 - see the +/// documentation of that function for more details. +kleidicv_error_t kleidicv_thread_min_max_s32(const int32_t *src, + size_t src_stride, size_t width, + size_t height, int32_t *min_value, + int32_t *max_value, + kleidicv_thread_multithreading); +/// Multithreaded implementation of kleidicv_min_max_f32 - see the +/// documentation of that function for more details. +kleidicv_error_t kleidicv_thread_min_max_f32(const float *src, + size_t src_stride, size_t width, + size_t height, float *min_value, + float *max_value, + kleidicv_thread_multithreading); + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/kleidicv_thread/src/kleidicv_thread.cpp b/kleidicv_thread/src/kleidicv_thread.cpp index bb1d664973646a14debfeb832b8353a7c5cfced4..2eb7aab13ecfef2dec417989098d3ac0b754955b 100644 --- a/kleidicv_thread/src/kleidicv_thread.cpp +++ b/kleidicv_thread/src/kleidicv_thread.cpp @@ -5,6 +5,8 @@ #include "kleidicv_thread/kleidicv_thread.h" #include +#include +#include #include "kleidicv/kleidicv.h" @@ -46,3 +48,87 @@ kleidicv_error_t kleidicv_thread_yuv_sp_to_rgb_u8( return mt.parallel(kleidicv_thread_yuv_sp_to_rgb_u8_callback, &callback_data, mt.parallel_data, (height + 1) / 2); } + +template +struct parallel_min_max_data { + FunctionType min_max_func; + const ScalarType *src; + size_t src_stride; + size_t width; + ScalarType *p_min_value; + ScalarType *p_max_value; +}; + +template +static kleidicv_error_t kleidicv_thread_min_max_callback(unsigned task_begin, + unsigned task_end, + void *void_data) { + auto *data = + reinterpret_cast *>( + void_data); + + return data->min_max_func( + data->src + task_begin * (data->src_stride / sizeof(ScalarType)), + data->src_stride, data->width, 
task_end - task_begin, + data->p_min_value ? data->p_min_value + task_begin : nullptr, + data->p_max_value ? data->p_max_value + task_begin : nullptr); +} + +template +kleidicv_error_t parallel_min_max(FunctionType min_max_func, + const ScalarType *src, size_t src_stride, + size_t width, size_t height, + ScalarType *p_min_value, + ScalarType *p_max_value, + kleidicv_thread_multithreading mt) { + std::vector min_values(height, + std::numeric_limits::max()); + // lowest(), not min(): for float, min() is the smallest positive value. + std::vector max_values(height, + std::numeric_limits::lowest()); + parallel_min_max_data callback_data = { + min_max_func, + src, + src_stride, + width, + p_min_value ? min_values.data() : nullptr, + p_max_value ? max_values.data() : nullptr}; + + auto return_val = + mt.parallel(kleidicv_thread_min_max_callback, + &callback_data, mt.parallel_data, height); + + if (p_min_value) { + *p_min_value = std::numeric_limits::max(); + for (ScalarType m : min_values) { + if (m < *p_min_value) { + *p_min_value = m; + } + } + } + if (p_max_value) { + *p_max_value = std::numeric_limits::lowest(); + for (ScalarType m : max_values) { + if (m > *p_max_value) { + *p_max_value = m; + } + } + } + return return_val; +} + +#define DEFINE_KLEIDICV_THREAD_MIN_MAX(suffix, type) \ + kleidicv_error_t kleidicv_thread_min_max_##suffix( \ + const type *src, size_t src_stride, size_t width, size_t height, \ + type *p_min_value, type *p_max_value, \ + kleidicv_thread_multithreading mt) { \ + return parallel_min_max(kleidicv_min_max_##suffix, src, src_stride, width, \ + height, p_min_value, p_max_value, mt); \ + } + +DEFINE_KLEIDICV_THREAD_MIN_MAX(u8, uint8_t); +DEFINE_KLEIDICV_THREAD_MIN_MAX(s8, int8_t); +DEFINE_KLEIDICV_THREAD_MIN_MAX(u16, uint16_t); +DEFINE_KLEIDICV_THREAD_MIN_MAX(s16, int16_t); +DEFINE_KLEIDICV_THREAD_MIN_MAX(s32, int32_t); +DEFINE_KLEIDICV_THREAD_MIN_MAX(f32, float); diff --git a/test/api/test_thread_min_max.cpp b/test/api/test_thread_min_max.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..088f716bfba9ac82d1f4849802ad8ce7db5c6ce3 --- /dev/null +++ b/test/api/test_thread_min_max.cpp @@ -0,0 +1,109 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include + +#include "framework/array.h" +#include "framework/generator.h" +#include "kleidicv/kleidicv.h" +#include "kleidicv_thread/kleidicv_thread.h" +#include "multithreading_fake.h" + +#define KLEIDICV_MIN_MAX(type, suffix) \ + KLEIDICV_API(min_max, kleidicv_min_max_##suffix, type) + +KLEIDICV_MIN_MAX(int8_t, s8); +KLEIDICV_MIN_MAX(uint8_t, u8); +KLEIDICV_MIN_MAX(int16_t, s16); +KLEIDICV_MIN_MAX(uint16_t, u16); +KLEIDICV_MIN_MAX(int32_t, s32); +KLEIDICV_MIN_MAX(float, f32); + +#define KLEIDICV_THREAD_MIN_MAX(type, suffix) \ + KLEIDICV_API(thread_min_max, kleidicv_thread_min_max_##suffix, type) + +KLEIDICV_THREAD_MIN_MAX(int8_t, s8); +KLEIDICV_THREAD_MIN_MAX(uint8_t, u8); +KLEIDICV_THREAD_MIN_MAX(int16_t, s16); +KLEIDICV_THREAD_MIN_MAX(uint16_t, u16); +KLEIDICV_THREAD_MIN_MAX(int32_t, s32); +KLEIDICV_THREAD_MIN_MAX(float, f32); + +template +class Thread : public testing::Test {}; + +TYPED_TEST_SUITE_P(Thread); + +// Tuple of width, height, thread count. 
+typedef std::tuple P; + +static const auto test_params = { + P{1, 1, 1}, P{1, 2, 1}, P{1, 2, 2}, P{2, 1, 2}, P{2, 2, 1}, P{1, 3, 2}, + P{2, 3, 1}, P{6, 4, 1}, P{4, 5, 2}, P{2, 6, 3}, P{1, 7, 4}, P{12, 34, 5}}; + +TYPED_TEST_P(Thread, CompareWithSingle) { + size_t width = 0, height = 0, thread_count = 0; + for (auto params : test_params) { + std::tie(width, height, thread_count) = params; + test::Array2D src(width, height); + TypeParam min_single, max_single, min_multi, max_multi; + + test::PseudoRandomNumberGenerator generator; + src.fill(generator); + + kleidicv_error_t single_result = min_max()( + src.data(), src.stride(), width, height, &min_single, &max_single); + + kleidicv_error_t multi_result = thread_min_max()( + src.data(), src.stride(), width, height, &min_multi, &max_multi, + get_multithreading_fake(thread_count)); + + EXPECT_EQ(KLEIDICV_OK, single_result); + EXPECT_EQ(KLEIDICV_OK, multi_result); + EXPECT_EQ(min_multi, min_single); + EXPECT_EQ(max_multi, max_single); + } +} + +TYPED_TEST_P(Thread, NullArguments) { + size_t width = 1, height = 2, thread_count = 2; + TypeParam src[2] = {1, 2}, min_value, max_value; + + min_value = max_value = 0; + kleidicv_error_t res = thread_min_max()( + src, width * sizeof(TypeParam), width, height, nullptr, &max_value, + get_multithreading_fake(thread_count)); + + EXPECT_EQ(KLEIDICV_OK, res); + EXPECT_EQ(0, min_value); + EXPECT_EQ(2, max_value); + + min_value = max_value = 0; + res = thread_min_max()(src, width * sizeof(TypeParam), width, + height, &min_value, nullptr, + get_multithreading_fake(thread_count)); + + EXPECT_EQ(KLEIDICV_OK, res); + EXPECT_EQ(1, min_value); + EXPECT_EQ(0, max_value); + + min_value = max_value = 0; + res = thread_min_max()(src, width * sizeof(TypeParam), width, + height, nullptr, nullptr, + get_multithreading_fake(thread_count)); + + EXPECT_EQ(KLEIDICV_OK, res); + EXPECT_EQ(0, min_value); + EXPECT_EQ(0, max_value); +} + +REGISTER_TYPED_TEST_SUITE_P(Thread, CompareWithSingle, NullArguments); 
+ +using MinMaxElementTypes = + ::testing::Types; + +INSTANTIATE_TYPED_TEST_SUITE_P(MinMax, Thread, MinMaxElementTypes); diff --git a/test/framework/utils.h b/test/framework/utils.h index a624db73cff510736430b65365aee87bb29fb8c7..c68024d8ebed6e2265b15f50c59565345eee6b86 100644 --- a/test/framework/utils.h +++ b/test/framework/utils.h @@ -21,7 +21,7 @@ template , bool> = true> \ static decltype(auto) name() { \ - return impl; \ + return (impl); \ } #define KLEIDICV_API_DIFFERENT_IO_TYPES(name, impl, itype, otype) \