From 5c2429af61f9adc227c5ad7dc8e421f39a88c997 Mon Sep 17 00:00:00 2001 From: Yufeng Shi Date: Wed, 13 Nov 2024 17:06:50 +0000 Subject: [PATCH] Add Neon implementation for Rotate 90 --- CHANGELOG.md | 3 + adapters/opencv/kleidicv_hal.cpp | 31 +++ adapters/opencv/kleidicv_hal.h | 16 ++ doc/functionality.md | 11 +- doc/opencv.md | 7 + .../include/kleidicv/arithmetics/rotate.h | 48 ++++ kleidicv/include/kleidicv/kleidicv.h | 33 +++ kleidicv/include/kleidicv/neon_intrinsics.h | 13 ++ kleidicv/src/arithmetics/rotate_api.cpp | 10 + kleidicv/src/arithmetics/rotate_neon.cpp | 187 ++++++++++++++++ .../include/kleidicv_thread/kleidicv_thread.h | 10 + kleidicv_thread/src/kleidicv_thread.cpp | 23 +- scripts/ci-opencv.sh | 1 + test/api/test_rotate.cpp | 208 ++++++++++++++++++ test/api/test_rotate_thread.cpp | 147 +++++++++++++ 15 files changed, 741 insertions(+), 7 deletions(-) create mode 100644 kleidicv/include/kleidicv/arithmetics/rotate.h create mode 100644 kleidicv/src/arithmetics/rotate_api.cpp create mode 100644 kleidicv/src/arithmetics/rotate_neon.cpp create mode 100644 test/api/test_rotate.cpp create mode 100644 test/api/test_rotate_thread.cpp diff --git a/CHANGELOG.md b/CHANGELOG.md index 232821e2e..94c128c23 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,9 @@ This changelog aims to follow the guiding principles of ## 0.4.0 - not yet released +### Added +- Implementation of Rotate 90 degrees clockwise. 
+ ## 0.3.0 - 2024-12-12 ### Added diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp index ce1cbddc6..72c9f7405 100644 --- a/adapters/opencv/kleidicv_hal.cpp +++ b/adapters/opencv/kleidicv_hal.cpp @@ -17,6 +17,7 @@ #include "kleidicv/kleidicv.h" #include "kleidicv_thread/kleidicv_thread.h" #include "opencv2/core/base.hpp" +#include "opencv2/core/cvdef.h" #include "opencv2/core/hal/interface.h" #include "opencv2/core/types.hpp" #include "opencv2/core/utility.hpp" @@ -40,6 +41,8 @@ enum { MULTITHREAD_MIN_ELEMENTS_RGBA_TO_BGRA_U8 = 11000, MULTITHREAD_MIN_ELEMENTS_SCALE_U8 = 5000, MULTITHREAD_MIN_ELEMENTS_SCALE_F32 = 20000, + MULTITHREAD_MIN_ELEMENTS_ROTATE_U8 = 40000, + MULTITHREAD_MIN_ELEMENTS_ROTATE_U16 = 30000, }; static int convert_error(kleidicv_error_t e) { @@ -996,6 +999,34 @@ int transpose(const uchar *src_data, size_t src_step, uchar *dst_data, static_cast(element_size))); } +int rotate(int src_type, const uchar *src_data, size_t src_step, int src_width, + int src_height, uchar *dst_data, size_t dst_step, int angle) { + int element_size = CV_ELEM_SIZE(src_type); + + size_t multithread_min_elements = 0; + switch (element_size) { + case sizeof(uint8_t): + multithread_min_elements = MULTITHREAD_MIN_ELEMENTS_ROTATE_U8; + break; + case sizeof(uint16_t): + multithread_min_elements = MULTITHREAD_MIN_ELEMENTS_ROTATE_U16; + break; + } + + return convert_error( + src_width * src_height < multithread_min_elements + ? 
kleidicv_rotate(reinterpret_cast(src_data), src_step, + static_cast(src_width), + static_cast(src_height), + reinterpret_cast(dst_data), dst_step, angle, + static_cast(element_size)) + : kleidicv_thread_rotate( + reinterpret_cast(src_data), src_step, + static_cast(src_width), static_cast(src_height), + reinterpret_cast(dst_data), dst_step, angle, + static_cast(element_size), get_multithreading())); +} + template kleidicv_error_t call_min_max(SingleThreadFunc min_max_func_st, MultithreadFunc min_max_func_mt, diff --git a/adapters/opencv/kleidicv_hal.h b/adapters/opencv/kleidicv_hal.h index f2db78759..4e9b9c283 100644 --- a/adapters/opencv/kleidicv_hal.h +++ b/adapters/opencv/kleidicv_hal.h @@ -123,6 +123,9 @@ int pyrdown(const uchar *src_data, size_t src_step, int src_width, int transpose(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int src_width, int src_height, int element_size); +int rotate(int src_type, const uchar *src_data, size_t src_step, int src_width, + int src_height, uchar *dst_data, size_t dst_step, int angle); + int min_max_idx(const uchar *src_data, size_t src_stride, int width, int height, int depth, double *min_value, double *max_value, int *min_index, int *max_index, uchar *mask); @@ -465,6 +468,19 @@ static inline int kleidicv_transpose_with_fallback( #undef cv_hal_transpose2d #define cv_hal_transpose2d kleidicv_transpose_with_fallback +// rotate +static inline int kleidicv_rotate_with_fallback(int src_type, + const uchar *src_data, + size_t src_step, int src_width, + int src_height, uchar *dst_data, + size_t dst_step, int angle) { + return KLEIDICV_HAL_FALLBACK_FORWARD(rotate, cv_hal_rotate90, src_type, + src_data, src_step, src_width, + src_height, dst_data, dst_step, angle); +} +#undef cv_hal_rotate90 +#define cv_hal_rotate90 kleidicv_rotate_with_fallback + // min_max_idx static inline int kleidicv_min_max_idx_with_fallback( const uchar *src_data, size_t src_stride, int width, int height, int depth, diff --git 
a/doc/functionality.md b/doc/functionality.md index 44eb65a6d..dab8381f9 100644 --- a/doc/functionality.md +++ b/doc/functionality.md @@ -64,11 +64,12 @@ See `doc/opencv.md` for details of the functionality available in OpenCV. | Count non-zeros | | x | | | | | | | ## Matrix transformation functions -| | 8-bit | 16-bit | 32-bit | 64-bit | -|-----------------|-------|--------|--------|--------| -| Merge | x | x | x | x | -| Split | x | x | x | x | -| Transpose | x | x | x | x | +| | 8-bit | 16-bit | 32-bit | 64-bit | +|-------------------------------|-------|--------|--------|--------| +| Merge | x | x | x | x | +| Split | x | x | x | x | +| Transpose | x | x | x | x | +| Rotate (90 degrees clockwise) | x | x | x | x | ## Image filters | | u8 | s16 | u16 | diff --git a/doc/opencv.md b/doc/opencv.md index 250605a02..2650f8015 100644 --- a/doc/opencv.md +++ b/doc/opencv.md @@ -152,6 +152,13 @@ Transposes a matrix. Notes on parameters: * In-place `transpose` is only supported for square matrices. (`src.cols == src.rows`) +### [`cv::rotate()`](https://docs.opencv.org/4.10.0/d2/de8/group__core__array.html#ga4ad01c0978b0ce64baa246811deeac24) +Rotates a 2D array in multiples of 90 degrees. + +Notes on parameters: +* In-place `rotate` is not supported. (`src == dst`) +* `rotateCode` - only `ROTATE_90_CLOCKWISE` is supported. + ### [`cv::minMaxIdx()`](https://docs.opencv.org/4.10.0/d2/de8/group__core__array.html#ga7622c466c628a75d9ed008b42250a73f) Finds the minimum and maximum element values and their positions. 
diff --git a/kleidicv/include/kleidicv/arithmetics/rotate.h b/kleidicv/include/kleidicv/arithmetics/rotate.h new file mode 100644 index 000000000..5b7fd35db --- /dev/null +++ b/kleidicv/include/kleidicv/arithmetics/rotate.h @@ -0,0 +1,48 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_ARITHMETICS_ROTATE_H +#define KLEIDICV_ARITHMETICS_ROTATE_H + +#include +#include + +#include "kleidicv/ctypes.h" + +namespace kleidicv { + +inline bool rotate_is_implemented(const void *src, void *dst, int angle, + size_t element_size) { + if (angle != 90) { + return false; + } + if (src == dst) { + // Do not support inplace rotate at the moment + return false; + } + switch (element_size) { + case sizeof(uint8_t): + case sizeof(uint16_t): + case sizeof(uint32_t): + case sizeof(uint64_t): + return true; + default: + return false; + } +} + +namespace neon { + +kleidicv_error_t rotate(const void *src, size_t src_stride, size_t width, + size_t height, void *dst, size_t dst_stride, int angle, + size_t element_size); +} // namespace neon + +namespace sve2 {} // namespace sve2 + +namespace sme2 {} // namespace sme2 + +} // namespace kleidicv + +#endif // KLEIDICV_ARITHMETICS_ROTATE_H diff --git a/kleidicv/include/kleidicv/kleidicv.h b/kleidicv/include/kleidicv/kleidicv.h index 86f6f5d5c..7cb4e08b7 100644 --- a/kleidicv/include/kleidicv/kleidicv.h +++ b/kleidicv/include/kleidicv/kleidicv.h @@ -1466,6 +1466,39 @@ KLEIDICV_API_DECLARATION(kleidicv_transpose, const void *src, size_t src_stride, void *dst, size_t dst_stride, size_t src_width, size_t src_height, size_t element_size); +/// Matrix rotate operation. +/// In-place operation is not supported. +/// Only supports 90 degrees clockwise rotate +/// Example for `src[3,2]` to `dst[2,3]`: +/// ``` +/// | 0 | 1 | 2 | | 4 | 0 | +/// | 4 | 5 | 6 | -> | 5 | 1 | +/// | 6 | 2 | +/// ``` +/// Number of elements is limited to @ref KLEIDICV_MAX_IMAGE_PIXELS. 
+/// @param src Pointer to the source data. Must be non-null. +/// Must be aligned to `element_size`. +/// @param src_stride Distance in bytes from the start of one row to the +/// start of the next row for the source data. +/// Must be a multiple of `element_size` and no less than +/// `width * element_size`, except for single-row images. +/// @param width Number of columns in the source data. +/// @param height Number of rows in the source data. +/// @param dst Pointer to the destination data. Must be non-null. +/// In-place operation (`dst == src`) is not supported. +/// Must be aligned to `element_size`. +/// @param dst_stride Distance in bytes from the start of one row to the +/// start of the next row for the destination data. +/// Must be a multiple of `element_size` and no less than +/// `height * element_size`, except for single-column +/// images. +/// @param angle Degrees to rotate clockwise. Must be 90. +/// @param element_size Size of one element in bytes. Must be 1, 2, 4 or 8. +/// +KLEIDICV_API_DECLARATION(kleidicv_rotate, const void *src, size_t src_stride, + size_t width, size_t height, void *dst, + size_t dst_stride, int angle, size_t element_size); + /// Merges separate 1-channel source streams to one multi channel stream. Width /// and height are the same for all the source streams and for the destination. /// Number of pixels is limited to @ref KLEIDICV_MAX_IMAGE_PIXELS. 
diff --git a/kleidicv/include/kleidicv/neon_intrinsics.h b/kleidicv/include/kleidicv/neon_intrinsics.h index b3edbf4ef..cee298b94 100644 --- a/kleidicv/include/kleidicv/neon_intrinsics.h +++ b/kleidicv/include/kleidicv/neon_intrinsics.h @@ -505,6 +505,19 @@ static inline uint32x4_t vcombine(uint32x2_t lhs, uint32x2_t rhs) { return vcomb static inline int64x2_t vcombine(int64x1_t lhs, int64x1_t rhs) { return vcombine_s64(lhs, rhs); } static inline uint64x2_t vcombine(uint64x1_t lhs, uint64x1_t rhs) { return vcombine_u64(lhs, rhs); } +// ----------------------------------------------------------------------------- +// vrev* +// ----------------------------------------------------------------------------- + +static inline int8x16_t vrev64q(int8x16_t src) { return vrev64q_s8(src); } +static inline uint8x16_t vrev64q(uint8x16_t src) { return vrev64q_u8(src); } +static inline int16x8_t vrev64q(int16x8_t src) { return vrev64q_s16(src); } +static inline uint16x8_t vrev64q(uint16x8_t src) { return vrev64q_u16(src); } +static inline int32x4_t vrev64q(int32x4_t src) { return vrev64q_s32(src); } +static inline uint32x4_t vrev64q(uint32x4_t src) { return vrev64q_u32(src); } +static inline int64x2_t vrev64q(int64x2_t src) { return src; } +static inline uint64x2_t vrev64q(uint64x2_t src) { return src; } + // clang-format on } // namespace kleidicv::neon diff --git a/kleidicv/src/arithmetics/rotate_api.cpp b/kleidicv/src/arithmetics/rotate_api.cpp new file mode 100644 index 000000000..3a2d8b8b3 --- /dev/null +++ b/kleidicv/src/arithmetics/rotate_api.cpp @@ -0,0 +1,10 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "kleidicv/arithmetics/rotate.h" +#include "kleidicv/dispatch.h" +#include "kleidicv/kleidicv.h" + +KLEIDICV_MULTIVERSION_C_API(kleidicv_rotate, &kleidicv::neon::rotate, nullptr, + nullptr); diff --git a/kleidicv/src/arithmetics/rotate_neon.cpp b/kleidicv/src/arithmetics/rotate_neon.cpp new file 
mode 100644 index 000000000..3955d14b1 --- /dev/null +++ b/kleidicv/src/arithmetics/rotate_neon.cpp @@ -0,0 +1,187 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include "kleidicv/arithmetics/rotate.h" +#include "kleidicv/kleidicv.h" +#include "kleidicv/neon.h" + +namespace kleidicv::neon { + +template +static void rotate_vectors_recursively(DstVectorType *dst_vectors, + Rows src_rows) { + // order is halved at every recursive call, once it is 2 the recursion should + // stop and the input data needs to be read. + if constexpr (Order == 2) { + KLEIDICV_FORCE_LOOP_UNROLL + for (size_t index = 0; index < BufferSize; index += Order) { + using SrcVectorType = typename VecTraits::VectorType; + SrcVectorType src_vector[2]; + + src_vector[0] = vld1q(&src_rows.at(index + 0)[0]); + src_vector[1] = vld1q(&src_rows.at(index + 1)[0]); + + // If order is 2 then SrcVectorType is the same as DstVectorType + dst_vectors[index + 0] = vtrn1q(src_vector[0], src_vector[1]); + dst_vectors[index + 1] = vtrn2q(src_vector[0], src_vector[1]); + } + } else { + // First the input for the current rotate stage, which is the output of + // the previous stage, is created. The previous stage rotates + // elements half the size of the current stage and its order is also half of + // the current one. 
+ half_element_width_t input[BufferSize]; + constexpr size_t previous_order = Order / 2; + + rotate_vectors_recursively(input, src_rows); + + constexpr size_t half_order = Order / 2; + + KLEIDICV_FORCE_LOOP_UNROLL + for (size_t outer_i = 0; outer_i < BufferSize; outer_i += Order) { + KLEIDICV_FORCE_LOOP_UNROLL + for (size_t inner_i = 0; inner_i < half_order; ++inner_i) { + dst_vectors[outer_i + inner_i] = + vtrn1q(reinterpret_cast(input[outer_i + inner_i]), + reinterpret_cast( + input[outer_i + inner_i + half_order])); + + dst_vectors[outer_i + half_order + inner_i] = + vtrn2q(reinterpret_cast(input[outer_i + inner_i]), + reinterpret_cast( + input[outer_i + inner_i + half_order])); + } + } + } +} + +// Rotates one tile of data with vector instructions. The tile's width and +// height are the number of Neon lanes for the given type. +template +static void vector_path(Rows src_rows, + Rows dst_rows) { + constexpr size_t num_of_lanes = VecTraits::num_lanes(); + using SrcVectorType = typename VecTraits::VectorType; + + // The number of vectors read and write is the same as the lane count of the + // given element size + constexpr size_t buffer_size = num_of_lanes; + + // Last rotate step is always done on 64 bit elements + uint64x2_t trn_result_b64[buffer_size]; // NOLINT(runtime/arrays) + + // The 64 bit rotate spans through all the vectors, so its "order" is the + // same as the number of vectors + constexpr size_t rotate_order_b64 = num_of_lanes; + + rotate_vectors_recursively(trn_result_b64, + src_rows); + + KLEIDICV_FORCE_LOOP_UNROLL + for (size_t index = 0; index < buffer_size; ++index) { + trn_result_b64[index] = vreinterpretq_u64( + vrev64q(reinterpret_cast(trn_result_b64[index]))); + trn_result_b64[index] = vcombine(vget_high(trn_result_b64[index]), + vget_low(trn_result_b64[index])); + vst1q(&dst_rows.at(index)[0], trn_result_b64[index]); + } +} + +template +static void scalar_path(Rows src_rows, + Rows dst_rows, size_t height, + size_t width) { + for (size_t 
vindex = 0; vindex < height; ++vindex) { + disable_loop_vectorization(); + for (size_t hindex = 0; hindex < width; ++hindex) { + disable_loop_vectorization(); + // dst[j][src_height - i - 1] = src[i][j] + dst_rows.at(hindex)[height - vindex - 1] = src_rows.at(vindex)[hindex]; + } + } +} + +template +static kleidicv_error_t rotate(Rectangle rect, Rows src_rows, + Rows dst_rows) { + constexpr size_t num_of_lanes = VecTraits::num_lanes(); + auto handle_lane_number_of_rows = [&](size_t vindex) { + LoopUnroll2 horizontal_loop(rect.width(), num_of_lanes); + + horizontal_loop.unroll_once([&](size_t hindex) { + // if the input is big enough handle it tile by tile + vector_path( + src_rows.at(vindex, hindex), + dst_rows.at(hindex, rect.height() - vindex - num_of_lanes)); + }); + + // This branch is needed even for TryToAvoidTailLoop + horizontal_loop.remaining([&](size_t hindex, size_t final_hindex) { + scalar_path(src_rows.at(vindex, hindex), + dst_rows.at(hindex, rect.height() - vindex - num_of_lanes), + num_of_lanes, final_hindex - hindex); + }); + }; + + LoopUnroll2 vertical_loop(rect.height(), num_of_lanes); + + vertical_loop.unroll_once(handle_lane_number_of_rows); + + vertical_loop.remaining([&](size_t vindex, size_t final_vindex) { + scalar_path(src_rows.at(vindex), dst_rows.at(0, 0), final_vindex - vindex, + rect.width()); + }); + return KLEIDICV_OK; +} + +template +static kleidicv_error_t rotate(const void *src_void, size_t src_stride, + size_t src_width, size_t src_height, + void *dst_void, size_t dst_stride) { + MAKE_POINTER_CHECK_ALIGNMENT(const T, src, src_void); + MAKE_POINTER_CHECK_ALIGNMENT(T, dst, dst_void); + CHECK_POINTER_AND_STRIDE(src, src_stride, src_height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, src_width); + CHECK_IMAGE_SIZE(src_width, src_height); + + Rectangle rect{src_width, src_height}; + Rows dst_rows{dst, dst_stride}; + Rows src_rows{src, src_stride}; + + return rotate(rect, src_rows, dst_rows); +} + +KLEIDICV_TARGET_FN_ATTRS 
+kleidicv_error_t rotate(const void *src, size_t src_stride, size_t src_width, + size_t src_height, void *dst, size_t dst_stride, + int angle, size_t element_size) { + if (!rotate_is_implemented(src, dst, angle, element_size)) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + + switch (element_size) { + case sizeof(uint8_t): + return rotate(src, src_stride, src_width, src_height, dst, + dst_stride); + case sizeof(uint16_t): + return rotate(src, src_stride, src_width, src_height, dst, + dst_stride); + case sizeof(uint32_t): + return rotate(src, src_stride, src_width, src_height, dst, + dst_stride); + case sizeof(uint64_t): + return rotate(src, src_stride, src_width, src_height, dst, + dst_stride); + // GCOVR_EXCL_START + default: + assert(!"element size not implemented"); + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + // GCOVR_EXCL_STOP + } +} + +} // namespace kleidicv::neon diff --git a/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h b/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h index f122f79bc..c9c04cc14 100644 --- a/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h +++ b/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h @@ -263,6 +263,16 @@ kleidicv_error_t kleidicv_thread_saturating_add_abs_with_threshold_s16( size_t src_b_stride, int16_t *dst, size_t dst_stride, size_t width, size_t height, int16_t threshold, kleidicv_thread_multithreading); +/// Internal - not part of the public API and its direct use is not supported. +/// +/// Multithreaded implementation of kleidicv_rotate - see the documentation of +/// that function for more details. +kleidicv_error_t kleidicv_thread_rotate(const void *src, size_t src_stride, + size_t width, size_t height, void *dst, + size_t dst_stride, int angle, + size_t element_size, + kleidicv_thread_multithreading); + /// Internal - not part of the public API and its direct use is not supported. 
/// /// Multithreaded implementation of kleidicv_gaussian_blur_u8 - see the diff --git a/kleidicv_thread/src/kleidicv_thread.cpp b/kleidicv_thread/src/kleidicv_thread.cpp index 0c2ff4201..b1e34b4a5 100644 --- a/kleidicv_thread/src/kleidicv_thread.cpp +++ b/kleidicv_thread/src/kleidicv_thread.cpp @@ -9,6 +9,7 @@ #include #include +#include "kleidicv/arithmetics/rotate.h" #include "kleidicv/filters/blur_and_downsample.h" #include "kleidicv/filters/gaussian_blur.h" #include "kleidicv/filters/scharr.h" @@ -53,8 +54,8 @@ static kleidicv_error_t kleidicv_thread_std_function_callback( template inline kleidicv_error_t parallel_batches(Callback callback, kleidicv_thread_multithreading mt, - unsigned count) { - const unsigned min_batch_size = 16; + unsigned count, + unsigned min_batch_size = 16) { const unsigned task_count = std::max(1U, (count) / min_batch_size); FunctionCallback f = [=](unsigned task_begin, unsigned task_end) { unsigned begin = task_begin * min_batch_size, @@ -214,6 +215,24 @@ kleidicv_error_t kleidicv_thread_saturating_add_abs_with_threshold_s16( src_b, src_b_stride, dst, dst_stride, width, height, threshold); } +kleidicv_error_t kleidicv_thread_rotate(const void *src, size_t src_stride, + size_t width, size_t height, void *dst, + size_t dst_stride, int angle, + size_t element_size, + kleidicv_thread_multithreading mt) { + if (!kleidicv::rotate_is_implemented(src, dst, angle, element_size)) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + // reading in columns and writing out rows tends to perform better + auto callback = [=](unsigned begin, unsigned end) { + return kleidicv_rotate( + static_cast(src) + begin * element_size, src_stride, + end - begin, height, static_cast(dst) + begin * dst_stride, + dst_stride, angle, element_size); + }; + return parallel_batches(callback, mt, width, 64); +} + template inline kleidicv_error_t kleidicv_thread_yuv_sp_to_rgb_u8_impl( F f, const uint8_t *src_y, size_t src_y_stride, const uint8_t *src_uv, diff --git 
a/scripts/ci-opencv.sh b/scripts/ci-opencv.sh index 8b0df31b8..e071edcf0 100755 --- a/scripts/ci-opencv.sh +++ b/scripts/ci-opencv.sh @@ -115,6 +115,7 @@ CORE_TEST_PATTERNS=( '*Core_And*' '*Core_Mul*' '*Core_Sub*' + '*Core_Rotate*' '*Core_Transpose*' '*Core_MinMaxLoc*' '*MinMaxLoc*' diff --git a/test/api/test_rotate.cpp b/test/api/test_rotate.cpp new file mode 100644 index 000000000..447270237 --- /dev/null +++ b/test/api/test_rotate.cpp @@ -0,0 +1,208 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include "framework/array.h" +#include "framework/generator.h" +#include "kleidicv/kleidicv.h" + +class Rotate : public testing::TestWithParam { + public: + void scalar_test(size_t padding) { + size_t first_dim = test::Options::vector_lanes() - 1; + size_t second_dim = test::Options::vector_lanes() + 1; + // Exercise horizontal scalar path + test(first_dim, second_dim, padding); + test(second_dim, first_dim, padding); + } + + void vector_test(size_t padding) { + // Make at least two full vector passes + size_t src_width = 2 * test::Options::vector_lanes(); + // Set height to be different from width but still larger than vector_lanes + size_t src_height = 3 * test::Options::vector_lanes(); + test(src_width, src_height, padding); + } + + void vector_plus_scalar_test(size_t padding) { + size_t first_dim = 3 * test::Options::vector_lanes() - 1; + size_t second_dim = 3 * test::Options::vector_lanes() - 1; + test(first_dim, second_dim, padding); + test(second_dim, first_dim, padding); + } + + protected: + void test(size_t src_width, size_t src_height, size_t padding) const { + const size_t dst_width = src_height; + const size_t dst_height = src_width; + const int angle = 90; + size_t element_size = GetParam(); + size_t src_stride = (src_width + padding) * element_size; + size_t dst_stride = (dst_width + padding) * element_size; + + std::vector source(src_stride * src_height, 0); + std::vector 
expected(dst_stride * dst_height, 0); + std::vector actual_single(dst_stride * dst_height, 0); + + std::mt19937 generator{ + static_cast(test::Options::seed())}; + std::generate(source.begin(), source.end(), generator); + + calculate_expected(source.data(), expected.data(), src_width, src_height, + src_stride, dst_stride, element_size); + + ASSERT_EQ(KLEIDICV_OK, kleidicv_rotate(source.data(), src_stride, src_width, + src_height, actual_single.data(), + dst_stride, angle, element_size)); + + expect_eq_vector2D(expected.data(), actual_single.data(), dst_width, + dst_height, dst_stride, element_size); + } + + void expect_eq_vector2D(const uint8_t *lhs, const uint8_t *rhs, size_t width, + size_t height, size_t stride, + size_t element_size) const { + for (size_t i = 0; i < height; i++) { + for (size_t j = 0; j < width * element_size; j++) { + ASSERT_EQ(lhs[i * stride + j], rhs[i * stride + j]); + } + } + } + + void calculate_expected(const uint8_t *source, uint8_t *expected, + size_t src_width, size_t src_height, + size_t src_stride, size_t dst_stride, + size_t element_size) const { + for (size_t i = 0; i < src_height; i++) { + for (size_t j = 0; j < src_width; j++) { + // dst[j][src_height - i - 1] = src[i][j] + memcpy(expected + j * dst_stride + (src_height - i - 1) * element_size, + source + i * src_stride + j * element_size, element_size); + } + } + } +}; + +TEST_P(Rotate, ScalarNoPadding) { scalar_test(0); } + +TEST_P(Rotate, VectorNoPadding) { vector_test(0); } + +TEST_P(Rotate, ScalarWithPadding) { scalar_test(1); } + +TEST_P(Rotate, VectorWithPadding) { vector_test(1); } + +TEST_P(Rotate, VectorPlusScalarNoPadding) { vector_plus_scalar_test(0); } + +TEST_P(Rotate, VectorPlusScalarWithPadding) { vector_plus_scalar_test(1); } + +TEST_P(Rotate, NullPointer) { + std::vector src(1, 0); + std::vector dst(1, 0); + size_t element_size = GetParam(); + test::test_null_args(kleidicv_rotate, src.data(), element_size, 1, 1, + dst.data(), element_size, 90, element_size); +} 
+ +TEST_P(Rotate, ZeroImageSize) { + std::vector src(1, 0); + std::vector dst(1, 0); + size_t element_size = GetParam(); + EXPECT_EQ(KLEIDICV_OK, + kleidicv_rotate(src.data(), element_size, 0, 1, dst.data(), + element_size, 90, element_size)); + EXPECT_EQ(KLEIDICV_OK, + kleidicv_rotate(src.data(), element_size, 1, 0, dst.data(), + element_size, 90, element_size)); +} + +TEST_P(Rotate, OversizeImage) { + std::vector src(1, 0); + std::vector dst(1, 0); + size_t element_size = GetParam(); + EXPECT_EQ( + KLEIDICV_ERROR_RANGE, + kleidicv_rotate(src.data(), element_size, KLEIDICV_MAX_IMAGE_PIXELS + 1, + 1, dst.data(), element_size, 90, element_size)); + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + kleidicv_rotate(src.data(), element_size, KLEIDICV_MAX_IMAGE_PIXELS, + KLEIDICV_MAX_IMAGE_PIXELS, dst.data(), element_size, + 90, element_size)); +} + +TEST_P(Rotate, Misalignment) { + size_t element_size = GetParam(); + if (element_size == 1) { + // misalignment impossible + GTEST_SKIP(); + } + + const size_t kBufSize = element_size * 10; + std::vector src(kBufSize, 0); + std::vector dst(kBufSize, 0); + + EXPECT_EQ(KLEIDICV_ERROR_ALIGNMENT, + kleidicv_rotate(src.data() + 1, element_size, 1, 1, dst.data(), + element_size, 90, element_size)); + EXPECT_EQ(KLEIDICV_ERROR_ALIGNMENT, + kleidicv_rotate(src.data(), element_size + 1, 1, 2, dst.data(), + element_size, 90, element_size)); + EXPECT_EQ(KLEIDICV_ERROR_ALIGNMENT, + kleidicv_rotate(src.data(), element_size, 1, 1, dst.data() + 1, + element_size, 90, element_size)); + EXPECT_EQ(KLEIDICV_ERROR_ALIGNMENT, + kleidicv_rotate(src.data(), element_size, 2, 1, dst.data(), + element_size + 1, 90, element_size)); + // Ignore stride if there's only one row + EXPECT_EQ(KLEIDICV_OK, + kleidicv_rotate(src.data(), element_size + 1, 1, 1, dst.data(), + element_size, 90, element_size)); + EXPECT_EQ(KLEIDICV_OK, + kleidicv_rotate(src.data(), element_size, 1, 1, dst.data(), + element_size + 1, 90, element_size)); +} + +INSTANTIATE_TEST_SUITE_P(, Rotate, 
testing::Values(1, 2, 4, 8), + testing::PrintToStringParamName()); + +TEST(RotateNotImplemented, InPlace) { + const size_t width = 1; + const size_t height = 1; + const size_t element_size = 1; + const size_t stride = width * element_size; + const int angle = 90; + + uint8_t source[width * height] = {}; + ASSERT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + kleidicv_rotate(source, stride, width, height, source, stride, + angle, element_size)); +} + +TEST(RotateNotImplemented, Angle) { + const size_t width = 1; + const size_t height = 1; + const size_t element_size = 1; + const size_t stride = width * element_size; + const int angle = 180; + + uint8_t source[width * height] = {}; + uint8_t dst[width * height] = {}; + ASSERT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + kleidicv_rotate(source, stride, width, height, dst, stride, angle, + element_size)); +} + +TEST(RotateNotImplemented, ElementSize) { + const size_t width = 1; + const size_t height = 1; + const size_t element_size = 16; + const size_t stride = width * element_size; + const int angle = 90; + + std::vector source(width * element_size * height, 0); + std::vector dst(width * element_size * height, 0); + ASSERT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + kleidicv_rotate(source.data(), stride, width, height, dst.data(), + stride, angle, element_size)); +} diff --git a/test/api/test_rotate_thread.cpp b/test/api/test_rotate_thread.cpp new file mode 100644 index 000000000..4268bb857 --- /dev/null +++ b/test/api/test_rotate_thread.cpp @@ -0,0 +1,147 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include + +#include "framework/array.h" +#include "framework/generator.h" +#include "kleidicv/kleidicv.h" +#include "kleidicv_thread/kleidicv_thread.h" +#include "multithreading_fake.h" + +class RotateThread : public testing::TestWithParam { + public: + void scalar_test(size_t padding) { + size_t first_dim = test::Options::vector_lanes() - 1; + size_t 
second_dim = test::Options::vector_lanes() + 1; + // Exercise horizontal scalar path + test(first_dim, second_dim, padding); + test(second_dim, first_dim, padding); + } + + void vector_test(size_t padding) { + // Make the size at least two batches 2 * 64 = 128 for all element_size + size_t src_width = 8 * test::Options::vector_lanes(); + // Set height to be different from width but still larger than vector_lanes + size_t src_height = 12 * test::Options::vector_lanes(); + test(src_width, src_height, padding); + } + + void vector_plus_scalar_test(size_t padding) { + size_t first_dim = 12 * test::Options::vector_lanes() - 1; + size_t second_dim = 12 * test::Options::vector_lanes() - 1; + test(first_dim, second_dim, padding); + test(second_dim, first_dim, padding); + } + + protected: + void test(size_t src_width, size_t src_height, size_t padding) const { + const size_t dst_width = src_height; + const size_t dst_height = src_width; + const int angle = 90; + unsigned thread_count = 2; + size_t element_size = GetParam(); + size_t src_stride = (src_width + padding) * element_size; + size_t dst_stride = (dst_width + padding) * element_size; + + std::vector source(src_stride * src_height, 0); + std::vector expected(dst_stride * dst_height, 0); + std::vector actual_single(dst_stride * dst_height, 0); + std::vector actual_multi(dst_stride * dst_height, 0); + + std::mt19937 generator{ + static_cast(test::Options::seed())}; + std::generate(source.begin(), source.end(), generator); + + ASSERT_EQ(KLEIDICV_OK, kleidicv_rotate(source.data(), src_stride, src_width, + src_height, actual_single.data(), + dst_stride, angle, element_size)); + + ASSERT_EQ(KLEIDICV_OK, + kleidicv_thread_rotate(source.data(), src_stride, src_width, + src_height, actual_multi.data(), + dst_stride, angle, element_size, + get_multithreading_fake(thread_count))); + + expect_eq_vector2D(actual_multi.data(), actual_single.data(), dst_width, + dst_height, dst_stride, element_size); + } + + void 
expect_eq_vector2D(const uint8_t *lhs, const uint8_t *rhs, size_t width, + size_t height, size_t stride, + size_t element_size) const { + for (size_t i = 0; i < height; i++) { + for (size_t j = 0; j < width * element_size; j++) { + ASSERT_EQ(lhs[i * stride + j], rhs[i * stride + j]); + } + } + } +}; + +TEST_P(RotateThread, ScalarNoPadding) { scalar_test(0); } + +TEST_P(RotateThread, VectorNoPadding) { vector_test(0); } + +TEST_P(RotateThread, ScalarWithPadding) { scalar_test(1); } + +TEST_P(RotateThread, VectorWithPadding) { vector_test(1); } + +TEST_P(RotateThread, VectorPlusScalarNoPadding) { vector_plus_scalar_test(0); } + +TEST_P(RotateThread, VectorPlusScalarWithPadding) { + vector_plus_scalar_test(1); +} + +INSTANTIATE_TEST_SUITE_P(, RotateThread, testing::Values(1, 2, 4, 8), + testing::PrintToStringParamName()); + +TEST(RotateThreadNotImplemented, InPlace) { + const size_t width = 1; + const size_t height = 1; + const size_t element_size = 1; + const size_t stride = width * element_size; + const int angle = 90; + unsigned thread_count = 2; + + uint8_t source[width * height] = {}; + ASSERT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + kleidicv_thread_rotate(source, stride, width, height, source, + stride, angle, element_size, + get_multithreading_fake(thread_count))); +} + +TEST(RotateThreadNotImplemented, Angle) { + const size_t width = 1; + const size_t height = 1; + const size_t element_size = 1; + const size_t stride = width * element_size; + const int angle = 180; + unsigned thread_count = 2; + + uint8_t source[width * height] = {}; + uint8_t dst[width * height] = {}; + ASSERT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + kleidicv_thread_rotate(source, stride, width, height, dst, stride, + angle, element_size, + get_multithreading_fake(thread_count))); +} + +TEST(RotateThreadNotImplemented, ElementSize) { + const size_t width = 1; + const size_t height = 1; + const size_t element_size = 16; + const size_t stride = width * element_size; + const int angle = 90; + unsigned 
thread_count = 2; + + std::vector source(width * element_size * height, 0); + std::vector dst(width * element_size * height, 0); + ASSERT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + kleidicv_thread_rotate(source.data(), stride, width, height, + dst.data(), stride, angle, element_size, + get_multithreading_fake(thread_count))); +} -- GitLab