From b3a4c16b1142cdca449df84e69b7a9f7bf2ecb04 Mon Sep 17 00:00:00 2001 From: Denes Tarjan Date: Tue, 3 Sep 2024 13:15:04 +0000 Subject: [PATCH 1/2] Add HAL for remap16s to support CV_16SC2 coordinates --- adapters/opencv/opencv-4.10.patch | 62 ++++++++++++++++++++++++++++--- 1 file changed, 57 insertions(+), 5 deletions(-) diff --git a/adapters/opencv/opencv-4.10.patch b/adapters/opencv/opencv-4.10.patch index f3a8e6d0e..5e0269e60 100644 --- a/adapters/opencv/opencv-4.10.patch +++ b/adapters/opencv/opencv-4.10.patch @@ -19,7 +19,7 @@ index 2b4035285f..729cd1dd43 100644 @@ -281,6 +281,11 @@ void Mat::convertTo(OutputArray dst, int type_, double alpha, double beta) const dst.create(dims, size, dtype); Mat dstMat = dst.getMat(); - + + if( dims <= 2 ) { + int width_in_elements = src.cols * cn; + CALL_HAL(convertTo, cv_hal_convertTo, src.data, src.step, src.depth(), dstMat.data, dstMat.step, dstMat.depth(), width_in_elements, src.rows, alpha, beta); @@ -35,7 +35,7 @@ index f78608dbad..299b5e54bd 100644 @@ -953,6 +953,41 @@ inline int hal_ni_transpose2d(const uchar* src_data, size_t src_step, uchar* dst #define cv_hal_transpose2d hal_ni_transpose2d //! @endcond - + +/** + @brief convertTo + @param src_data,src_step,src_depth Source image @@ -72,8 +72,8 @@ index f78608dbad..299b5e54bd 100644 +//! @endcond + //! @} - - + + diff --git a/modules/core/src/minmax.cpp b/modules/core/src/minmax.cpp index 8c6d8ad9a9..47eb6fdb66 100644 --- a/modules/core/src/minmax.cpp @@ -90,6 +90,58 @@ index 8c6d8ad9a9..47eb6fdb66 100644 return; } else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED) +diff --git a/modules/imgproc/src/hal_replacement.hpp b/modules/imgproc/src/hal_replacement.hpp +index 773fed9b48..3f545740f2 100644 +--- a/modules/imgproc/src/hal_replacement.hpp ++++ b/modules/imgproc/src/hal_replacement.hpp +@@ -328,6 +328,32 @@ inline int hal_ni_remap32f(int src_type, const uchar *src_data, size_t src_step, + #define cv_hal_remap32f hal_ni_remap32f + //! 
@endcond + ++/** ++ @brief hal_remap with short maps ++ @param src_type source and destination image type ++ @param src_data source image data ++ @param src_step source image step ++ @param src_width source image width ++ @param src_height source image height ++ @param dst_data destination image data ++ @param dst_step destination image step ++ @param dst_width destination image width ++ @param dst_height destination image height ++ @param mapxy map for interleaved x and y values ++ @param mapxy_step mapxy matrix step ++ @param border_type border processing mode (CV_HAL_BORDER_REFLECT, ...) ++ @param border_value values to use for CV_HAL_BORDER_CONSTANT mode ++ @sa cv::remap ++ */ ++inline int hal_ni_remap16s(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, ++ uchar *dst_data, size_t dst_step, int dst_width, int dst_height, ++ short* mapxy, size_t mapxy_step, int border_type, const double border_value[4]) ++{ return CV_HAL_ERROR_NOT_IMPLEMENTED; } ++ ++//! @cond IGNORED ++#define cv_hal_remap16s hal_ni_remap16s ++//! 
@endcond ++ + /** + @brief hal_cvtBGRtoBGR + @param src_data source image data +diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp +index d7c9c64c3c..1a382811fa 100644 +--- a/modules/imgproc/src/imgwarp.cpp ++++ b/modules/imgproc/src/imgwarp.cpp +@@ -1819,6 +1819,10 @@ void cv::remap( InputArray _src, OutputArray _dst, + { + CALL_HAL(remap32f, cv_hal_remap32f, src.type(), src.data, src.step, src.cols, src.rows, dst.data, dst.step, dst.cols, dst.rows, + map1.ptr(), map1.step, map2.ptr(), map2.step, interpolation, borderType, borderValue.val); ++ } else if ((map1.type() == CV_16SC2) && map2.empty() && interpolation == INTER_NEAREST) ++ { ++ CALL_HAL(remap16s, cv_hal_remap16s, src.type(), src.data, src.step, src.cols, src.rows, dst.data, dst.step, dst.cols, dst.rows, ++ map1.ptr(), map1.step, borderType, borderValue.val); + } + + interpolation &= ~WARP_RELATIVE_MAP; diff --git a/modules/imgproc/src/smooth.dispatch.cpp b/modules/imgproc/src/smooth.dispatch.cpp index d0f50a73bb..1c308887dc 100644 --- a/modules/imgproc/src/smooth.dispatch.cpp @@ -97,7 +149,7 @@ index d0f50a73bb..1c308887dc 100644 @@ -654,6 +654,25 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, ocl_GaussianBlur_8UC1(_src, _dst, ksize, CV_MAT_DEPTH(type), kx, ky, borderType) ); - + + { + Mat src = _src.getMat(); + Mat dst = _dst.getMat(); -- GitLab From 1f3d9bbaa7758a09c9748ef47b2e1d5b7ecea975 Mon Sep 17 00:00:00 2001 From: Denes Tarjan Date: Tue, 10 Sep 2024 15:22:30 +0000 Subject: [PATCH 2/2] Implement remap API with 16-bit integer coordinates --- CHANGELOG.md | 5 + adapters/opencv/kleidicv_hal.cpp | 26 +++ adapters/opencv/kleidicv_hal.h | 19 ++ adapters/opencv/opencv-4.10.patch | 2 +- conformity/opencv/test_remap.cpp | 83 ++++++++ conformity/opencv/tests.cpp | 1 + conformity/opencv/tests.h | 1 + doc/functionality.md | 5 + doc/opencv.md | 8 + kleidicv/include/kleidicv/kleidicv.h | 45 +++- kleidicv/include/kleidicv/remap/remap.h | 41 ++++ 
kleidicv/src/remap/remap_api.cpp | 13 ++ kleidicv/src/remap/remap_neon.cpp | 113 ++++++++++ .../include/kleidicv_thread/kleidicv_thread.h | 11 + kleidicv_thread/src/kleidicv_thread.cpp | 21 ++ scripts/benchmark/run_benchmarks_4K.sh | 2 + scripts/benchmark/run_benchmarks_FHD.sh | 2 + test/api/test_remap.cpp | 195 ++++++++++++++++++ test/api/test_thread.cpp | 80 +++++++ 19 files changed, 671 insertions(+), 2 deletions(-) create mode 100644 conformity/opencv/test_remap.cpp create mode 100644 kleidicv/include/kleidicv/remap/remap.h create mode 100644 kleidicv/src/remap/remap_api.cpp create mode 100644 kleidicv/src/remap/remap_neon.cpp create mode 100644 test/api/test_remap.cpp diff --git a/CHANGELOG.md b/CHANGELOG.md index a87ded84b..088513dc5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,11 @@ KleidiCV uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html). This changelog aims to follow the guiding principles of [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). +## 0.3.0 - not yet released + +### Added +- Remap implementation for 2-channel s16 coordinates and 1-channel u8 input. 
+ ## 0.2.0 - 2024-09-30 ### Added diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp index 9c8736f7e..f55a3aa7a 100644 --- a/adapters/opencv/kleidicv_hal.cpp +++ b/adapters/opencv/kleidicv_hal.cpp @@ -1213,4 +1213,30 @@ int inRange_f32(const uchar *src_data, size_t src_step, uchar *dst_data, static_cast(lower_bound), static_cast(upper_bound))); } +int remap_s16(int src_type, const uchar *src_data, size_t src_step, + int src_width, int src_height, uchar *dst_data, size_t dst_step, + int dst_width, int dst_height, const int16_t *mapxy, + size_t mapxy_step, int border_type, + [[maybe_unused]] const double border_value[4]) { + kleidicv_border_type_t kleidicv_border_type; + if (from_opencv(border_type, kleidicv_border_type)) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + // This will be used when constant borders are implemented + kleidicv_border_values_t border_values = {}; + auto mt = get_multithreading(); + + if (src_type == CV_8UC1) { + return convert_error(kleidicv_thread_remap_s16_u8( + src_data, src_step, static_cast(src_width), + static_cast(src_height), dst_data, dst_step, + static_cast(dst_width), static_cast(dst_height), + CV_MAT_CN(src_type), mapxy, mapxy_step, kleidicv_border_type, + border_values, mt)); + } + + return CV_HAL_ERROR_NOT_IMPLEMENTED; +} + } // namespace kleidicv::hal diff --git a/adapters/opencv/kleidicv_hal.h b/adapters/opencv/kleidicv_hal.h index d4619aa58..8eea46fa0 100644 --- a/adapters/opencv/kleidicv_hal.h +++ b/adapters/opencv/kleidicv_hal.h @@ -137,6 +137,10 @@ int inRange_f32(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int dst_depth, int width, int height, int cn, double lower_bound, double upper_bound); +int remap_s16(int src_type, const uchar *src_data, size_t src_step, + int src_width, int src_height, uchar *dst_data, size_t dst_step, + int dst_width, int dst_height, const int16_t *mapxy, + size_t mapxy_step, int border_type, const double border_value[4]); } // 
namespace hal } // namespace kleidicv @@ -351,6 +355,21 @@ static inline int kleidicv_canny_with_fallback( #define cv_hal_canny kleidicv_canny_with_fallback #endif // KLEIDICV_EXPERIMENTAL_FEATURE_CANNY +// remap +static inline int kleidicv_remap_s16_with_fallback( + int src_type, const uchar *src_data, size_t src_step, int src_width, + int src_height, uchar *dst_data, size_t dst_step, int dst_width, + int dst_height, const int16_t *mapxy, size_t mapxy_step, int border_type, + const double border_value[4]) { + return KLEIDICV_HAL_FALLBACK_FORWARD( + remap_s16, cv_hal_remap16s, src_type, src_data, src_step, src_width, + src_height, dst_data, dst_step, dst_width, dst_height, mapxy, mapxy_step, + border_type, border_value); +} + +#undef cv_hal_remap16s +#define cv_hal_remap16s kleidicv_remap_s16_with_fallback + #endif // OPENCV_IMGPROC_HAL_REPLACEMENT_HPP #ifdef OPENCV_CORE_HAL_REPLACEMENT_HPP diff --git a/adapters/opencv/opencv-4.10.patch b/adapters/opencv/opencv-4.10.patch index 5e0269e60..0ab00eb69 100644 --- a/adapters/opencv/opencv-4.10.patch +++ b/adapters/opencv/opencv-4.10.patch @@ -117,7 +117,7 @@ index 773fed9b48..3f545740f2 100644 + */ +inline int hal_ni_remap16s(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, + uchar *dst_data, size_t dst_step, int dst_width, int dst_height, -+ short* mapxy, size_t mapxy_step, int border_type, const double border_value[4]) ++ const short* mapxy, size_t mapxy_step, int border_type, const double border_value[4]) +{ return CV_HAL_ERROR_NOT_IMPLEMENTED; } + +//! 
@cond IGNORED diff --git a/conformity/opencv/test_remap.cpp b/conformity/opencv/test_remap.cpp new file mode 100644 index 000000000..321f7c623 --- /dev/null +++ b/conformity/opencv/test_remap.cpp @@ -0,0 +1,83 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include +#include +#include + +#include "opencv2/core/hal/interface.h" +#include "opencv2/imgproc/hal/interface.h" +#include "tests.h" + +const size_t kMaxHeight = 32, kMaxWidth = 32; + +template +static cv::Mat get_source_mat(int Format) { + auto generate_source = [&]() { + cv::Mat m{kMaxHeight, kMaxWidth, Format}; + for (size_t row = 0; row < kMaxHeight; ++row) { + for (size_t column = 0; column < kMaxWidth; ++column) { + m.at(row, column) = + (row * kMaxWidth + column) % std::numeric_limits::max(); + } + } + return m; + }; + static cv::Mat source = generate_source(); + return source; +} + +// BorderValue is interpreted as 1/1000, i.e. 500 for 0.5 +template +cv::Mat exec_remap16(cv::Mat& mapxy_mat) { + cv::Mat empty; + cv::Mat result(mapxy_mat.size().height, mapxy_mat.size().width, Format); + cv::Mat source_mat = get_source_mat(Format); + remap(source_mat, result, mapxy_mat, empty, Interpolation, BorderMode, + BorderValue / 1000.0); + return result; +} + +#if MANAGER +template +bool test_remap16(int index, RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue) { + cv::RNG rng(0); + + for (size_t x = 5; x <= kMaxWidth; x += 3) { + for (size_t y = 5; y <= kMaxHeight; y += 2) { + cv::Mat source_mat = get_source_mat(Format); + cv::Mat mapxy_mat(x, y, CV_16SC2); + rng.fill(mapxy_mat, cv::RNG::UNIFORM, -3, kMaxWidth + 3); + + cv::Mat actual_mat = exec_remap16(mapxy_mat); + cv::Mat expected_mat = get_expected_from_subordinate( + index, request_queue, reply_queue, mapxy_mat); + + bool success = + (CV_MAT_DEPTH(Format) == CV_8U && + !are_matrices_different(1, actual_mat, expected_mat)) || + (CV_MAT_DEPTH(Format) == CV_16U && + 
!are_matrices_different(1, actual_mat, expected_mat)); + if (!success) { + fail_print_matrices(x, y, source_mat, actual_mat, expected_mat); + return true; + } + } + } + return false; +} +#endif + +std::vector& remap_tests_get() { + // clang-format off + static std::vector tests = { + TEST("Remap16s uint8", (test_remap16), (exec_remap16)), + }; + // clang-format on + return tests; +} diff --git a/conformity/opencv/tests.cpp b/conformity/opencv/tests.cpp index bce00a813..59a957658 100644 --- a/conformity/opencv/tests.cpp +++ b/conformity/opencv/tests.cpp @@ -39,6 +39,7 @@ std::vector all_tests = merge_tests({ scale_tests_get, min_max_tests_get, in_range_tests_get, + remap_tests_get, // clang-format on }); diff --git a/conformity/opencv/tests.h b/conformity/opencv/tests.h index da253cb66..8b0efd612 100644 --- a/conformity/opencv/tests.h +++ b/conformity/opencv/tests.h @@ -22,5 +22,6 @@ std::vector& resize_tests_get(); std::vector& scale_tests_get(); std::vector& min_max_tests_get(); std::vector& in_range_tests_get(); +std::vector& remap_tests_get(); #endif // KLEIDICV_OPENCV_CONFORMITY_TESTS_H_ diff --git a/doc/functionality.md b/doc/functionality.md index 57db8e325..7ffd1c42f 100644 --- a/doc/functionality.md +++ b/doc/functionality.md @@ -89,3 +89,8 @@ See `doc/opencv.md` for details of the functionality available in OpenCV. | 2x2 | x | x | | 4x4 | x | x | | 8x8 | | x | + +# Remap +| | u8 | u16 | +|--------------------------------|-----|-----| +| Remap int16 coordinates | x | | diff --git a/doc/opencv.md b/doc/opencv.md index 1b859c9a7..1748c4acc 100644 --- a/doc/opencv.md +++ b/doc/opencv.md @@ -190,3 +190,11 @@ Currently only scalar bounds are supported. Notes on parameters: * `src.depth()` - only supports `CV_8U` and `CV_32F` depths and 1 channel. * `src`, `lowerb` and `upperb` need to have the same type. 
+ +### [`cv::remap()`](https://docs.opencv.org/4.10.0/da/d54/group__imgproc__transform.html#gab75ef31ce5cdfb5c44b6da5f3b908ea4) +Geometrically transforms the `src` image by taking the pixels specified by the coordinates from the `map` image. +Notes on parameters: +* `src.depth()` - only supports `CV_8U` depth and 1 channel. +* `map1` shall be 16SC2 and `map2` shall be empty +* `interpolation` shall be `INTER_NEAREST` +* `borderMode` shall be `BORDER_REPLICATE` diff --git a/kleidicv/include/kleidicv/kleidicv.h b/kleidicv/include/kleidicv/kleidicv.h index 78f95f105..afdbbb500 100644 --- a/kleidicv/include/kleidicv/kleidicv.h +++ b/kleidicv/include/kleidicv/kleidicv.h @@ -1631,7 +1631,7 @@ KLEIDICV_API_DECLARATION(kleidicv_float_conversion_u8_f32, const uint8_t *src, /// start of the next row for the source data. Must /// not be less than width * sizeof(type), except for /// single-row images. -/// @param dst Pointer to the first destination data. Must be non-null. +/// @param dst Pointer to the destination data. Must be non-null. /// @param dst_stride Distance in bytes from the start of one row to the /// start of the next row for the destination data. Must /// not be less than width * sizeof(type), except for @@ -1651,6 +1651,49 @@ KLEIDICV_API_DECLARATION(kleidicv_in_range_f32, const float *src, size_t width, size_t height, float lower_bound, float upper_bound); +/// Transforms the `src` image by taking the pixels specified by the coordinates +/// from the `mapxy` image. +/// +/// Width and height are the same for `mapxy` and for `dst`. `src` dimensions +/// may be different, but due to the 16-bit signed format, its width and height +/// must not be bigger than 32767. Coordinates outside of `src` dimensions are +/// considered border. In case of @ref KLEIDICV_BORDER_TYPE_REPLICATE, that +/// means that negative coordinates map to the first row/column (zero), and +/// those bigger than height/width - 1 map to the last row/column. 
+/// +/// @param src Pointer to the source data. Must be non-null. +/// @param src_stride Distance in bytes from the start of one row to the +/// start of the next row for the source data. Must +/// not be less than width * sizeof(type), except for +/// single-row images. +/// @param src_width Number of elements in the source row. +/// @param src_height Number of rows in the source data. +/// @param dst Pointer to the destination data. Must be non-null. +/// @param dst_stride Distance in bytes from the start of one row to the +/// start of the next row for the destination data. +/// Must be a multiple of sizeof(type) and no less than +/// width * sizeof(type), except for single-row images. +/// @param dst_width Number of elements in the destination row. +/// @param dst_height Number of rows in the destination data. +/// @param mapxy Pointer to the mapping data. Must be non-null. +/// @param mapxy_stride Distance in bytes from the start of one row to the +/// start of the next row for the mapping data. +/// Must be a multiple of sizeof(int16_t) and no less than +/// 2 * width * sizeof(int16_t), except for single-row images. +/// @param channels Number of channels in the data. Must be 1. +/// @param border_type Way of handling the border. The supported border types +/// are: \n +/// - @ref KLEIDICV_BORDER_TYPE_REPLICATE +/// @param border_values Border values if the border_type is +/// @ref KLEIDICV_BORDER_TYPE_CONSTANT. 
+KLEIDICV_API_DECLARATION(kleidicv_remap_s16_u8, const uint8_t *src, + size_t src_stride, size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, size_t dst_width, + size_t dst_height, size_t channels, + const int16_t *mapxy, size_t mapxy_stride, + kleidicv_border_type_t border_type, + kleidicv_border_values_t border_values); + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/kleidicv/include/kleidicv/remap/remap.h b/kleidicv/include/kleidicv/remap/remap.h new file mode 100644 index 000000000..5e8845b77 --- /dev/null +++ b/kleidicv/include/kleidicv/remap/remap.h @@ -0,0 +1,41 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_REMAP_REMAP_H +#define KLEIDICV_REMAP_REMAP_H + +#include + +#include "kleidicv/ctypes.h" + +namespace kleidicv { + +template +inline bool remap_s16_is_implemented(size_t dst_width, + kleidicv_border_type_t border_type, + size_t channels) { + if constexpr (std::is_same::value) { + return (dst_width >= 8 && + border_type == + kleidicv_border_type_t::KLEIDICV_BORDER_TYPE_REPLICATE && + channels == 1); + } else { + return false; + } +} + +namespace neon { + +template +kleidicv_error_t remap_s16(const T *src, size_t src_stride, size_t src_width, + size_t src_height, T *dst, size_t dst_stride, + size_t dst_width, size_t dst_height, size_t channels, + const int16_t *mapxy, size_t mapxy_stride, + kleidicv_border_type_t border_type, + kleidicv_border_values_t border_values); +} // namespace neon + +} // namespace kleidicv + +#endif // KLEIDICV_REMAP_REMAP_H diff --git a/kleidicv/src/remap/remap_api.cpp b/kleidicv/src/remap/remap_api.cpp new file mode 100644 index 000000000..b8f85b4cc --- /dev/null +++ b/kleidicv/src/remap/remap_api.cpp @@ -0,0 +1,13 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "kleidicv/dispatch.h" +#include "kleidicv/kleidicv.h" 
+#include "kleidicv/remap/remap.h" + +#define KLEIDICV_DEFINE_C_API(outer_name, inner_name, type) \ + KLEIDICV_MULTIVERSION_C_API(outer_name, &kleidicv::neon::inner_name, \ + nullptr, nullptr) + +KLEIDICV_DEFINE_C_API(kleidicv_remap_s16_u8, remap_s16, uint8_t); diff --git a/kleidicv/src/remap/remap_neon.cpp b/kleidicv/src/remap/remap_neon.cpp new file mode 100644 index 000000000..27d5d06c6 --- /dev/null +++ b/kleidicv/src/remap/remap_neon.cpp @@ -0,0 +1,113 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include "kleidicv/kleidicv.h" +#include "kleidicv/neon.h" +#include "kleidicv/remap/remap.h" + +namespace kleidicv::neon { + +template +class RemapS16; + +template <> +class RemapS16 { + public: + using ScalarType = uint8_t; + using MapVecTraits = neon::VecTraits; + using MapVectorType = typename MapVecTraits::VectorType; + using MapVector2Type = typename MapVecTraits::Vector2Type; + using VecTraits = neon::VecTraits; + using VectorType = typename VecTraits::VectorType; + + explicit RemapS16(Rows src_rows, size_t src_width, + size_t src_height) + : src_rows_{src_rows}, + v_src_stride_{vdupq_n_s16(static_cast(src_rows_.stride()))}, + v_xmax_{vdupq_n_s16(static_cast(src_width - 1))}, + v_ymax_{vdupq_n_s16(static_cast(src_height - 1))} {} + + void process_row(size_t width, Columns mapxy, + Columns dst) { + auto vector_path = [&](size_t step) { + MapVector2Type xy = vld2q_s16(&mapxy[0]); + // Clamp coordinates to within the dimensions of the source image + uint16x8_t x = vreinterpretq_u16_s16( + vmaxq_s16(vdupq_n_s16(0), vminq_s16(xy.val[0], v_xmax_))); + uint16x8_t y = vreinterpretq_u16_s16( + vmaxq_s16(vdupq_n_s16(0), vminq_s16(xy.val[1], v_ymax_))); + // Calculate offsets from coordinates (y * stride + x) + uint32x4_t indices_low = + vmlal_u16(vmovl_u16(vget_low_u16(x)), vget_low_u16(y), + vget_low_u16(v_src_stride_)); + // Copy pixels from source + dst[0] = 
src_rows_[vgetq_lane_u32(indices_low, 0)]; + dst[1] = src_rows_[vgetq_lane_u32(indices_low, 1)]; + dst[2] = src_rows_[vgetq_lane_u32(indices_low, 2)]; + dst[3] = src_rows_[vgetq_lane_u32(indices_low, 3)]; + uint32x4_t indices_high = + vmlal_high_u16(vmovl_high_u16(x), y, v_src_stride_); + dst[4] = src_rows_[vgetq_lane_u32(indices_high, 0)]; + dst[5] = src_rows_[vgetq_lane_u32(indices_high, 1)]; + dst[6] = src_rows_[vgetq_lane_u32(indices_high, 2)]; + dst[7] = src_rows_[vgetq_lane_u32(indices_high, 3)]; + mapxy += ptrdiff_t(step); + dst += ptrdiff_t(step); + }; + + LoopUnroll loop{width, MapVecTraits::num_lanes()}; + loop.unroll_once(vector_path); + ptrdiff_t back_step = static_cast(loop.step()) - + static_cast(loop.remaining_length()); + mapxy -= back_step; + dst -= back_step; + loop.remaining([&](size_t, size_t step) { vector_path(step); }); + } + + private: + Rows src_rows_; + int16x8_t v_src_stride_; + int16x8_t v_xmax_; + int16x8_t v_ymax_; +}; // end of class RemapS16 + +template +kleidicv_error_t remap_s16( + const T *src, size_t src_stride, size_t src_width, size_t src_height, + T *dst, size_t dst_stride, size_t dst_width, size_t dst_height, + size_t channels, const int16_t *mapxy, size_t mapxy_stride, + kleidicv_border_type_t border_type, + [[maybe_unused]] kleidicv_border_values_t border_values) { + CHECK_POINTER_AND_STRIDE(src, src_stride, src_height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, dst_height); + CHECK_POINTER_AND_STRIDE(mapxy, mapxy_stride, dst_height); + CHECK_IMAGE_SIZE(src_width, src_height); + CHECK_IMAGE_SIZE(dst_width, dst_height); + + if (!remap_s16_is_implemented(dst_width, border_type, channels)) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + + Rows src_rows{src, src_stride, channels}; + Rows mapxy_rows{mapxy, mapxy_stride, 2}; + Rows dst_rows{dst, dst_stride, channels}; + RemapS16 operation{src_rows, src_width, src_height}; + Rectangle rect{dst_width, dst_height}; + zip_rows(operation, rect, mapxy_rows, dst_rows); + return 
KLEIDICV_OK; +} + +#define KLEIDICV_INSTANTIATE_TEMPLATE(type) \ + template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t remap_s16( \ + const type *src, size_t src_stride, size_t src_width, size_t src_height, \ + type *dst, size_t dst_stride, size_t dst_width, size_t dst_height, \ + size_t channels, const int16_t *mapxy, size_t mapxy_stride, \ + kleidicv_border_type_t border_type, \ + kleidicv_border_values_t border_values) + +KLEIDICV_INSTANTIATE_TEMPLATE(uint8_t); + +} // namespace kleidicv::neon diff --git a/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h b/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h index eb5ba5fe1..9d44879de 100644 --- a/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h +++ b/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h @@ -356,6 +356,17 @@ kleidicv_error_t kleidicv_thread_resize_linear_f32( float *dst, size_t dst_stride, size_t dst_width, size_t dst_height, kleidicv_thread_multithreading); +/// Internal - not part of the public API and its direct use is not supported. +/// +/// Multithreaded implementation of kleidicv_remap_s16_u8 - see the +/// documentation of that function for more details. 
+kleidicv_error_t kleidicv_thread_remap_s16_u8( + const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, size_t dst_width, size_t dst_height, + size_t channels, const int16_t *mapxy, size_t mapxy_stride, + kleidicv_border_type_t border_type, kleidicv_border_values_t border_values, + kleidicv_thread_multithreading); + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/kleidicv_thread/src/kleidicv_thread.cpp b/kleidicv_thread/src/kleidicv_thread.cpp index 97f938c32..e782e48ec 100644 --- a/kleidicv_thread/src/kleidicv_thread.cpp +++ b/kleidicv_thread/src/kleidicv_thread.cpp @@ -13,6 +13,7 @@ #include "kleidicv/filters/separable_filter_2d.h" #include "kleidicv/filters/sobel.h" #include "kleidicv/kleidicv.h" +#include "kleidicv/remap/remap.h" #include "kleidicv/resize/resize_linear.h" typedef std::function FunctionCallback; @@ -587,3 +588,23 @@ kleidicv_error_t kleidicv_thread_resize_linear_f32( }; return parallel_batches(callback, mt, std::max(1, src_height - 1)); } + +kleidicv_error_t kleidicv_thread_remap_s16_u8( + const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, size_t dst_width, size_t dst_height, + size_t channels, const int16_t *mapxy, size_t mapxy_stride, + kleidicv_border_type_t border_type, kleidicv_border_values_t border_values, + kleidicv_thread_multithreading mt) { + if (!kleidicv::remap_s16_is_implemented(dst_width, border_type, + channels)) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + auto callback = [=](unsigned begin, unsigned end) { + return kleidicv_remap_s16_u8(src, src_stride, src_width, src_height, + dst + begin * dst_stride / sizeof(uint8_t), + dst_stride, dst_width, end - begin, channels, + mapxy + begin * mapxy_stride / sizeof(int16_t), + mapxy_stride, border_type, border_values); + }; + return parallel_batches(callback, mt, dst_height); +} diff --git a/scripts/benchmark/run_benchmarks_4K.sh 
b/scripts/benchmark/run_benchmarks_4K.sh index 8dad32529..27ffddacf 100755 --- a/scripts/benchmark/run_benchmarks_4K.sh +++ b/scripts/benchmark/run_benchmarks_4K.sh @@ -92,6 +92,8 @@ benchmarks=( "InRange_U8: opencv_perf_core '*inRangeScalar/*' '(3840x2160, 8UC1, 1, 2)'" "InRange_F32: opencv_perf_core '*inRangeScalar/*' '(3840x2160, 32FC1, 1, 2)'" + + "Remap_S16_U8: opencv_perf_imgproc '*Remap/*' '(3840x2160, 8UC1, 16SC2, INTER_NEAREST)'" ) for idx in "${!benchmarks[@]}"; do diff --git a/scripts/benchmark/run_benchmarks_FHD.sh b/scripts/benchmark/run_benchmarks_FHD.sh index 7adf7a25f..03085cef8 100755 --- a/scripts/benchmark/run_benchmarks_FHD.sh +++ b/scripts/benchmark/run_benchmarks_FHD.sh @@ -92,6 +92,8 @@ benchmarks=( "InRange_U8: opencv_perf_core '*inRangeScalar/*' '(1920x1080, 8UC1, 1, 2)'" "InRange_F32: opencv_perf_core '*inRangeScalar/*' '(1920x1080, 32FC1, 1, 2)'" + + "Remap_S16_U8: opencv_perf_imgproc '*Remap/*' '(1920x1080, 8UC1, 16SC2, INTER_NEAREST)'" ) for idx in "${!benchmarks[@]}"; do diff --git a/test/api/test_remap.cpp b/test/api/test_remap.cpp new file mode 100644 index 000000000..888885bdf --- /dev/null +++ b/test/api/test_remap.cpp @@ -0,0 +1,195 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include "framework/array.h" +#include "framework/generator.h" +#include "framework/utils.h" +#include "kleidicv/kleidicv.h" + +template +class Remap16 : public testing::Test { + public: + static void test_random(size_t src_w, size_t src_h, size_t dst_w, + size_t dst_h, size_t channels, size_t padding) { + test::Array2D mapxy{2 * dst_w, dst_h, padding, 2}; + test::PseudoRandomNumberGenerator coord_generator; + mapxy.fill(coord_generator); + execute_test(mapxy, src_w, src_h, dst_w, dst_h, channels, padding); + } + + static void test_outside_random(size_t src_w, size_t src_h, size_t dst_w, + size_t dst_h, size_t channels, + size_t padding) { + test::Array2D mapxy{2 * dst_w, 
dst_h, padding, 2}; + test::PseudoRandomNumberGeneratorIntRange coord_generator{ + static_cast(-src_w), static_cast(2 * src_w)}; + mapxy.fill(coord_generator); + execute_test(mapxy, src_w, src_h, dst_w, dst_h, channels, padding); + } + + static void test_blend(size_t src_w, size_t src_h, size_t dst_w, size_t dst_h, + size_t channels, size_t padding) { + test::Array2D mapxy{2 * dst_w, dst_h, padding, 2}; + for (size_t row = 0; row < dst_h; ++row) { + for (size_t column = 0; column < dst_w; ++column) { + // Use a second degree function to add a nonlinear blend to the image + *mapxy.at(row, column * 2) = std::max( + 0, std::min( + static_cast(src_w - 1), + static_cast(column * 2 - column * column / dst_w))); + *mapxy.at(row, column * 2 + 1) = std::max( + 0, std::min(static_cast(src_h - 1), + static_cast(row * (dst_w - column) / dst_w + + 4 * row / dst_h))); + } + } + execute_test(mapxy, src_w, src_h, dst_w, dst_h, channels, padding); + } + + private: + static void execute_test(test::Array2D &mapxy, size_t src_w, + size_t src_h, size_t dst_w, size_t dst_h, + size_t channels, size_t padding) { + size_t src_total_width = channels * src_w; + size_t dst_total_width = channels * dst_w; + + test::Array2D source{src_total_width, src_h, padding, channels}; + test::Array2D actual{dst_total_width, dst_h, padding, channels}; + test::Array2D expected{dst_total_width, dst_h, padding, + channels}; + + test::PseudoRandomNumberGenerator generator; + source.fill(generator); + actual.fill(42); + + calculate_expected(source, mapxy, expected); + + ASSERT_EQ(KLEIDICV_OK, + kleidicv_remap_s16_u8( + source.data(), source.stride(), source.width(), + source.height(), actual.data(), actual.stride(), + actual.width(), actual.height(), channels, mapxy.data(), + mapxy.stride(), KLEIDICV_BORDER_TYPE_REPLICATE, {})); + + EXPECT_EQ_ARRAY2D(actual, expected); + } + static void calculate_expected(test::Array2D &src, + test::Array2D &mapxy, + test::Array2D &expected) { + for (size_t row = 0; row < 
expected.height(); row++) { + for (size_t column = 0; column < expected.width() / src.channels(); + ++column) { + for (size_t ch = 0; ch < src.channels(); ++ch) { + int16_t y = std::max( + 0, std::min(src.height() - 1, + *mapxy.at(row, column * 2 + 1))); + int16_t x = std::max( + 0, + std::min(src.width() - 1, *mapxy.at(row, column * 2))); + *expected.at(row, column * src.channels() + ch) = + *src.at(y, x * src.channels() + ch); + } + } + } + } +}; + +using RemapElementTypes = ::testing::Types; +TYPED_TEST_SUITE(Remap16, RemapElementTypes); + +TYPED_TEST(Remap16, RandomNoPadding) { + size_t src_w = 3 * test::Options::vector_lanes() - 1; + size_t src_h = 4; + size_t dst_w = src_w; + size_t dst_h = src_h; + TestFixture::test_random(src_w, src_h, dst_w, dst_h, 1, 0); +} + +TYPED_TEST(Remap16, OutsideRandomPadding) { + size_t src_w = 3 * test::Options::vector_lanes() - 1; + size_t src_h = 4; + size_t dst_w = src_w; + size_t dst_h = src_h; + TestFixture::test_outside_random(src_w, src_h, dst_w, dst_h, 1, 0); +} + +TYPED_TEST(Remap16, BlendPadding) { + size_t src_w = 3 * test::Options::vector_lanes() - 1; + size_t src_h = 4; + size_t dst_w = src_w; + size_t dst_h = src_h; + TestFixture::test_blend(src_w, src_h, dst_w, dst_h, 1, 13); +} + +TYPED_TEST(Remap16, NullPointer) { + const TypeParam src[4] = {}; + TypeParam dst[1]; + int16_t mapxy[2] = {}; + test::test_null_args(kleidicv_remap_s16_u8, src, 2, 2, 2, dst, 1, 1, 1, 1, + mapxy, 4, KLEIDICV_BORDER_TYPE_REPLICATE, + kleidicv_border_values_t{}); +} + +TYPED_TEST(Remap16, ZeroImageSize) { + const TypeParam src[1] = {}; + TypeParam dst[1]; + int16_t mapxy[2] = {}; + + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + kleidicv_remap_s16_u8(src, 1, 0, 1, dst, 1, 0, 1, 1, mapxy, 4, + KLEIDICV_BORDER_TYPE_REPLICATE, + kleidicv_border_values_t{})); + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + kleidicv_remap_s16_u8(src, 1, 1, 0, dst, 1, 1, 0, 1, mapxy, 4, + KLEIDICV_BORDER_TYPE_REPLICATE, + kleidicv_border_values_t{})); +} + 
+TYPED_TEST(Remap16, InvalidImageSize) { + const TypeParam src[1] = {}; + TypeParam dst[8]; + int16_t mapxy[16] = {}; + + EXPECT_EQ( + KLEIDICV_ERROR_RANGE, + kleidicv_remap_s16_u8(src, 1, KLEIDICV_MAX_IMAGE_PIXELS + 1, 1, dst, 8, 8, + 1, 1, mapxy, 4, KLEIDICV_BORDER_TYPE_REPLICATE, + kleidicv_border_values_t{})); + + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + kleidicv_remap_s16_u8(src, 1, KLEIDICV_MAX_IMAGE_PIXELS, + KLEIDICV_MAX_IMAGE_PIXELS, dst, 8, 8, 1, 1, + mapxy, 4, KLEIDICV_BORDER_TYPE_REPLICATE, + kleidicv_border_values_t{})); + + EXPECT_EQ( + KLEIDICV_ERROR_RANGE, + kleidicv_remap_s16_u8(src, 1, 1, 1, dst, 1, KLEIDICV_MAX_IMAGE_PIXELS + 1, + 1, 1, mapxy, 4, KLEIDICV_BORDER_TYPE_REPLICATE, + kleidicv_border_values_t{})); + + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + kleidicv_remap_s16_u8( + src, 1, 1, 1, dst, 1, KLEIDICV_MAX_IMAGE_PIXELS, + KLEIDICV_MAX_IMAGE_PIXELS, 1, mapxy, 4, + KLEIDICV_BORDER_TYPE_REPLICATE, kleidicv_border_values_t{})); +} + +TYPED_TEST(Remap16, NotSupported) { + const TypeParam src[1] = {}; + TypeParam dst[8]; + int16_t mapxy[16] = {}; + + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + kleidicv_remap_s16_u8(src, 1, 1, 1, dst, 8, 8, 1, 2, mapxy, 4, + KLEIDICV_BORDER_TYPE_REPLICATE, + kleidicv_border_values_t{})); + + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + kleidicv_remap_s16_u8(src, 1, 1, 1, dst, 8, 8, 1, 1, mapxy, 4, + KLEIDICV_BORDER_TYPE_CONSTANT, + kleidicv_border_values_t{})); +} diff --git a/test/api/test_thread.cpp b/test/api/test_thread.cpp index c16ae53ca..5366c51b2 100644 --- a/test/api/test_thread.cpp +++ b/test/api/test_thread.cpp @@ -104,6 +104,71 @@ class Thread : public testing::TestWithParam

{ border_type, context); ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); } + + template + void check_remap16s(SingleThreadedFunc single_threaded_func, + MultithreadedFunc multithreaded_func, size_t channels, + Args... args) { + unsigned test_width = 0, height = 0, thread_count = 0; + std::tie(test_width, height, thread_count) = GetParam(); + const unsigned src_width = 300, src_height = 300; + // width < 8 are not supported, that's not tested here + size_t width = test_width + 8; + test::Array2D src(size_t{src_width} * channels, src_height); + test::Array2D mapxy(width * 2, height); + test::Array2D dst_single(width * channels, height), + dst_multi(width * channels, height); + + test::PseudoRandomNumberGenerator src_generator; + src.fill(src_generator); + test::PseudoRandomNumberGeneratorIntRange coord_generator{ + static_cast(-src_width / 4), + static_cast(src_width * 4 / 3)}; + mapxy.fill(coord_generator); + + kleidicv_error_t single_result = single_threaded_func( + src.data(), src.stride(), src_width, src_height, dst_single.data(), + dst_single.stride(), width, height, channels, mapxy.data(), + mapxy.stride(), args...); + + kleidicv_error_t multi_result = multithreaded_func( + src.data(), src.stride(), src_width, src_height, dst_multi.data(), + dst_multi.stride(), width, height, channels, mapxy.data(), + mapxy.stride(), args..., get_multithreading_fake(thread_count)); + + EXPECT_EQ(KLEIDICV_OK, single_result); + EXPECT_EQ(KLEIDICV_OK, multi_result); + EXPECT_EQ_ARRAY2D(dst_multi, dst_single); + } + + template + void check_remap16s_not_implemented(MultithreadedFunc multithreaded_func, + size_t channels, Args... args) { + unsigned test_width = 0, height = 0, thread_count = 0; + std::tie(test_width, height, thread_count) = GetParam(); + const unsigned src_width = 300, src_height = 300; + // width < 8 are not supported! 
+ size_t width = test_width + 8; + test::Array2D src(size_t{src_width} * channels, src_height); + test::Array2D mapxy(width * 2, height); + test::Array2D dst_small(test_width * channels, height), + dst(width * channels, height); + + kleidicv_error_t result = multithreaded_func( + src.data(), src.stride(), src_width, src_height, dst.data(), + dst.stride(), width, height, channels, mapxy.data(), mapxy.stride(), + args..., get_multithreading_fake(thread_count)); + + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, result); + + result = multithreaded_func( + src.data(), src.stride(), src_width, src_height, dst_small.data(), + dst_small.stride(), test_width, height, channels, mapxy.data(), + mapxy.stride(), args..., get_multithreading_fake(thread_count)); + + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, result); + } }; #define TEST_UNARY_OP(suffix, SrcT, DstT, ...) \ @@ -282,6 +347,21 @@ TEST(ThreadSeparableFilter2D, NotImplemented) { kleidicv_thread_separable_filter_2d_u16); } +TEST_P(Thread, remap16s_u8_border_replicate) { + check_remap16s(kleidicv_remap_s16_u8, kleidicv_thread_remap_s16_u8, + 1, KLEIDICV_BORDER_TYPE_REPLICATE, + kleidicv_border_values_t{}); +} + +TEST_P(Thread, remap16s_u8_not_implemented) { + check_remap16s_not_implemented(kleidicv_thread_remap_s16_u8, 2, + KLEIDICV_BORDER_TYPE_REPLICATE, + kleidicv_border_values_t{}); + check_remap16s_not_implemented(kleidicv_thread_remap_s16_u8, 1, + KLEIDICV_BORDER_TYPE_CONSTANT, + kleidicv_border_values_t{}); +} + TEST_P(Thread, SobelHorizontal1Channel) { check_unary_op(kleidicv_sobel_3x3_horizontal_s16_u8, kleidicv_thread_sobel_3x3_horizontal_s16_u8, -- GitLab