From 43ba830ad100cc2ceaea6899bd55e6b0ee818cd9 Mon Sep 17 00:00:00 2001 From: Denes Tarjan Date: Fri, 4 Oct 2024 14:22:16 +0000 Subject: [PATCH] Implement remap API with 16-bit + 5-bit fixed-point coordinates --- CHANGELOG.md | 4 +- adapters/opencv/kleidicv_hal.cpp | 41 +++- adapters/opencv/kleidicv_hal.h | 28 ++- adapters/opencv/opencv-4.10.patch | 42 +++- conformity/opencv/test_remap.cpp | 89 +++++-- doc/functionality.md | 7 +- doc/opencv.md | 9 +- kleidicv/include/kleidicv/kleidicv.h | 14 ++ kleidicv/include/kleidicv/remap/remap.h | 24 ++ kleidicv/src/remap/remap_api.cpp | 1 + kleidicv/src/remap/remap_neon.cpp | 176 +++++++++++++- .../include/kleidicv_thread/kleidicv_thread.h | 12 + kleidicv_thread/src/kleidicv_thread.cpp | 22 ++ scripts/benchmark/run_benchmarks_4K.sh | 1 + scripts/benchmark/run_benchmarks_FHD.sh | 1 + test/api/test_remap.cpp | 221 ++++++++++++++++++ test/api/test_thread.cpp | 121 ++++++++-- 17 files changed, 761 insertions(+), 52 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 088513dc5..20d100bbf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,9 @@ This changelog aims to follow the guiding principles of ## 0.3.0 - not yet released ### Added -- Remap implementation for 2-channel s16 coordinates and 1-channel u8 input. +- Remap implementation + - 2-channel s16 and s16+u16 fixed-point coordinates and 1-channel u8 input. + - 2-channel s16 + 5+5 bits' fractions fixed-point coordinates and 1-channel u8 input. ## 0.2.0 - 2024-09-30 diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp index f55a3aa7a..6c1c37829 100644 --- a/adapters/opencv/kleidicv_hal.cpp +++ b/adapters/opencv/kleidicv_hal.cpp @@ -1217,14 +1217,19 @@ int remap_s16(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int dst_width, int dst_height, const int16_t *mapxy, size_t mapxy_step, int border_type, - [[maybe_unused]] const double border_value[4]) { + const double border_value[4]) { kleidicv_border_type_t kleidicv_border_type; if (from_opencv(border_type, kleidicv_border_type)) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } - // This will be used when constant borders are implemented kleidicv_border_values_t border_values = {}; + if (border_type == kleidicv_border_type_t::KLEIDICV_BORDER_TYPE_CONSTANT) { + border_values.top = border_value[0]; + border_values.left = border_value[1]; + border_values.bottom = border_value[2]; + border_values.right = border_value[3]; + } auto mt = get_multithreading(); if (src_type == CV_8UC1) { @@ -1239,4 +1244,36 @@ int remap_s16(int src_type, const uchar *src_data, size_t src_step, return CV_HAL_ERROR_NOT_IMPLEMENTED; } +int remap_s16point5(int src_type, const uchar *src_data, size_t src_step, + int src_width, int src_height, uchar *dst_data, + size_t dst_step, int dst_width, int dst_height, + const int16_t *mapxy, size_t mapxy_step, + const uint16_t *mapfrac, size_t mapfrac_step, + int border_type, const double border_value[4]) { + kleidicv_border_type_t kleidicv_border_type; + if (from_opencv(border_type, kleidicv_border_type)) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + kleidicv_border_values_t border_values = {}; + if (border_type == kleidicv_border_type_t::KLEIDICV_BORDER_TYPE_CONSTANT) { + border_values.top = border_value[0]; + border_values.left = border_value[1]; + border_values.bottom = border_value[2]; + border_values.right = border_value[3]; + } + auto mt = get_multithreading(); + + if (src_type == CV_8UC1) { + return convert_error(kleidicv_thread_remap_s16point5_u8( + src_data, src_step, static_cast(src_width), + static_cast(src_height), dst_data, dst_step, + static_cast(dst_width), static_cast(dst_height), 1, + mapxy, mapxy_step, mapfrac, mapfrac_step, kleidicv_border_type, + border_values, mt)); + } + + return CV_HAL_ERROR_NOT_IMPLEMENTED; +} + } // namespace kleidicv::hal diff --git a/adapters/opencv/kleidicv_hal.h b/adapters/opencv/kleidicv_hal.h index 7e13db7fd..c45eaede7 100644 --- a/adapters/opencv/kleidicv_hal.h +++ b/adapters/opencv/kleidicv_hal.h @@ -141,6 +141,13 @@ int remap_s16(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int dst_width, int dst_height, const int16_t *mapxy, size_t mapxy_step, int border_type, const double border_value[4]); + +int remap_s16point5(int src_type, const uchar *src_data, size_t src_step, + int src_width, int src_height, uchar *dst_data, + size_t dst_step, int dst_width, int dst_height, + const int16_t *mapxy, size_t mapxy_step, + const uint16_t *mapfrac, size_t mapfrac_step, + int border_type, const double border_value[4]); } // namespace hal } // namespace kleidicv @@ -355,10 +362,10 @@ static inline int kleidicv_canny_with_fallback( #define cv_hal_canny kleidicv_canny_with_fallback #endif // KLEIDICV_EXPERIMENTAL_FEATURE_CANNY +// remap // This condition can be removed if this HAL macro is defined in all supported // versions #ifdef cv_hal_remap16s -// remap static inline int kleidicv_remap_s16_with_fallback( int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int dst_width, @@ -374,6 +381,25 @@ static inline int kleidicv_remap_s16_with_fallback( #define cv_hal_remap16s kleidicv_remap_s16_with_fallback #endif // cv_hal_remap16s +// This condition can be removed if this HAL macro is defined in all supported +// versions +#ifdef cv_hal_remap16s16u +static inline int kleidicv_remap_s16point5_with_fallback( + int src_type, const uchar *src_data, size_t src_step, int src_width, + int src_height, uchar *dst_data, size_t dst_step, int dst_width, + int dst_height, const int16_t *mapxy, size_t mapxy_step, + const uint16_t *mapfrac, size_t mapfrac_step, int border_type, + const double border_value[4]) { + return KLEIDICV_HAL_FALLBACK_FORWARD( + remap_s16point5, cv_hal_remap16s16u, src_type, src_data, src_step, + src_width, src_height, dst_data, dst_step, dst_width, dst_height, mapxy, + mapxy_step, mapfrac, mapfrac_step, border_type, border_value); +} + +#undef cv_hal_remap16s16u +#define cv_hal_remap16s16u kleidicv_remap_s16point5_with_fallback +#endif // cv_hal_remap16s16u + #endif // OPENCV_IMGPROC_HAL_REPLACEMENT_HPP #ifdef OPENCV_CORE_HAL_REPLACEMENT_HPP diff --git a/adapters/opencv/opencv-4.10.patch b/adapters/opencv/opencv-4.10.patch index 0ab00eb69..23d1dd320 100644 --- a/adapters/opencv/opencv-4.10.patch +++ b/adapters/opencv/opencv-4.10.patch @@ -91,15 +91,15 @@ index 8c6d8ad9a9..47eb6fdb66 100644 } else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED) diff --git a/modules/imgproc/src/hal_replacement.hpp b/modules/imgproc/src/hal_replacement.hpp -index 773fed9b48..3f545740f2 100644 +index 773fed9b48..145d653f5d 100644 --- a/modules/imgproc/src/hal_replacement.hpp +++ b/modules/imgproc/src/hal_replacement.hpp -@@ -328,6 +328,32 @@ inline int hal_ni_remap32f(int src_type, const uchar *src_data, size_t src_step, +@@ -328,6 +328,60 @@ inline int hal_ni_remap32f(int src_type, const uchar *src_data, size_t src_step, #define cv_hal_remap32f hal_ni_remap32f //! @endcond +/** -+ @brief hal_remap with short maps ++ @brief hal_remap with a short integer map + @param src_type source and destination image type + @param src_data source image data + @param src_step source image step @@ -123,15 +123,43 @@ index 773fed9b48..3f545740f2 100644 +//! @cond IGNORED +#define cv_hal_remap16s hal_ni_remap16s +//! @endcond ++ ++/** ++ @brief hal_remap with short maps plus fractions ++ @param src_type source and destination image type ++ @param src_data source image data ++ @param src_step source image step ++ @param src_width source image width ++ @param src_height source image height ++ @param dst_data destination image data ++ @param dst_step destination image step ++ @param dst_width destination image width ++ @param dst_height destination image height ++ @param mapxy map for interleaved x and y values ++ @param mapxy_step mapxy matrix step ++ @param mapfrac map for fractional part x and y values (5+5 bits) ++ @param mapfrac_step mapfrac matrix step ++ @param border_type border processing mode (CV_HAL_BORDER_REFLECT, ...) ++ @param border_value values to use for CV_HAL_BORDER_CONSTANT mode ++ @sa cv::remap ++ */ ++inline int hal_ni_remap16s16u(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, ++ uchar *dst_data, size_t dst_step, int dst_width, int dst_height, ++ const short* mapxy, size_t mapxy_step, const unsigned short* mapfrac, size_t mapfrac_step, int border_type, const double border_value[4]) ++{ return CV_HAL_ERROR_NOT_IMPLEMENTED; } ++ ++//! @cond IGNORED ++#define cv_hal_remap16s16u hal_ni_remap16s16u ++//! @endcond + /** @brief hal_cvtBGRtoBGR @param src_data source image data diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp -index d7c9c64c3c..1a382811fa 100644 +index d7c9c64c3c..348208b72d 100644 --- a/modules/imgproc/src/imgwarp.cpp +++ b/modules/imgproc/src/imgwarp.cpp -@@ -1819,6 +1819,10 @@ void cv::remap( InputArray _src, OutputArray _dst, +@@ -1819,6 +1819,14 @@ void cv::remap( InputArray _src, OutputArray _dst, { CALL_HAL(remap32f, cv_hal_remap32f, src.type(), src.data, src.step, src.cols, src.rows, dst.data, dst.step, dst.cols, dst.rows, map1.ptr(), map1.step, map2.ptr(), map2.step, interpolation, borderType, borderValue.val); @@ -139,6 +167,10 @@ index d7c9c64c3c..1a382811fa 100644 + { + CALL_HAL(remap16s, cv_hal_remap16s, src.type(), src.data, src.step, src.cols, src.rows, dst.data, dst.step, dst.cols, dst.rows, + map1.ptr(), map1.step, borderType, borderValue.val); ++ } else if ((map1.type() == CV_16SC2) && (map2.type() == CV_16UC1) && interpolation == INTER_LINEAR) ++ { ++ CALL_HAL(remap16s16u, cv_hal_remap16s16u, src.type(), src.data, src.step, src.cols, src.rows, dst.data, dst.step, dst.cols, dst.rows, ++ map1.ptr(), map1.step, map2.ptr(), map2.step, borderType, borderValue.val); } interpolation &= ~WARP_RELATIVE_MAP; diff --git a/conformity/opencv/test_remap.cpp b/conformity/opencv/test_remap.cpp index 321f7c623..fea0f7a3f 100644 --- a/conformity/opencv/test_remap.cpp +++ b/conformity/opencv/test_remap.cpp @@ -10,12 +10,12 @@ #include "opencv2/imgproc/hal/interface.h" #include "tests.h" -const size_t kMaxHeight = 32, kMaxWidth = 32; +const int kMaxHeight = 32, kMaxWidth = 32; template -static cv::Mat get_source_mat(int Format) { +static cv::Mat get_source_mat(int format) { auto generate_source = [&]() { - cv::Mat m{kMaxHeight, kMaxWidth, Format}; + cv::Mat m(kMaxHeight, kMaxWidth, format); for (size_t row = 0; row < kMaxHeight; ++row) { for (size_t column = 0; column < kMaxWidth; ++column) { m.at(row, column) = @@ -31,10 +31,10 @@ static cv::Mat get_source_mat(int Format) { // BorderValue is interpreted as 1/1000, i.e. 500 for 0.5 template -cv::Mat exec_remap16(cv::Mat& mapxy_mat) { - cv::Mat empty; - cv::Mat result(mapxy_mat.size().height, mapxy_mat.size().width, Format); +cv::Mat exec_remap_s16(cv::Mat& mapxy_mat) { cv::Mat source_mat = get_source_mat(Format); + cv::Mat result(mapxy_mat.rows, mapxy_mat.cols, Format); + cv::Mat empty; remap(source_mat, result, mapxy_mat, empty, Interpolation, BorderMode, BorderValue / 1000.0); return result; @@ -43,18 +43,19 @@ cv::Mat exec_remap16(cv::Mat& mapxy_mat) { #if MANAGER template -bool test_remap16(int index, RecreatedMessageQueue& request_queue, - RecreatedMessageQueue& reply_queue) { +bool test_remap_s16(int index, RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue) { + cv::Mat source_mat = get_source_mat(Format); cv::RNG rng(0); - for (size_t x = 5; x <= kMaxWidth; x += 3) { - for (size_t y = 5; y <= kMaxHeight; y += 2) { + for (size_t w = 5; w <= kMaxWidth; w += 3) { + for (size_t h = 5; h <= kMaxHeight; h += 2) { cv::Mat source_mat = get_source_mat(Format); - cv::Mat mapxy_mat(x, y, CV_16SC2); + cv::Mat mapxy_mat(w, h, CV_16SC2); rng.fill(mapxy_mat, cv::RNG::UNIFORM, -3, kMaxWidth + 3); - cv::Mat actual_mat = exec_remap16(mapxy_mat); + cv::Mat actual_mat = exec_remap_s16(mapxy_mat); cv::Mat expected_mat = get_expected_from_subordinate( index, request_queue, reply_queue, mapxy_mat); @@ -64,7 +65,64 @@ bool test_remap16(int index, RecreatedMessageQueue& request_queue, (CV_MAT_DEPTH(Format) == CV_16U && !are_matrices_different(1, actual_mat, expected_mat)); if (!success) { - fail_print_matrices(x, y, source_mat, actual_mat, expected_mat); + fail_print_matrices(w, h, source_mat, actual_mat, expected_mat); + return true; + } + } + } + return false; +} +#endif + +// BorderValue is interpreted as 1/1000, i.e. 500 for 0.5 +template +cv::Mat exec_remap_s16point5(cv::Mat& map_mat) { + cv::Mat empty; + // integer part is 16SC2, that is twice as much data as the fractional part, + // 16UC1 + int height = map_mat.rows * 2 / 3; + cv::Mat mapxy_mat = map_mat.rowRange(0, height); + ushort* p_frac = map_mat.rowRange(height, map_mat.rows).ptr(); + cv::Mat mapfrac_mat{height, map_mat.cols, CV_16UC1, p_frac}; + cv::Mat result(mapxy_mat.rows, mapxy_mat.cols, Format); + cv::Mat source_mat = get_source_mat(Format); + remap(source_mat, result, mapxy_mat, mapfrac_mat, Interpolation, BorderMode, + BorderValue / 1000.0); + return result; +} + +#if MANAGER +template +bool test_remap_s16point5(int index, RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue) { + cv::Mat source_mat = get_source_mat(Format); + cv::RNG rng(0); + + for (int w = 5; w <= kMaxWidth; ++w) { + for (int h = 6; h <= kMaxHeight; h += 2) { // h must be even + cv::Mat map_mat(h * 3 / 2, w, CV_16SC2); + cv::Mat mapxy_mat = map_mat.rowRange(0, h); + ushort* p_frac = map_mat.rowRange(h, map_mat.rows).ptr(); + cv::Mat mapfrac_mat{h, map_mat.cols, CV_16UC1, p_frac}; + rng.fill(mapxy_mat, cv::RNG::UNIFORM, -3, kMaxWidth + 3); + // Test out of range fractional part too + rng.fill(mapfrac_mat, cv::RNG::UNIFORM, 0, cv::INTER_TAB_SIZE2 * 3 / 2); + + cv::Mat actual_mat = + exec_remap_s16point5(map_mat); + cv::Mat expected_mat = get_expected_from_subordinate( + index, request_queue, reply_queue, map_mat); + + bool success = + (CV_MAT_DEPTH(Format) == CV_8U && + !are_matrices_different(1, actual_mat, expected_mat)) || + (CV_MAT_DEPTH(Format) == CV_16U && + !are_matrices_different(1, actual_mat, expected_mat)); + if (!success) { + fail_print_matrices(w, h, source_mat, actual_mat, expected_mat); return true; } } @@ -76,7 +134,8 @@ bool test_remap16(int index, RecreatedMessageQueue& request_queue, std::vector& remap_tests_get() { // clang-format off static std::vector tests = { - TEST("Remap16s uint8", (test_remap16), (exec_remap16)), + TEST("RemapS16 uint8", (test_remap_s16), (exec_remap_s16)), + TEST("RemapS16Point5 uint8", (test_remap_s16point5), (exec_remap_s16point5)), }; // clang-format on return tests; diff --git a/doc/functionality.md b/doc/functionality.md index 7ffd1c42f..493751689 100644 --- a/doc/functionality.md +++ b/doc/functionality.md @@ -91,6 +91,7 @@ See `doc/opencv.md` for details of the functionality available in OpenCV. | 8x8 | | x | # Remap -| | u8 | u16 | -|--------------------------------|-----|-----| -| Remap int16 coordinates | x | | +| | u8 | u16 | +|--------------------------------------------|-----|-----| +| Remap int16 coordinates | x | | +| Remap int16+uint16 fixed-point coordinates | x | | diff --git a/doc/opencv.md b/doc/opencv.md index 1748c4acc..8fe94b51b 100644 --- a/doc/opencv.md +++ b/doc/opencv.md @@ -195,6 +195,9 @@ Notes on parameters: Geometrically transforms the `src` image by taking the pixels specified by the coordinates from the `map` image. Notes on parameters: * `src.depth()` - only supports `CV_8U` depth and 1 channel. -* `map1` shall be 16SC2 and `map2` shall be empty -* `interpolation` shall be `INTER_NEAREST` -* `borderMode` shall be `BORDER_REPLICATE` +* `borderMode` - only supports `BORDER_REPLICATE` +Supported map configurations: +* `map1` is 16SC2: channel #1 is x coordinate (column) and channel #2 is y (row) + * supported `interpolation`: `INTER_NEAREST` only +* `map1` is 16SC2 and `map2` is 16UC1: `map1` is as above, `map2` contains combined 5+5 bits of x (low) and y (high) fractions, i.e. x = x1 + x2 / 2^5 + * supported `interpolation`: `INTER_LINEAR` only diff --git a/kleidicv/include/kleidicv/kleidicv.h b/kleidicv/include/kleidicv/kleidicv.h index 88360568b..20ecb8db2 100644 --- a/kleidicv/include/kleidicv/kleidicv.h +++ b/kleidicv/include/kleidicv/kleidicv.h @@ -1698,6 +1698,20 @@ KLEIDICV_API_DECLARATION(kleidicv_remap_s16_u8, const uint8_t *src, kleidicv_border_type_t border_type, kleidicv_border_values_t border_values); +#ifndef DOXYGEN +/// Internal - not part of the public API and its direct use is not supported. +/// Functionality is similar to @ref kleidicv_remap_s16_u8 , the difference is +/// in the data format: it contains a fractional part with 5+5 bits (`mapfrac`). +KLEIDICV_API_DECLARATION(kleidicv_remap_s16point5_u8, const uint8_t *src, + size_t src_stride, size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, size_t dst_width, + size_t dst_height, size_t channels, + const int16_t *mapxy, size_t mapxy_stride, + const uint16_t *mapfrac, size_t mapfrac_stride, + kleidicv_border_type_t border_type, + kleidicv_border_values_t border_values); +#endif // DOXYGEN + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/kleidicv/include/kleidicv/remap/remap.h b/kleidicv/include/kleidicv/remap/remap.h index 5e8845b77..7af219ba4 100644 --- a/kleidicv/include/kleidicv/remap/remap.h +++ b/kleidicv/include/kleidicv/remap/remap.h @@ -25,6 +25,20 @@ inline bool remap_s16_is_implemented(size_t dst_width, } } +template +inline bool remap_s16point5_is_implemented(size_t dst_width, + kleidicv_border_type_t border_type, + size_t channels) { + if constexpr (std::is_same::value) { + return (dst_width >= 8 && + border_type == + kleidicv_border_type_t::KLEIDICV_BORDER_TYPE_REPLICATE && + channels == 1); + } else { + return false; + } +} + namespace neon { template @@ -34,6 +48,16 @@ kleidicv_error_t remap_s16(const T *src, size_t src_stride, size_t src_width, const int16_t *mapxy, size_t mapxy_stride, kleidicv_border_type_t border_type, kleidicv_border_values_t border_values); + +template +kleidicv_error_t remap_s16point5(const T *src, size_t src_stride, + size_t src_width, size_t src_height, T *dst, + size_t dst_stride, size_t dst_width, + size_t dst_height, size_t channels, + const int16_t *mapxy, size_t mapxy_stride, + const uint16_t *mapfrac, size_t mapfrac_stride, + kleidicv_border_type_t border_type, + kleidicv_border_values_t border_values); } // namespace neon } // namespace kleidicv diff --git a/kleidicv/src/remap/remap_api.cpp b/kleidicv/src/remap/remap_api.cpp index b8f85b4cc..cd29673c0 100644 --- a/kleidicv/src/remap/remap_api.cpp +++ b/kleidicv/src/remap/remap_api.cpp @@ -11,3 +11,4 @@ nullptr, nullptr) KLEIDICV_DEFINE_C_API(kleidicv_remap_s16_u8, remap_s16, uint8_t); +KLEIDICV_DEFINE_C_API(kleidicv_remap_s16point5_u8, remap_s16point5, uint8_t); diff --git a/kleidicv/src/remap/remap_neon.cpp b/kleidicv/src/remap/remap_neon.cpp index 27d5d06c6..827e11956 100644 --- a/kleidicv/src/remap/remap_neon.cpp +++ b/kleidicv/src/remap/remap_neon.cpp @@ -20,11 +20,8 @@ class RemapS16 { using MapVecTraits = neon::VecTraits; using MapVectorType = typename MapVecTraits::VectorType; using MapVector2Type = typename MapVecTraits::Vector2Type; - using VecTraits = neon::VecTraits; - using VectorType = typename VecTraits::VectorType; - explicit RemapS16(Rows src_rows, size_t src_width, - size_t src_height) + RemapS16(Rows src_rows, size_t src_width, size_t src_height) : src_rows_{src_rows}, v_src_stride_{vdupq_n_s16(static_cast(src_rows_.stride()))}, v_xmax_{vdupq_n_s16(static_cast(src_width - 1))}, @@ -100,7 +97,163 @@ kleidicv_error_t remap_s16( return KLEIDICV_OK; } -#define KLEIDICV_INSTANTIATE_TEMPLATE(type) \ +template +class RemapS16Point5; + +template <> +class RemapS16Point5 { + public: + using ScalarType = uint8_t; + using MapVecTraits = neon::VecTraits; + using MapVectorType = typename MapVecTraits::VectorType; + using MapVector2Type = typename MapVecTraits::Vector2Type; + using FracVecTraits = neon::VecTraits; + using FracVectorType = typename FracVecTraits::VectorType; + + static const uint16_t FRAC_BITS = 5; + static const uint16_t FRAC_MAX = 1 << FRAC_BITS; + static const uint16_t FRAC_MAX_SQUARE = FRAC_MAX * FRAC_MAX; + + RemapS16Point5(Rows src_rows, size_t src_width, + size_t src_height) + : src_rows_{src_rows}, + v_src_stride_{vdupq_n_s16(static_cast(src_rows_.stride()))}, + v_xmax_{vdupq_n_s16(static_cast(src_width - 1))}, + v_ymax_{vdupq_n_s16(static_cast(src_height - 1))} {} + + void process_row(size_t width, Columns mapxy, + Columns mapfrac, Columns dst) { + auto vector_path = [&](size_t step) { + MapVector2Type xy = vld2q_s16(&mapxy[0]); + FracVectorType frac = vld1q_u16(&mapfrac[0]); + uint16x8_t xfrac = + vbslq_u16(vcltq_s16(xy.val[0], vdupq_n_s16(0)), vdupq_n_u16(0), + vandq_u16(frac, vdupq_n_u16(FRAC_MAX - 1))); + uint16x8_t yfrac = vbslq_u16( + vcltq_s16(xy.val[1], vdupq_n_s16(0)), vdupq_n_u16(0), + vandq_u16(vshrq_n_u16(frac, FRAC_BITS), vdupq_n_u16(FRAC_MAX - 1))); + uint16x8_t nxfrac = vsubq_u16(vdupq_n_u16(FRAC_MAX), xfrac); + uint16x8_t nyfrac = vsubq_u16(vdupq_n_u16(FRAC_MAX), yfrac); + + // Clamp coordinates to within the dimensions of the source image + uint16x8_t x0 = vreinterpretq_u16_s16( + vmaxq_s16(vdupq_n_s16(0), vminq_s16(xy.val[0], v_xmax_))); + uint16x8_t y0 = vreinterpretq_u16_s16( + vmaxq_s16(vdupq_n_s16(0), vminq_s16(xy.val[1], v_ymax_))); + + // x1 = x0 + 1, except if it's already xmax + uint16x8_t x1 = vsubq_u16(x0, vcltq_s16(xy.val[0], v_xmax_)); + uint16x8_t y1 = vsubq_u16(y0, vcltq_s16(xy.val[1], v_ymax_)); + + uint16x4_t dst_low = load_and_interpolate( + vmovl_u16(vget_low_u16(x0)), vget_low_u16(y0), + vmovl_u16(vget_low_u16(x1)), vget_low_u16(y1), vget_low_u16(xfrac), + vget_low_u16(yfrac), vget_low_u16(nxfrac), vget_low_u16(nyfrac)); + + uint16x4_t dst_high = load_and_interpolate( + vmovl_high_u16(x0), vget_high_u16(y0), vmovl_high_u16(x1), + vget_high_u16(y1), vget_high_u16(xfrac), vget_high_u16(yfrac), + vget_high_u16(nxfrac), vget_high_u16(nyfrac)); + + vst1_u8(&dst[0], vuzp1_u8(dst_low, dst_high)); + mapxy += ptrdiff_t(step); + mapfrac += ptrdiff_t(step); + dst += ptrdiff_t(step); + }; + LoopUnroll loop{width, MapVecTraits::num_lanes()}; + loop.unroll_once(vector_path); + ptrdiff_t back_step = static_cast(loop.step()) - + static_cast(loop.remaining_length()); + mapxy -= back_step; + mapfrac -= back_step; + dst -= back_step; + loop.remaining([&](size_t, size_t step) { vector_path(step); }); + } + + private: + uint16x4_t load_and_interpolate(uint32x4_t x0, uint16x4_t y0, uint32x4_t x1, + uint16x4_t y1, uint16x4_t xfrac, + uint16x4_t yfrac, uint16x4_t nxfrac, + uint16x4_t nyfrac) { + // Calculate offsets from coordinates (y * stride + x) + // a: top left, b: top right, c: bottom left, d: bottom right + uint32x4_t offset = vmlal_u16(x0, y0, vget_low_u16(v_src_stride_)); + uint64_t acc = + static_cast(src_rows_[vgetq_lane_u32(offset, 0)]) | + (static_cast(src_rows_[vgetq_lane_u32(offset, 1)]) << 16) | + (static_cast(src_rows_[vgetq_lane_u32(offset, 2)]) << 32) | + (static_cast(src_rows_[vgetq_lane_u32(offset, 3)]) << 48); + uint16x4_t a = vreinterpret_u16_u64(vset_lane_u64(acc, vdup_n_u64(0), 0)); + + offset = vmlal_u16(x1, y0, vget_low_u16(v_src_stride_)); + + acc = static_cast(src_rows_[vgetq_lane_u32(offset, 0)]) | + (static_cast(src_rows_[vgetq_lane_u32(offset, 1)]) << 16) | + (static_cast(src_rows_[vgetq_lane_u32(offset, 2)]) << 32) | + (static_cast(src_rows_[vgetq_lane_u32(offset, 3)]) << 48); + uint16x4_t b = vreinterpret_u16_u64(vset_lane_u64(acc, vdup_n_u64(0), 0)); + + uint16x4_t line0 = vmla_u16(vmul_u16(xfrac, b), nxfrac, a); + + offset = vmlal_u16(x0, y1, vget_low_u16(v_src_stride_)); + + acc = static_cast(src_rows_[vgetq_lane_u32(offset, 0)]) | + (static_cast(src_rows_[vgetq_lane_u32(offset, 1)]) << 16) | + (static_cast(src_rows_[vgetq_lane_u32(offset, 2)]) << 32) | + (static_cast(src_rows_[vgetq_lane_u32(offset, 3)]) << 48); + uint16x4_t c = vreinterpret_u16_u64(vset_lane_u64(acc, vdup_n_u64(0), 0)); + + uint32x4_t line0_lerpd = + vmlal_u16(vdupq_n_u32(FRAC_MAX_SQUARE / 2), line0, nyfrac); + + offset = vmlal_u16(x1, y1, vget_low_u16(v_src_stride_)); + + acc = static_cast(src_rows_[vgetq_lane_u32(offset, 0)]) | + (static_cast(src_rows_[vgetq_lane_u32(offset, 1)]) << 16) | + (static_cast(src_rows_[vgetq_lane_u32(offset, 2)]) << 32) | + (static_cast(src_rows_[vgetq_lane_u32(offset, 3)]) << 48); + uint16x4_t d = vreinterpret_u16_u64(vset_lane_u64(acc, vdup_n_u64(0), 0)); + + uint16x4_t line1 = vmla_u16(vmul_u16(xfrac, d), nxfrac, c); + return vshrn_n_u32(vmlal_u16(line0_lerpd, line1, yfrac), 2 * FRAC_BITS); + } + + Rows src_rows_; + int16x8_t v_src_stride_; + int16x8_t v_xmax_; + int16x8_t v_ymax_; +}; // end of class RemapS16Point5 + +template +kleidicv_error_t remap_s16point5( + const T *src, size_t src_stride, size_t src_width, size_t src_height, + T *dst, size_t dst_stride, size_t dst_width, size_t dst_height, + size_t channels, const int16_t *mapxy, size_t mapxy_stride, + const uint16_t *mapfrac, size_t mapfrac_stride, + [[maybe_unused]] kleidicv_border_type_t border_type, + [[maybe_unused]] kleidicv_border_values_t border_values) { + CHECK_POINTER_AND_STRIDE(src, src_stride, src_height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, dst_height); + CHECK_POINTER_AND_STRIDE(mapxy, mapxy_stride, dst_height); + CHECK_POINTER_AND_STRIDE(mapfrac, mapfrac_stride, dst_height); + CHECK_IMAGE_SIZE(src_width, src_height); + CHECK_IMAGE_SIZE(dst_width, dst_height); + + if (!remap_s16point5_is_implemented(dst_width, border_type, channels)) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + + Rows src_rows{src, src_stride, channels}; + Rows mapxy_rows{mapxy, mapxy_stride, 2}; + Rows mapfrac_rows{mapfrac, mapfrac_stride, 1}; + Rows dst_rows{dst, dst_stride, channels}; + RemapS16Point5 operation{src_rows, src_width, src_height}; + Rectangle rect{dst_width, dst_height}; + zip_rows(operation, rect, mapxy_rows, mapfrac_rows, dst_rows); + return KLEIDICV_OK; +} + +#define KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_S16(type) \ template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t remap_s16( \ const type *src, size_t src_stride, size_t src_width, size_t src_height, \ type *dst, size_t dst_stride, size_t dst_width, size_t dst_height, \ @@ -108,6 +261,17 @@ kleidicv_error_t remap_s16( kleidicv_border_type_t border_type, \ kleidicv_border_values_t border_values) -KLEIDICV_INSTANTIATE_TEMPLATE(uint8_t); +KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_S16(uint8_t); + +#define KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_S16Point5(type) \ + template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t remap_s16point5( \ + const type *src, size_t src_stride, size_t src_width, size_t src_height, \ + type *dst, size_t dst_stride, size_t dst_width, size_t dst_height, \ + size_t channels, const int16_t *mapxy, size_t mapxy_stride, \ + const uint16_t *mapfrac, size_t mapfrac_stride, \ + kleidicv_border_type_t border_type, \ + kleidicv_border_values_t border_values) + +KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_S16Point5(uint8_t); } // namespace kleidicv::neon diff --git a/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h b/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h index 9d44879de..08a0ac6b1 100644 --- a/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h +++ b/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h @@ -367,6 +367,18 @@ kleidicv_error_t kleidicv_thread_remap_s16_u8( kleidicv_border_type_t border_type, kleidicv_border_values_t border_values, kleidicv_thread_multithreading); +/// Internal - not part of the public API and its direct use is not supported. +/// +/// Multithreaded implementation of kleidicv_remap_s16point5_u8 - see the +/// documentation of that function for more details. +kleidicv_error_t kleidicv_thread_remap_s16point5_u8( + const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, size_t dst_width, size_t dst_height, + size_t channels, const int16_t *mapxy, size_t mapxy_stride, + const uint16_t *mapfrac, size_t mapfrac_stride, + kleidicv_border_type_t border_type, kleidicv_border_values_t border_values, + kleidicv_thread_multithreading); + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/kleidicv_thread/src/kleidicv_thread.cpp b/kleidicv_thread/src/kleidicv_thread.cpp index e782e48ec..f043063a8 100644 --- a/kleidicv_thread/src/kleidicv_thread.cpp +++ b/kleidicv_thread/src/kleidicv_thread.cpp @@ -608,3 +608,25 @@ kleidicv_error_t kleidicv_thread_remap_s16_u8( }; return parallel_batches(callback, mt, dst_height); } + +kleidicv_error_t kleidicv_thread_remap_s16point5_u8( + const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, size_t dst_width, size_t dst_height, + size_t channels, const int16_t *mapxy, size_t mapxy_stride, + const uint16_t *mapfrac, size_t mapfrac_stride, + kleidicv_border_type_t border_type, kleidicv_border_values_t border_values, + kleidicv_thread_multithreading mt) { + if (!kleidicv::remap_s16point5_is_implemented(dst_width, border_type, + channels)) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + auto callback = [=](unsigned begin, unsigned end) { + return kleidicv_remap_s16point5_u8( + src, src_stride, src_width, src_height, + dst + begin * dst_stride / sizeof(uint8_t), dst_stride, dst_width, + end - begin, channels, mapxy + begin * mapxy_stride / sizeof(int16_t), + mapxy_stride, mapfrac + begin * mapfrac_stride / sizeof(uint16_t), + mapfrac_stride, border_type, border_values); + }; + return parallel_batches(callback, mt, dst_height); +} diff --git a/scripts/benchmark/run_benchmarks_4K.sh b/scripts/benchmark/run_benchmarks_4K.sh index 1e83606f0..6de898fac 100755 --- a/scripts/benchmark/run_benchmarks_4K.sh +++ b/scripts/benchmark/run_benchmarks_4K.sh @@ -94,6 +94,7 @@ benchmarks=( "InRange_F32: opencv_perf_core '*inRangeScalar/*' '(3840x2160, 32FC1, 1, 2)'" "Remap_S16_U8: opencv_perf_imgproc '*Remap/*' '(3840x2160, 8UC1, 16SC2, INTER_NEAREST, BORDER_REPLICATE)'" + "Remap_S16Point5_U8: opencv_perf_imgproc '*Remap/*' '(3840x2160, 8UC1, 16SC2, INTER_LINEAR, BORDER_REPLICATE)'" ) for idx in "${!benchmarks[@]}"; do diff --git a/scripts/benchmark/run_benchmarks_FHD.sh b/scripts/benchmark/run_benchmarks_FHD.sh index f5677e5ce..573c951e1 100755 --- a/scripts/benchmark/run_benchmarks_FHD.sh +++ b/scripts/benchmark/run_benchmarks_FHD.sh @@ -94,6 +94,7 @@ benchmarks=( "InRange_F32: opencv_perf_core '*inRangeScalar/*' '(1920x1080, 32FC1, 1, 2)'" "Remap_S16_U8: opencv_perf_imgproc '*Remap/*' '(1920x1080, 8UC1, 16SC2, INTER_NEAREST, BORDER_REPLICATE)'" + "Remap_S16Point5_U8: opencv_perf_imgproc '*Remap/*' '(1920x1080, 8UC1, 16SC2, INTER_LINEAR, BORDER_REPLICATE)'" ) for idx in "${!benchmarks[@]}"; do diff --git a/test/api/test_remap.cpp b/test/api/test_remap.cpp index 888885bdf..2698f0967 100644 --- a/test/api/test_remap.cpp +++ b/test/api/test_remap.cpp @@ -192,4 +192,225 @@ TYPED_TEST(Remap16, NotSupported) { kleidicv_remap_s16_u8(src, 1, 1, 1, dst, 8, 8, 1, 1, mapxy, 4, KLEIDICV_BORDER_TYPE_CONSTANT, kleidicv_border_values_t{})); + + EXPECT_EQ( + KLEIDICV_ERROR_RANGE, + kleidicv_remap_s16_u8(src, 1, 1, 1, dst, 1, KLEIDICV_MAX_IMAGE_PIXELS + 1, + 1, 1, mapxy, 4, KLEIDICV_BORDER_TYPE_REPLICATE, + kleidicv_border_values_t{})); + + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + kleidicv_remap_s16_u8( + src, 1, 1, 1, dst, 1, KLEIDICV_MAX_IMAGE_PIXELS, + KLEIDICV_MAX_IMAGE_PIXELS, 1, mapxy, 4, + KLEIDICV_BORDER_TYPE_REPLICATE, kleidicv_border_values_t{})); +} + +template +class RemapS16Point5 : public testing::Test { + public: + static const uint16_t FRAC_BITS = 5; + static const uint16_t FRAC_MAX = 1 << FRAC_BITS; + static const uint16_t FRAC_MAX_SQUARE = FRAC_MAX * FRAC_MAX; + + static void test_random(size_t src_w, size_t src_h, size_t dst_w, + size_t dst_h, size_t channels, size_t padding) { + test::Array2D mapxy(2 * dst_w, dst_h, padding, 2); + test::PseudoRandomNumberGenerator coord_generator; + mapxy.fill(coord_generator); + test::Array2D mapfrac(dst_w, dst_h, padding); + test::PseudoRandomNumberGenerator frac_generator; + mapfrac.fill(frac_generator); + execute_test(mapxy, mapfrac, src_w, src_h, dst_w, dst_h, channels, padding); + } + + static void test_outside_random(size_t src_w, size_t src_h, size_t dst_w, + size_t dst_h, size_t channels, + size_t padding) { + test::Array2D mapxy(2 * dst_w, dst_h, padding, 2); + test::PseudoRandomNumberGeneratorIntRange coord_generator{ + static_cast(-src_w), static_cast(2 * src_w)}; + mapxy.fill(coord_generator); + test::Array2D mapfrac(dst_w, dst_h, padding); + test::PseudoRandomNumberGeneratorIntRange frac_generator( + 0, static_cast(FRAC_MAX_SQUARE * 3 / 2)); + mapfrac.fill(frac_generator); + execute_test(mapxy, mapfrac, src_w, src_h, dst_w, dst_h, channels, padding); + } + + static void test_blend(size_t src_w, size_t src_h, size_t dst_w, size_t dst_h, + size_t channels, size_t padding) { + test::Array2D mapxy{2 * dst_w, dst_h, padding, 2}; + test::Array2D mapfrac(dst_w, dst_h, padding); + for (size_t row = 0; row < dst_h; ++row) { + for (size_t column = 0; column < dst_w; ++column) { + // Use a second degree function to add a nonlinear blend to the image + auto r = static_cast(row), c = static_cast(column), + w = static_cast(dst_w), h = static_cast(dst_h); + double x = c * 2 - c * c / w; + double y = r * (w - c) / w + 4 * r / h; + *mapxy.at(row, column * 2) = std::max( + 0, + std::min(static_cast(src_w - 1), static_cast(x))); + *mapxy.at(row, column * 2 + 1) = std::max( + 0, + std::min(static_cast(src_h - 1), static_cast(y))); + *mapfrac.at(row, column) = + static_cast(FRAC_MAX * (x - static_cast(x))) | + (static_cast(FRAC_MAX * (y - static_cast(y))) + << FRAC_BITS); + } + } + execute_test(mapxy, mapfrac, src_w, src_h, dst_w, dst_h, channels, padding); + } + + private: + static void execute_test(test::Array2D &mapxy, + test::Array2D &mapfrac, size_t src_w, + size_t src_h, size_t dst_w, size_t dst_h, + size_t channels, size_t padding) { + size_t src_total_width = channels * src_w; + size_t dst_total_width = channels * dst_w; + + test::Array2D source{src_total_width, src_h, padding, channels}; + test::Array2D actual{dst_total_width, dst_h, padding, channels}; + test::Array2D expected{dst_total_width, dst_h, padding, + channels}; + + test::PseudoRandomNumberGenerator generator; + source.fill(generator); + actual.fill(42); + + calculate_expected(source, mapxy, mapfrac, expected); + + ASSERT_EQ( + KLEIDICV_OK, + kleidicv_remap_s16point5_u8( + source.data(), source.stride(), source.width(), source.height(), + actual.data(), actual.stride(), actual.width(), actual.height(), + channels, mapxy.data(), mapxy.stride(), mapfrac.data(), + mapfrac.stride(), KLEIDICV_BORDER_TYPE_REPLICATE, {})); + + EXPECT_EQ_ARRAY2D(actual, expected); + } + + static ScalarType lerp2d(size_t cx, size_t cy, ScalarType a, ScalarType b, + ScalarType c, ScalarType d) { + size_t inv_cx = FRAC_MAX - cx, inv_cy = FRAC_MAX - cy; + ScalarType r = static_cast((inv_cx * inv_cy * a + + cx * inv_cy * b + inv_cx * cy * c + + cx * cy * d + FRAC_MAX_SQUARE / 2) / + FRAC_MAX_SQUARE); + return r; + } + + static void calculate_expected(test::Array2D &src, + test::Array2D &mapxy, + test::Array2D &mapfrac, + test::Array2D &expected) { + for (size_t row = 0; row < expected.height(); row++) { + for (size_t column = 0; column < expected.width() / src.channels(); + ++column) { + for (size_t ch = 0; ch < src.channels(); ++ch) { + uint8_t x_frac = *mapfrac.at(row, column) & (FRAC_MAX - 1); + uint8_t y_frac = + (*mapfrac.at(row, column) >> FRAC_BITS) & (FRAC_MAX - 1); + int16_t x0 = + std::min(src.width() - 1, *mapxy.at(row, column * 2)); + int16_t y0 = std::min(src.height() - 1, + *mapxy.at(row, column * 2 + 1)); + if (x0 < 0) { + x0 = 0, x_frac = 0; + } + if (y0 < 0) { + y0 = 0, y_frac = 0; + } + int16_t x1 = static_cast(x0) >= src.width() - 1 ? x0 : x0 + 1; + int16_t y1 = + static_cast(y0) >= src.height() - 1 ? y0 : y0 + 1; + *expected.at(row, column * src.channels() + ch) = + lerp2d(x_frac, y_frac, *src.at(y0, x0 * src.channels() + ch), + *src.at(y0, x1 * src.channels() + ch), + *src.at(y1, x0 * src.channels() + ch), + *src.at(y1, x1 * src.channels() + ch)); + } + } + } + } +}; + +using RemapS16Point5ElementTypes = ::testing::Types; +TYPED_TEST_SUITE(RemapS16Point5, RemapS16Point5ElementTypes); + +TYPED_TEST(RemapS16Point5, RandomNoPadding) { + size_t src_w = 3 * test::Options::vector_lanes() - 1; + size_t src_h = 4; + size_t dst_w = src_w; + size_t dst_h = src_h; + TestFixture::test_random(src_w, src_h, dst_w, dst_h, 1, 0); +} + +TYPED_TEST(RemapS16Point5, BlendPadding) { + size_t src_w = 3 * test::Options::vector_lanes() - 1; + size_t src_h = 4; + size_t dst_w = src_w; + size_t dst_h = src_h; + TestFixture::test_blend(src_w, src_h, dst_w, dst_h, 1, 13); +} + +TYPED_TEST(RemapS16Point5, NullPointer) { + const TypeParam src[4] = {}; + TypeParam dst[1]; + int16_t mapxy[2] = {}; + uint16_t mapfrac[1] = {}; + test::test_null_args(kleidicv_remap_s16point5_u8, src, 2, 2, 2, dst, 1, 1, 1, + 1, mapxy, 4, mapfrac, 2, KLEIDICV_BORDER_TYPE_REPLICATE, + kleidicv_border_values_t{}); +} + +TYPED_TEST(RemapS16Point5, ZeroImageSize) { + const TypeParam src[1] = {}; + TypeParam dst[1]; + int16_t mapxy[2] = {}; + uint16_t mapfrac[1] = {}; + + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + kleidicv_remap_s16point5_u8( + src, 1, 0, 1, dst, 1, 0, 1, 1, mapxy, 4, mapfrac, 2, + KLEIDICV_BORDER_TYPE_REPLICATE, kleidicv_border_values_t{})); + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + kleidicv_remap_s16point5_u8( + src, 1, 1, 0, dst, 1, 1, 0, 1, mapxy, 4, mapfrac, 2, + KLEIDICV_BORDER_TYPE_REPLICATE, kleidicv_border_values_t{})); +} + +TYPED_TEST(RemapS16Point5, InvalidImageSize) { + const TypeParam src[1] = {}; + TypeParam dst[1]; + int16_t mapxy[2] = {}; + uint16_t mapfrac[1] = {}; + + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + kleidicv_remap_s16point5_u8(src, 1, KLEIDICV_MAX_IMAGE_PIXELS + 1, + 1, dst, 1, 1, 1, 1, mapxy, 4, mapfrac, + 2, KLEIDICV_BORDER_TYPE_REPLICATE, + kleidicv_border_values_t{})); + + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + kleidicv_remap_s16point5_u8( + src, 1, KLEIDICV_MAX_IMAGE_PIXELS, KLEIDICV_MAX_IMAGE_PIXELS, + dst, 1, 1, 1, 1, mapxy, 4, mapfrac, 2, + KLEIDICV_BORDER_TYPE_REPLICATE, kleidicv_border_values_t{})); + + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + kleidicv_remap_s16point5_u8( + src, 1, 1, 1, dst, 1, KLEIDICV_MAX_IMAGE_PIXELS + 1, 1, 1, + mapxy, 4, mapfrac, 2, KLEIDICV_BORDER_TYPE_REPLICATE, + kleidicv_border_values_t{})); + + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + kleidicv_remap_s16point5_u8( + src, 1, 1, 1, dst, 1, KLEIDICV_MAX_IMAGE_PIXELS, + KLEIDICV_MAX_IMAGE_PIXELS, 1, mapxy, 4, mapfrac, 2, + KLEIDICV_BORDER_TYPE_REPLICATE, kleidicv_border_values_t{})); } diff --git a/test/api/test_thread.cpp b/test/api/test_thread.cpp index 5366c51b2..41c84f541 100644 --- a/test/api/test_thread.cpp +++ b/test/api/test_thread.cpp @@ -107,9 +107,9 @@ class Thread : public testing::TestWithParam

{ template - void check_remap16s(SingleThreadedFunc single_threaded_func, - MultithreadedFunc multithreaded_func, size_t channels, - Args... args) { + void check_remap_s16(SingleThreadedFunc single_threaded_func, + MultithreadedFunc multithreaded_func, size_t channels, + Args... args) { unsigned test_width = 0, height = 0, thread_count = 0; std::tie(test_width, height, thread_count) = GetParam(); const unsigned src_width = 300, src_height = 300; @@ -143,8 +143,8 @@ class Thread : public testing::TestWithParam

{ } template - void check_remap16s_not_implemented(MultithreadedFunc multithreaded_func, - size_t channels, Args... args) { + void check_remap_s16_not_implemented(MultithreadedFunc multithreaded_func, + size_t channels, Args... args) { unsigned test_width = 0, height = 0, thread_count = 0; std::tie(test_width, height, thread_count) = GetParam(); const unsigned src_width = 300, src_height = 300; @@ -169,6 +169,79 @@ class Thread : public testing::TestWithParam

{ EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, result); } + + template + void check_remap_s16point5(SingleThreadedFunc single_threaded_func, + MultithreadedFunc multithreaded_func, + size_t channels, Args... args) { + unsigned test_width = 0, height = 0, thread_count = 0; + std::tie(test_width, height, thread_count) = GetParam(); + const unsigned src_width = 300, src_height = 300; + // width < 8 are not supported, that's not tested here + size_t width = test_width + 8; + test::Array2D src(size_t{src_width} * channels, src_height); + test::Array2D mapxy(width * 2, height); + test::Array2D mapfrac(width, height); + test::Array2D dst_single(width * channels, height), + dst_multi(width * channels, height); + + test::PseudoRandomNumberGenerator src_generator; + src.fill(src_generator); + test::PseudoRandomNumberGeneratorIntRange coord_generator{ + 0, std::min(static_cast(src_height - 1), + static_cast(src_width - 1))}; + mapxy.fill(coord_generator); + test::PseudoRandomNumberGeneratorIntRange coordfrac_generator{ + 0, (1 << 10) - 1}; + mapfrac.fill(coordfrac_generator); + + kleidicv_error_t single_result = single_threaded_func( + src.data(), src.stride(), src_width, src_height, dst_single.data(), + dst_single.stride(), width, height, channels, mapxy.data(), + mapxy.stride(), mapfrac.data(), mapfrac.stride(), args...); + + kleidicv_error_t multi_result = multithreaded_func( + src.data(), src.stride(), src_width, src_height, dst_multi.data(), + dst_multi.stride(), width, height, channels, mapxy.data(), + mapxy.stride(), mapfrac.data(), mapfrac.stride(), args..., + get_multithreading_fake(thread_count)); + + EXPECT_EQ(KLEIDICV_OK, single_result); + EXPECT_EQ(KLEIDICV_OK, multi_result); + EXPECT_EQ_ARRAY2D(dst_multi, dst_single); + } + + template + void check_remap_s16point5_not_implemented( + MultithreadedFunc multithreaded_func, size_t channels, Args... args) { + unsigned test_width = 0, height = 0, thread_count = 0; + std::tie(test_width, height, thread_count) = GetParam(); + const unsigned src_width = 300, src_height = 300; + // width < 8 are not supported! + size_t width = test_width + 8; + test::Array2D src(size_t{src_width} * channels, src_height); + test::Array2D mapxy(width * 2, height); + test::Array2D mapfrac(width, height); + test::Array2D dst_small(test_width * channels, height), + dst(width * channels, height); + + kleidicv_error_t result = multithreaded_func( + src.data(), src.stride(), src_width, src_height, dst.data(), + dst.stride(), width, height, channels, mapxy.data(), mapxy.stride(), + mapfrac.data(), mapfrac.stride(), args..., + get_multithreading_fake(thread_count)); + + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, result); + + result = multithreaded_func( + src.data(), src.stride(), src_width, src_height, dst_small.data(), + dst_small.stride(), test_width, height, channels, mapxy.data(), + mapxy.stride(), mapfrac.data(), mapfrac.stride(), args..., + get_multithreading_fake(thread_count)); + + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, result); + } }; #define TEST_UNARY_OP(suffix, SrcT, DstT, ...) \ @@ -347,19 +420,35 @@ TEST(ThreadSeparableFilter2D, NotImplemented) { kleidicv_thread_separable_filter_2d_u16); } -TEST_P(Thread, remap16s_u8_border_replicate) { - check_remap16s(kleidicv_remap_s16_u8, kleidicv_thread_remap_s16_u8, - 1, KLEIDICV_BORDER_TYPE_REPLICATE, - kleidicv_border_values_t{}); +TEST_P(Thread, remap_s16_u8_border_replicate) { + check_remap_s16(kleidicv_remap_s16_u8, kleidicv_thread_remap_s16_u8, + 1, KLEIDICV_BORDER_TYPE_REPLICATE, + kleidicv_border_values_t{}); +} + +TEST_P(Thread, remap_s16_u8_not_implemented) { + check_remap_s16_not_implemented(kleidicv_thread_remap_s16_u8, 2, + KLEIDICV_BORDER_TYPE_REPLICATE, + kleidicv_border_values_t{}); + check_remap_s16_not_implemented(kleidicv_thread_remap_s16_u8, 1, + KLEIDICV_BORDER_TYPE_CONSTANT, + kleidicv_border_values_t{}); +} + +TEST_P(Thread, remap_s16point5_u8_border_replicate) { + kleidicv_border_values_t border_values = {}; + check_remap_s16point5(kleidicv_remap_s16point5_u8, + kleidicv_thread_remap_s16point5_u8, 1, + KLEIDICV_BORDER_TYPE_REPLICATE, border_values); } -TEST_P(Thread, remap16s_u8_not_implemented) { - check_remap16s_not_implemented(kleidicv_thread_remap_s16_u8, 2, - KLEIDICV_BORDER_TYPE_REPLICATE, - kleidicv_border_values_t{}); - check_remap16s_not_implemented(kleidicv_thread_remap_s16_u8, 1, - KLEIDICV_BORDER_TYPE_CONSTANT, - kleidicv_border_values_t{}); +TEST_P(Thread, remap_s16point5_u8_not_implemented) { + check_remap_s16point5_not_implemented( + kleidicv_thread_remap_s16point5_u8, 2, KLEIDICV_BORDER_TYPE_REPLICATE, + kleidicv_border_values_t{}); + check_remap_s16point5_not_implemented( + kleidicv_thread_remap_s16point5_u8, 1, KLEIDICV_BORDER_TYPE_CONSTANT, + kleidicv_border_values_t{}); } TEST_P(Thread, SobelHorizontal1Channel) { -- GitLab