diff --git a/CHANGELOG.md b/CHANGELOG.md index 64273414c16d7aaf3f1bc60e45c65cdb288f5e10..d55cb571f2a9eae8e7efbfa927efc93595b1be29 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,8 +18,10 @@ This changelog aims to follow the guiding principles of ### Added - Implementation of Rotate 90 degrees clockwise. - Remap implementations with + - Integer coordinates with nearest neighbour method + - Fixed-point coordinates with linear interpolation + - Floating-point coordinates with linear interpolation - Replicated and constant borders - - Nearest neighbour and fixed-point interpolations - 1-channel only - u8 and u16 inputs - WarpPerspective implementation diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp index addbd7281f88ffad983423049f78525fe90f6c20..c5b82b23ba08247482a0768d10bbc6490401dbc7 100644 --- a/adapters/opencv/kleidicv_hal.cpp +++ b/adapters/opencv/kleidicv_hal.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 @@ -1377,6 +1377,38 @@ int remap_s16point5(int src_type, const uchar *src_data, size_t src_step, return CV_HAL_ERROR_NOT_IMPLEMENTED; } +int remap_f32(int src_type, const uchar *src_data, size_t src_step, + int src_width, int src_height, uchar *dst_data, size_t dst_step, + int dst_width, int dst_height, float *mapx, size_t mapx_step, + float *mapy, size_t mapy_step, int interpolation, int border_type, + const double border_value_f64[4]) { + kleidicv_border_type_t kleidicv_border_type; + if (from_opencv(border_type, kleidicv_border_type)) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + kleidicv_interpolation_type_t kleidicv_interpolation_type; + if (from_opencv(interpolation, kleidicv_interpolation_type)) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + auto border_value = get_border_value(border_value_f64); + + auto mt = get_multithreading(); + + // Only implement CV_8UC1 so far + if (src_type == CV_8UC1) { + return convert_error(kleidicv_thread_remap_f32_u8( + src_data, src_step, static_cast(src_width), + static_cast(src_height), dst_data, dst_step, + static_cast(dst_width), static_cast(dst_height), 1, + mapx, mapx_step, mapy, mapy_step, kleidicv_interpolation_type, + kleidicv_border_type, border_value.data(), mt)); + } + + return CV_HAL_ERROR_NOT_IMPLEMENTED; +} + int pyrdown(const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int dst_width, int dst_height, int depth, int cn, int border_type) { diff --git a/adapters/opencv/kleidicv_hal.h b/adapters/opencv/kleidicv_hal.h index 2a5f04e31184457085174bcaf6af04c701ec3dd5..dff50167646408867c4f1c4381a18f4a6a7507f3 100644 --- a/adapters/opencv/kleidicv_hal.h +++ b/adapters/opencv/kleidicv_hal.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 @@ -158,6 +158,12 @@ int remap_s16point5(int src_type, const uchar *src_data, size_t src_step, const uint16_t *mapfrac, size_t mapfrac_step, int border_type, const double border_value[4]); +int remap_f32(int src_type, const uchar *src_data, size_t src_step, + int src_width, int src_height, uchar *dst_data, size_t dst_step, + int dst_width, int dst_height, float *mapx, size_t mapx_step, + float *mapy, size_t mapy_step, int interpolation, int border_type, + const double border_value[4]); + int warp_perspective(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int dst_width, int dst_height, @@ -418,6 +424,20 @@ static inline int kleidicv_remap_s16point5_with_fallback( #define cv_hal_remap16s16u kleidicv_remap_s16point5_with_fallback #endif // cv_hal_remap16s16u +static inline int kleidicv_remap_f32_with_fallback( + int src_type, const uchar *src_data, size_t src_step, int src_width, + int src_height, uchar *dst_data, size_t dst_step, int dst_width, + int dst_height, float *mapx, size_t mapx_step, float *mapy, + size_t mapy_step, int interpolation, int border_type, + const double border_value[4]) { + return KLEIDICV_HAL_FALLBACK_FORWARD( + remap_f32, cv_hal_remap32f, src_type, src_data, src_step, src_width, + src_height, dst_data, dst_step, dst_width, dst_height, mapx, mapx_step, + mapy, mapy_step, interpolation, border_type, border_value); +} +#undef cv_hal_remap32f +#define cv_hal_remap32f kleidicv_remap_f32_with_fallback + // pyrdown static inline int kleidicv_pyrdown_with_fallback( const uchar *src_data, size_t src_step, int src_width, int src_height, diff --git a/adapters/opencv/opencv-4.11.patch b/adapters/opencv/opencv-4.11.patch index 30962741c0d7da2feea1cc40ca6f3d5188dabb65..2029c3026cf5b340b71969a86cad6fdcca7c3d5f 100644 --- a/adapters/opencv/opencv-4.11.patch +++ b/adapters/opencv/opencv-4.11.patch @@ -19,7 +19,7 @@ index 2b4035285f..729cd1dd43 100644 @@ -281,6 +281,11 @@ void Mat::convertTo(OutputArray dst, int type_, double alpha, double beta) const dst.create(dims, size, dtype); Mat dstMat = dst.getMat(); - + + if( dims <= 2 ) { + int width_in_elements = src.cols * cn; + CALL_HAL(convertTo, cv_hal_convertTo, src.data, src.step, src.depth(), dstMat.data, dstMat.step, dstMat.depth(), width_in_elements, src.rows, alpha, beta); @@ -35,7 +35,7 @@ index 474fe17393..5d5289cc16 100644 @@ -1011,6 +1011,53 @@ inline int hal_ni_transpose2d(const uchar* src_data, size_t src_step, uchar* dst #define cv_hal_transpose2d hal_ni_transpose2d //! @endcond - + +/** + @brief sum + @param src_data,src_step,src_type Source image @@ -84,15 +84,15 @@ index 474fe17393..5d5289cc16 100644 +//! @endcond + //! @} - - + + diff --git a/modules/core/src/sum.dispatch.cpp b/modules/core/src/sum.dispatch.cpp index fade948336..17b40ca0e8 100644 --- a/modules/core/src/sum.dispatch.cpp +++ b/modules/core/src/sum.dispatch.cpp @@ -199,6 +199,10 @@ Scalar sum(InputArray _src) CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_sum(src, _res), _res); - + int k, cn = src.channels(), depth = src.depth(); + + double result = 0; @@ -100,7 +100,7 @@ index fade948336..17b40ca0e8 100644 + SumFunc func = getSumFunc(depth); CV_Assert( cn <= 4 && func != 0 ); - + diff --git a/modules/imgproc/src/hal_replacement.hpp b/modules/imgproc/src/hal_replacement.hpp index fe6019e3a7..b2d8c8b533 100644 --- a/modules/imgproc/src/hal_replacement.hpp @@ -108,7 +108,7 @@ index fe6019e3a7..b2d8c8b533 100644 @@ -378,6 +378,60 @@ inline int hal_ni_remap32f(int src_type, const uchar *src_data, size_t src_step, #define cv_hal_remap32f hal_ni_remap32f //! @endcond - + +/** + @brief hal_remap with a short integer map + @param src_type source and destination image type @@ -183,8 +183,38 @@ index dfc718bf87..c1f953f230 100644 + CALL_HAL(remap16s16u, cv_hal_remap16s16u, src.type(), src.data, src.step, src.cols, src.rows, dst.data, dst.step, dst.cols, dst.rows, + map1.ptr(), map1.step, map2.ptr(), map2.step, borderType, borderValue.val); } - + interpolation &= ~WARP_RELATIVE_MAP; +diff --git a/modules/imgproc/src/smooth.dispatch.cpp b/modules/imgproc/src/smooth.dispatch.cpp +index f7dafbd956..13c1341716 100644 +--- a/modules/imgproc/src/smooth.dispatch.cpp ++++ b/modules/imgproc/src/smooth.dispatch.cpp +@@ -655,6 +655,25 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, + ocl_GaussianBlur_8UC1(_src, _dst, ksize, CV_MAT_DEPTH(type), kx, ky, borderType) + ); + ++ { ++ Mat src = _src.getMat(); ++ Mat dst = _dst.getMat(); ++ ++ Point ofs; ++ Size wsz(src.cols, src.rows); ++ if(!(borderType & BORDER_ISOLATED)) ++ src.locateROI( wsz, ofs ); ++ ++ if (sigma1 == 0.0 && sigma2 == 0.0 && ksize.height == ksize.width) { ++ CALL_HAL(gaussianBlurBinomial, cv_hal_gaussianBlurBinomial, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn, ++ ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, borderType&~BORDER_ISOLATED); ++ } ++ ++ CALL_HAL(gaussianBlur, cv_hal_gaussianBlur, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn, ++ ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height, ++ sigma1, sigma2, borderType&~BORDER_ISOLATED); ++ } ++ + if(sdepth == CV_8U && ((borderType & BORDER_ISOLATED) || !_src.isSubmatrix())) + { + std::vector fkx, fky; diff --git a/modules/imgproc/test/test_imgwarp_strict.cpp b/modules/imgproc/test/test_imgwarp_strict.cpp index 673c6f03e6..56d9e0b554 100644 --- a/modules/imgproc/test/test_imgwarp_strict.cpp @@ -196,5 +226,22 @@ index 673c6f03e6..56d9e0b554 100644 - return 1.0f; + return 4.0f; // Reference algorithm uses integer interpolation with 5 bits fractional part, and this greater tolerance allows better precision algorithms to pass the test } - + void CV_ImageWarpBaseTest::validate_results() const +diff --git a/modules/imgproc/test/test_imgwarp.cpp b/modules/imgproc/test/test_imgwarp.cpp +index e8840d231b..adfaad38b0 100644 +--- a/modules/imgproc/test/test_imgwarp.cpp ++++ b/modules/imgproc/test/test_imgwarp.cpp +@@ -1371,7 +1371,11 @@ TEST_P(Imgproc_RemapRelative, validity) + cv::remap(src, dstRelative, mapRelativeX32F, mapRelativeY32F, interpolation | WARP_RELATIVE_MAP, borderType); + } + +- EXPECT_EQ(cvtest::norm(dstAbsolute, dstRelative, NORM_INF), 0); ++ if (interpolation != INTER_LINEAR) { ++ EXPECT_EQ(cvtest::norm(dstAbsolute, dstRelative, NORM_INF), 0); ++ } else { // TODO: Check whether 4 is acceptable? f32_u8_linear_constant can be 4 ++ EXPECT_LT(cvtest::norm(dstAbsolute, dstRelative, NORM_INF), 5); // Reference algorithm uses integer interpolation with 5 bits fractional part, and this greater tolerance allows better precision algorithms to pass the test ++ } + }; + + INSTANTIATE_TEST_CASE_P(ImgProc, Imgproc_RemapRelative, testing::Combine( diff --git a/adapters/opencv/opencv-5.x.patch b/adapters/opencv/opencv-5.x.patch index 3f0298d27d4acd506e66a2c03e2e3c3fb218216d..bf7ce73b983c604397150284c24d1d973acb3f6a 100644 --- a/adapters/opencv/opencv-5.x.patch +++ b/adapters/opencv/opencv-5.x.patch @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 @@ -170,3 +170,20 @@ index 83a5e23781..75df962fb7 100644 - return 1.0f; + return 4.0f; // Reference algorithm uses integer interpolation with 5 bits fractional part, and this greater tolerance allows better precision algorithms to pass the test } +diff --git a/modules/imgproc/test/test_imgwarp.cpp b/modules/imgproc/test/test_imgwarp.cpp +index aded7bb74e..adf6a67e2b 100644 +--- a/modules/imgproc/test/test_imgwarp.cpp ++++ b/modules/imgproc/test/test_imgwarp.cpp +@@ -811,7 +811,11 @@ TEST_P(Imgproc_RemapRelative, validity) + cv::remap(src, dstRelative, mapRelativeX32F, mapRelativeY32F, interpolation | WARP_RELATIVE_MAP, borderType); + } + +- EXPECT_EQ(cvtest::norm(dstAbsolute, dstRelative, NORM_INF), 0); ++ if (interpolation != INTER_LINEAR) { ++ EXPECT_EQ(cvtest::norm(dstAbsolute, dstRelative, NORM_INF), 0); ++ } else { ++ EXPECT_LT(cvtest::norm(dstAbsolute, dstRelative, NORM_INF), 4); // Reference algorithm uses integer interpolation with 5 bits fractional part, and this greater tolerance allows better precision algorithms to pass the test ++ } + }; + + INSTANTIATE_TEST_CASE_P(ImgProc, Imgproc_RemapRelative, testing::Combine( diff --git a/conformity/opencv/test_remap.cpp b/conformity/opencv/test_remap.cpp index 6034acc286658fa0ebfcdc202270940229d17328..89d6b0ebab5a1cedc906affa336d103bc58bfdda 100644 --- a/conformity/opencv/test_remap.cpp +++ b/conformity/opencv/test_remap.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 @@ -18,8 +18,14 @@ static cv::Mat get_source_mat(int format) { cv::Mat m(kMaxHeight, kMaxWidth, format); for (size_t row = 0; row < kMaxHeight; ++row) { for (size_t column = 0; column < kMaxWidth; ++column) { - m.at(row, column) = - (row * kMaxWidth + column) % std::numeric_limits::max(); + // Referring to the conformity check in test_warp_perspective + // Remap calculation in float greatly amplifies any small errors + // coming from precision innaccuracies, but ensuring that neighbouring + // pixels have neighbouring values decreases this effect, so it can be + // expected that the error won't be bigger than 1. + const int kMaxValue = std::numeric_limits::max(); + m.at(row, column) = abs( + static_cast(row + column) % (2 * kMaxValue + 1) - kMaxValue); } } return m; @@ -79,8 +85,7 @@ template cv::Mat exec_remap_s16point5(cv::Mat& map_mat) { cv::Mat empty; - // integer part is 16SC2, that is twice as much data as the fractional part, - // 16UC1 + // integer part is 16SC2, twice as much data as the fractional part, 16UC1 int height = map_mat.rows * 2 / 3; cv::Mat mapxy_mat = map_mat.rowRange(0, height); ushort* p_frac = map_mat.rowRange(height, map_mat.rows).ptr(); @@ -131,6 +136,64 @@ bool test_remap_s16point5(int index, RecreatedMessageQueue& request_queue, } #endif +// BorderValue is interpreted as 1/1000, i.e. 500 for 0.5 +template +cv::Mat exec_remap_f32(cv::Mat& mapxy_mat) { + cv::Mat source_mat = get_source_mat(Format); + cv::Mat result(mapxy_mat.rows, mapxy_mat.cols, Format); + + cv::Mat mapx_mat = mapxy_mat.rowRange(0, mapxy_mat.rows / 2); + cv::Mat mapy_mat = mapxy_mat.rowRange(mapxy_mat.rows / 2, mapxy_mat.rows); + + remap(source_mat, result, mapx_mat, mapy_mat, Interpolation, BorderMode, + BorderValue / 1000.0); + return result; +} + +#if MANAGER +template +bool test_remap_f32(int index, RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue) { + cv::Mat source_mat = get_source_mat(Format); + cv::RNG rng(0); + + for (size_t w = 5; w <= kMaxWidth; w += 3) { + for (size_t h = 5; h <= kMaxHeight; h += 2) { + cv::Mat map_mat(h * 2, w, CV_32FC1); + cv::Mat mapx_mat = map_mat.rowRange(0, h); + rng.fill(mapx_mat, cv::RNG::UNIFORM, -3, kMaxWidth + 3); + + cv::Mat mapy_mat = map_mat.rowRange(h, map_mat.rows); + rng.fill(mapy_mat, cv::RNG::UNIFORM, -3, kMaxHeight + 3); + + cv::Mat actual_mat = exec_remap_f32(map_mat); + + cv::Mat expected_mat = get_expected_from_subordinate( + index, request_queue, reply_queue, map_mat); + + bool success = + (CV_MAT_DEPTH(Format) == CV_8U && + !are_matrices_different(2, actual_mat, expected_mat)) || + (CV_MAT_DEPTH(Format) == CV_16U && + !are_matrices_different(2, actual_mat, expected_mat)); + if (!success) { + fail_print_matrices(w, h, source_mat, actual_mat, expected_mat); + std::cout << "=== mapx_mat:" << std::endl; + std::cout << mapx_mat << std::endl << std::endl; + std::cout << "=== mapy_mat:" << std::endl; + std::cout << mapy_mat << std::endl << std::endl; + return true; + } + } + } + return false; +} + +#endif + std::vector& remap_tests_get() { // clang-format off static std::vector tests = { @@ -138,11 +201,13 @@ std::vector& remap_tests_get() { TEST("RemapS16 uint16 Replicate", (test_remap_s16), (exec_remap_s16)), TEST("RemapS16Point5 uint8 Replicate", (test_remap_s16point5), (exec_remap_s16point5)), TEST("RemapS16Point5 uint16 Replicate", (test_remap_s16point5), (exec_remap_s16point5)), + TEST("RemapF32 uint8 Replicate", (test_remap_f32), (exec_remap_f32)), - TEST("RemapS16 uint8 Constant", (test_remap_s16), (exec_remap_s16)), - TEST("RemapS16 uint16 Constant", (test_remap_s16), (exec_remap_s16)), - TEST("RemapS16Point5 uint8 Constant", (test_remap_s16point5), (exec_remap_s16point5)), - TEST("RemapS16Point5 uint16 Constant", (test_remap_s16point5), (exec_remap_s16point5)), + TEST("RemapS16 uint8 Constant", (test_remap_s16), (exec_remap_s16)), + TEST("RemapS16 uint16 Constant", (test_remap_s16), (exec_remap_s16)), + TEST("RemapS16Point5 uint8 Constant", (test_remap_s16point5), (exec_remap_s16point5)), + TEST("RemapS16Point5 uint16 Constant", (test_remap_s16point5), (exec_remap_s16point5)), + TEST("RemapF32 uint8 Constant", (test_remap_f32), (exec_remap_f32)), }; // clang-format on return tests; diff --git a/conformity/opencv/utils.h b/conformity/opencv/utils.h index ef5ae65fcb6ffff7ec0cf647ee50b46938de63a8..bf19e7b13f8e773575e073a7280bb72a9bd27fab 100644 --- a/conformity/opencv/utils.h +++ b/conformity/opencv/utils.h @@ -72,7 +72,9 @@ bool are_matrices_different(T threshold, cv::Mat& A, cv::Mat& B) { for (int i = 0; i < A.rows; ++i) { for (int j = 0; j < (A.cols * CV_MAT_CN(A.type())); ++j) { if (abs_diff(A.at(i, j), B.at(i, j)) > threshold) { - std::cout << "=== Mismatch at: " << i << " " << j << std::endl + std::cout << "=== Mismatch at [" << i << ", " << j + << "]: " << +A.at(i, j) << " vs. " << +B.at(i, j) + << std::endl << std::endl; return true; } diff --git a/doc/functionality.md b/doc/functionality.md index 2f2e2eed54dc3b65ee3674d10b5f1a652265a3b4..ab124b7ccd899e8ad037edc5e2391206cf8a94b7 100644 --- a/doc/functionality.md +++ b/doc/functionality.md @@ -1,5 +1,5 @@ @@ -97,6 +97,7 @@ See `doc/opencv.md` for details of the functionality available in OpenCV. |--------------------------------------------|-----|-----| | Remap int16 coordinates | x | x | | Remap int16+uint16 fixed-point coordinates | x | x | +| Remap float32 coordinates | x | | # WarpPerspective | | u8 | diff --git a/doc/opencv.md b/doc/opencv.md index b9db1d3050df02546b4d7c1f7923d5ae7f8c99b2..2c88467c32eeed251731dcf920aeeea2aa2b1979 100644 --- a/doc/opencv.md +++ b/doc/opencv.md @@ -1,5 +1,5 @@ @@ -220,6 +220,10 @@ Supported map configurations: * `map1.type()` is `CV_16SC2` and `map2.type()` is `CV_16UC1` - fixed-point representation as generated by [`cv::convertMaps`](https://docs.opencv.org/4.11.0/da/d54/group__imgproc__transform.html#ga9156732fa8f01be9ebd1a194f2728b7f): * > ⚠️ **Acceleration will not work unless OpenCV is built from source patched with `opencv-4.11.patch`** * supported `interpolation`: `INTER_LINEAR` only +* `map1` is 32FC1 and `map2` is 32FC1: + * `map1` is x coordinates (column) + * `map2` is y coordinates (row) + * supported `interpolation`: `INTER_LINEAR` only ### [`cv::warpPerspective()`](https://docs.opencv.org/4.10.0/da/d54/group__imgproc__transform.html#gaf73673a7e8e18ec6963e3774e6a94b87) Performs a perspective transformation on an image. diff --git a/kleidicv/include/kleidicv/kleidicv.h b/kleidicv/include/kleidicv/kleidicv.h index 9c6cdfa1d1b6c34be0c43cad436c1585be73901f..29c088ab021c867bd74f62bb8bc741cc5e1b4112 100644 --- a/kleidicv/include/kleidicv/kleidicv.h +++ b/kleidicv/include/kleidicv/kleidicv.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 @@ -1834,6 +1834,60 @@ KLEIDICV_API_DECLARATION(kleidicv_remap_s16point5_u16, const uint16_t *src, const uint16_t *border_value); #endif // DOXYGEN +/// Transforms the `src` image by taking the pixels specified by the coordinates +/// from the `mapxy` image. +/// +/// Width and height are the same for `mapx`, `mapy` and for `dst`. `src` +/// dimensions may be different, but due to the limits of 32-bit float format, +/// its width and height must be less than 2^24. Coordinates outside of `src` +/// dimensions are considered border. +/// +/// @param src Pointer to the source data. Must be non-null. +/// @param src_stride Distance in bytes from the start of one row to the +/// start of the next row for the source data. Must +/// not be less than `width * sizeof(type)`, except for +/// single-row images. Must be less than 2^32. +/// @param src_width Number of elements in the source row. Must be less +/// than 2^24. +/// @param src_height Number of rows in the source data. Must be less than +/// 2^24. +/// @param dst Pointer to the destination data. Must be non-null. +/// @param dst_stride Distance in bytes from the start of one row to the +/// start of the next row for the destination data. +/// Must be a multiple of `sizeof(type)` and no less than +/// `width * sizeof(type)`, except for single-row images. +/// @param dst_width Number of elements in a destination row. Must be at +/// least 4. +/// @param dst_height Number of rows in the destination data. +/// @param channels Number of channels in the (source and destination) +/// data. Must be 1. +/// @param mapx Pointer to the x coordinates' data. Must be non-null. +/// @param mapx_stride Distance in bytes from the start of one row to the +/// start of the next row for `mapx`. Must be a multiple +/// of `sizeof(float)` and no less than `width * +/// sizeof(float)`, except for single-row images. +/// @param mapy Pointer to the y coordinates' data. Must be non-null. +/// @param mapy_stride Distance in bytes from the start of one row to the +/// start of the next row for `mapy`. Must be a multiple +/// of `sizeof(float)` and no less than `width * +/// sizeof(float)`, except for single-row images. +/// @param interpolation Interpolation algorithm. Supported types: \n +/// - @ref KLEIDICV_INTERPOLATION_LINEAR +/// @param border_type Way of handling the border. The supported border types +/// are: \n +/// - @ref KLEIDICV_BORDER_TYPE_REPLICATE +/// - @ref KLEIDICV_BORDER_TYPE_CONSTANT +/// @param border_value Border values if the border_type is +/// @ref KLEIDICV_BORDER_TYPE_CONSTANT. +KLEIDICV_API_DECLARATION(kleidicv_remap_f32_u8, const uint8_t *src, + size_t src_stride, size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, size_t dst_width, + size_t dst_height, size_t channels, float *mapx, + size_t mapx_stride, float *mapy, size_t mapy_stride, + kleidicv_interpolation_type_t interpolation, + kleidicv_border_type_t border_type, + const uint8_t *border_value); + #ifndef DOXYGEN /// Internal - not part of the public API and its direct use is not supported. /// diff --git a/kleidicv/include/kleidicv/transform/remap.h b/kleidicv/include/kleidicv/transform/remap.h index 78dd1e8c58dec60d0f59d88de9e097b9178338e9..6dc74385993171844cabb5241fc27055e7f45b26 100644 --- a/kleidicv/include/kleidicv/transform/remap.h +++ b/kleidicv/include/kleidicv/transform/remap.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 @@ -50,6 +50,26 @@ inline bool remap_s16point5_is_implemented( } } +template +inline bool remap_f32_is_implemented( + size_t src_stride, size_t src_width, size_t src_height, size_t dst_width, + kleidicv_border_type_t border_type, size_t channels, + kleidicv_interpolation_type_t interpolation) KLEIDICV_STREAMING_COMPATIBLE { + if constexpr (std::is_same::value) { + return ( + src_stride <= std::numeric_limits::max() && dst_width >= 4 && + src_width <= + static_cast(std::numeric_limits::max()) + 1 && + src_height <= + static_cast(std::numeric_limits::max()) + 1 && + (border_type == KLEIDICV_BORDER_TYPE_REPLICATE || + border_type == KLEIDICV_BORDER_TYPE_CONSTANT) && + channels == 1 && interpolation == KLEIDICV_INTERPOLATION_LINEAR); + } else { + return false; + } +} + // Constants for Remap16Point5 static const uint16_t REMAP16POINT5_FRAC_BITS = 5; static const uint16_t REMAP16POINT5_FRAC_MAX = 1 << REMAP16POINT5_FRAC_BITS; @@ -76,6 +96,16 @@ kleidicv_error_t remap_s16point5(const T *src, size_t src_stride, kleidicv_border_type_t border_type, const T *border_value); +template +kleidicv_error_t remap_f32(const T *src, size_t src_stride, size_t src_width, + size_t src_height, T *dst, size_t dst_stride, + size_t dst_width, size_t dst_height, size_t channels, + float *mapx, size_t mapx_stride, float *mapy, + size_t mapy_stride, + kleidicv_interpolation_type_t interpolation, + kleidicv_border_type_t border_type, + const T *border_value); + } // namespace neon namespace sve2 { @@ -98,6 +128,15 @@ kleidicv_error_t remap_s16point5(const T *src, size_t src_stride, kleidicv_border_type_t border_type, const T *border_value); +template +kleidicv_error_t remap_f32(const T *src, size_t src_stride, size_t src_width, + size_t src_height, T *dst, size_t dst_stride, + size_t dst_width, size_t dst_height, size_t channels, + float *mapx, size_t mapx_stride, float *mapy, + size_t mapy_stride, + kleidicv_interpolation_type_t interpolation, + kleidicv_border_type_t border_type, + const T *border_value); } // namespace sve2 namespace sme2 { diff --git a/kleidicv/src/transform/common_sc.h b/kleidicv/src/transform/common_sc.h new file mode 100644 index 0000000000000000000000000000000000000000..f7a7eb01825dbc8bd578cc786f6ceba5a28c15a3 --- /dev/null +++ b/kleidicv/src/transform/common_sc.h @@ -0,0 +1,202 @@ +// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kleidicv/ctypes.h" +#include "kleidicv/kleidicv.h" +#include "kleidicv/sve2.h" +#include "kleidicv/traits.h" +#include "kleidicv/types.h" + +namespace KLEIDICV_TARGET_NAMESPACE { + +// Convert border_type to a template argument. +template +void remap32f_process_rows(kleidicv_border_type_t border_type, Args &&...args) { + if (border_type == KLEIDICV_BORDER_TYPE_REPLICATE) { + remap32f_process_rows( + std::forward(args)...); + } else { + remap32f_process_rows( + std::forward(args)...); + } +} + +// Convert interpolation_type to a template argument. +template +void remap32f_process_rows(kleidicv_interpolation_type_t interpolation_type, + Args &&...args) { + if (interpolation_type == KLEIDICV_INTERPOLATION_NEAREST) { + remap32f_process_rows( + std::forward(args)...); + } else { + remap32f_process_rows( + std::forward(args)...); + } +} + +template +bool remap_image_is_large(const Rows &rows, size_t height) { + return rows.stride() * height >= 1ULL << 32; +} + +// Convert is_large to a template argument. +template +void remap32f_process_rows(bool is_large, Args &&...args) { + if (KLEIDICV_UNLIKELY(is_large)) { + remap32f_process_rows(std::forward(args)...); + } else { + remap32f_process_rows(std::forward(args)...); + } +} + +template +svuint32_t inline load_common(svbool_t pg, svuint32_t x, svuint32_t y, + svuint32_t sv_src_stride, + Rows &src_rows) { + if constexpr (IsLarge) { + svbool_t pg_b = pg; + svbool_t pg_t = svtrn2_b32(pg, svpfalse()); + + // Calculate offsets from coordinates (y * stride + x) + // To avoid losing precision, the final offsets should be in 64 bits + svuint64_t offsets_b = svmlalb(svmovlb(x), y, sv_src_stride); + svuint64_t offsets_t = svmlalt(svmovlt(x), y, sv_src_stride); + // Copy pixels from source + svuint64_t result_b = + svld1ub_gather_offset_u64(pg_b, &src_rows[0], offsets_b); + svuint64_t result_t = + svld1ub_gather_offset_u64(pg_t, &src_rows[0], offsets_t); + return svtrn1_u32(svreinterpret_u32_u64(result_b), + svreinterpret_u32_u64(result_t)); + } else { + svuint32_t offsets = svmla_x(pg, x, y, sv_src_stride); + return svld1ub_gather_offset_u32(pg, &src_rows[0], offsets); + } +} + +template +svuint32_t inline calculate_linear_replicate(svbool_t pg, svfloat32x2_t coords, + svfloat32_t xmaxf, + svfloat32_t ymaxf, + svuint32_t sv_src_stride, + Rows &src_rows) { + auto load_source = [&](svuint32_t x, svuint32_t y) { + return load_common(pg, x, y, sv_src_stride, src_rows); + }; + svbool_t pg_all32 = svptrue_b32(); + svfloat32_t xf = svget2(coords, 0); + svfloat32_t yf = svget2(coords, 1); + // Take the integer part, clamp it to within the dimensions of the + // source image (negative values are already saturated to 0) + svuint32_t x0 = svcvt_u32_f32_x(pg_all32, svmin_x(pg_all32, xf, xmaxf)); + svuint32_t y0 = svcvt_u32_f32_x(pg_all32, svmin_x(pg_all32, yf, ymaxf)); + + // Get fractional part, or 0 if out of range + svbool_t x_in_range = svand_z(pg_all32, svcmpge_n_f32(pg_all32, xf, 0.F), + svcmplt_f32(pg_all32, xf, xmaxf)); + svbool_t y_in_range = svand_z(pg_all32, svcmpge_n_f32(pg_all32, yf, 0.F), + svcmplt_f32(pg_all32, yf, ymaxf)); + svfloat32_t xfrac = + svsel_f32(x_in_range, svsub_f32_x(pg_all32, xf, svrintm_x(pg_all32, xf)), + svdup_n_f32(0.F)); + svfloat32_t yfrac = + svsel_f32(y_in_range, svsub_f32_x(pg_all32, yf, svrintm_x(pg_all32, yf)), + svdup_n_f32(0.F)); + + // x1 = x0 + 1, except if it's already xmax or out of range + svuint32_t x1 = svsel_u32(x_in_range, svadd_n_u32_x(pg_all32, x0, 1), x0); + svuint32_t y1 = svsel_u32(y_in_range, svadd_n_u32_x(pg_all32, y0, 1), y0); + + // Calculate offsets from coordinates (y * stride + x) + // a: top left, b: top right, c: bottom left, d: bottom right + svfloat32_t a = svcvt_f32_u32_x(pg_all32, load_source(x0, y0)); + svfloat32_t b = svcvt_f32_u32_x(pg_all32, load_source(x1, y0)); + svfloat32_t line0 = + svmla_f32_x(pg_all32, a, svsub_f32_x(pg_all32, b, a), xfrac); + svfloat32_t c = svcvt_f32_u32_x(pg_all32, load_source(x0, y1)); + svfloat32_t d = svcvt_f32_u32_x(pg_all32, load_source(x1, y1)); + svfloat32_t line1 = + svmla_f32_x(pg_all32, c, svsub_f32_x(pg_all32, d, c), xfrac); + svfloat32_t result = + svmla_f32_x(pg_all32, line0, svsub_f32_x(pg_all32, line1, line0), yfrac); + return svcvt_u32_f32_x(pg_all32, svadd_n_f32_x(pg_all32, result, 0.5F)); +} + +template +svuint32_t get_pixels_or_border(svbool_t pg, svuint32_t x, svuint32_t y, + svuint32_t sv_border, svuint32_t sv_xmax, + svuint32_t sv_ymax, svuint32_t sv_src_stride, + Rows &src_rows) { + svbool_t in_range = + svand_b_z(pg, svcmple_u32(pg, x, sv_xmax), svcmple_u32(pg, y, sv_ymax)); + svuint32_t result = + load_common(in_range, x, y, sv_src_stride, src_rows); + // Select between source pixels and border colour + return svsel_u32(in_range, result, sv_border); +} + +template +svuint32_t inline calculate_linear_constant_border( + svbool_t pg, svfloat32x2_t coords, svuint32_t sv_border, svuint32_t sv_xmax, + svuint32_t sv_ymax, svuint32_t sv_src_stride, + Rows &src_rows) { + svfloat32_t xf = svget2(coords, 0); + svfloat32_t yf = svget2(coords, 1); + + // Convert obviously out-of-range coordinates to values that are just beyond + // the largest permitted image width & height. This avoids the need for + // special case handling elsewhere. + svfloat32_t big = svdup_n_f32(1 << 24); + xf = svsel_f32(svcmple_f32(pg, svabs_f32_x(pg, xf), big), xf, big); + yf = svsel_f32(svcmple_f32(pg, svabs_f32_x(pg, yf), big), yf, big); + + svfloat32_t xf0 = svrintm_f32_x(pg, xf); + svfloat32_t yf0 = svrintm_f32_x(pg, yf); + + svint32_t x0 = svcvt_s32_x(pg, xf0); + svint32_t y0 = svcvt_s32_x(pg, yf0); + svint32_t x1 = svadd_s32_x(pg, x0, svdup_n_s32(1)); + svint32_t y1 = svadd_s32_x(pg, y0, svdup_n_s32(1)); + + svfloat32_t xfrac = svsub_f32_x(pg, xf, xf0); + svfloat32_t yfrac = svsub_f32_x(pg, yf, yf0); + + svfloat32_t a = svcvt_f32_u32_x( + pg, get_pixels_or_border( + pg, svreinterpret_u32_s32(x0), svreinterpret_u32_s32(y0), + sv_border, sv_xmax, sv_ymax, sv_src_stride, src_rows)); + svfloat32_t b = svcvt_f32_u32_x( + pg, get_pixels_or_border( + pg, svreinterpret_u32_s32(x1), svreinterpret_u32_s32(y0), + sv_border, sv_xmax, sv_ymax, sv_src_stride, src_rows)); + svfloat32_t line0 = svmla_f32_x(pg, a, svsub_f32_x(pg, b, a), xfrac); + svfloat32_t c = svcvt_f32_u32_x( + pg, get_pixels_or_border( + pg, svreinterpret_u32_s32(x0), svreinterpret_u32_s32(y1), + sv_border, sv_xmax, sv_ymax, sv_src_stride, src_rows)); + svfloat32_t d = svcvt_f32_u32_x( + pg, get_pixels_or_border( + pg, svreinterpret_u32_s32(x1), svreinterpret_u32_s32(y1), + sv_border, sv_xmax, sv_ymax, sv_src_stride, src_rows)); + svfloat32_t line1 = svmla_f32_x(pg, c, svsub_f32_x(pg, d, c), xfrac); + svfloat32_t result = + svmla_f32_x(pg, line0, svsub_f32_x(pg, line1, line0), yfrac); + return svcvt_u32_f32_x(pg, svrinta_f32_x(pg, result)); +} + +} // namespace KLEIDICV_TARGET_NAMESPACE diff --git a/kleidicv/src/transform/remap_api.cpp b/kleidicv/src/transform/remap_api.cpp index bda9c17f6958a5d405c4593bc0e7bcf733820020..9debeda611df78a246cc43c01562f3a265a979ef 100644 --- a/kleidicv/src/transform/remap_api.cpp +++ b/kleidicv/src/transform/remap_api.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 @@ -22,3 +22,7 @@ KLEIDICV_MULTIVERSION_C_API(kleidicv_remap_s16point5_u16, &kleidicv::neon::remap_s16point5, &kleidicv::sve2::remap_s16point5, nullptr); + +KLEIDICV_MULTIVERSION_C_API(kleidicv_remap_f32_u8, + &kleidicv::neon::remap_f32, + &kleidicv::sve2::remap_f32, nullptr); diff --git a/kleidicv/src/transform/remap_neon.cpp b/kleidicv/src/transform/remap_neon.cpp index a540daf56a69b92310a1bb87de6ee1b08f217b5e..c671c1d9ee31344569e0c8473ed31012f559114c 100644 --- a/kleidicv/src/transform/remap_neon.cpp +++ b/kleidicv/src/transform/remap_neon.cpp @@ -1,8 +1,9 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 #include +#include // TODO: check #include #include "kleidicv/kleidicv.h" @@ -259,9 +260,11 @@ class RemapS16Point5Replicate { FracVectorType frac = vld1q_u16(&mapfrac[0]); uint16x8_t xfrac = vbslq_u16(vcltq_s16(xy.val[0], vdupq_n_s16(0)), vdupq_n_u16(0), + // extract xfrac = frac[0:4] vandq_u16(frac, vdupq_n_u16(REMAP16POINT5_FRAC_MAX - 1))); uint16x8_t yfrac = vbslq_u16(vcltq_s16(xy.val[1], vdupq_n_s16(0)), vdupq_n_u16(0), + // extract yfrac = frac[5:9] vandq_u16(vshrq_n_u16(frac, REMAP16POINT5_FRAC_BITS), vdupq_n_u16(REMAP16POINT5_FRAC_MAX - 1))); uint16x8_t nxfrac = vsubq_u16(vdupq_n_u16(REMAP16POINT5_FRAC_MAX), xfrac); @@ -887,6 +890,392 @@ kleidicv_error_t remap_s16point5( } // NOLINTEND(readability-function-cognitive-complexity) +template +class RemapF32Replicate; + +template +class RemapF32Replicate { + public: + using ScalarType = uint8_t; + using MapVecTraits = neon::VecTraits; + using MapVectorType = typename MapVecTraits::VectorType; // float32x4_t + + RemapF32Replicate(Rows src_rows, size_t src_width, + size_t src_height) + : src_rows_{src_rows}, + v_src_stride_{vdup_n_u32(static_cast(src_rows_.stride()))}, + vq_src_stride_{vdupq_n_u32(static_cast(src_rows_.stride()))}, + v_xmax_{vdupq_n_u32(static_cast(src_width - 1))}, + v_ymax_{vdupq_n_u32(static_cast(src_height - 1))} {} + + void process_row(size_t width, Columns mapx, + Columns mapy, Columns dst) { + const size_t kStep = VecTraits::num_lanes(); + + auto load_src_into_floats_small = [&](uint32x4_t x, uint32x4_t y) { + uint32x4_t offset = vmlaq_u32(x, y, vq_src_stride_); + uint64_t acc = + static_cast(src_rows_[vgetq_lane_u32(offset, 0)]) | + (static_cast(src_rows_[vgetq_lane_u32(offset, 1)]) << 32); + uint64x2_t rawsrc = vdupq_n_u64(acc); + acc = static_cast(src_rows_[vgetq_lane_u32(offset, 2)]) | + (static_cast(src_rows_[vgetq_lane_u32(offset, 3)]) << 32); + rawsrc = vsetq_lane_u64(acc, rawsrc, 1); + return vcvtq_f32_u32(vreinterpretq_u32_u64(rawsrc)); + }; + + auto load_src_into_floats_large = [&](uint32x4_t x, uint32x4_t y) { + uint64x2_t offset_low = + vmlal_u32(vmovl_u32(vget_low_u32(x)), vget_low_u32(y), v_src_stride_); + uint64x2_t offset_high = + vmlal_u32(vmovl_high_u32(x), vget_high_u32(y), v_src_stride_); + uint64_t acc = + static_cast(src_rows_[vgetq_lane_u64(offset_low, 0)]) | + (static_cast(src_rows_[vgetq_lane_u64(offset_low, 1)]) + << 32); + uint64x2_t rawsrc = vdupq_n_u64(acc); + acc = static_cast(src_rows_[vgetq_lane_u64(offset_high, 0)]) | + (static_cast(src_rows_[vgetq_lane_u64(offset_high, 1)]) + << 32); + rawsrc = vsetq_lane_u64(acc, rawsrc, 1); + return vcvtq_f32_u32(vreinterpretq_u32_u64(rawsrc)); + }; + + auto load = [&](uint32x4_t x, uint32x4_t y) { + if constexpr (IsLarge) { + return load_src_into_floats_large(x, y); + } else { + return load_src_into_floats_small(x, y); + } + }; + + auto vector_path_1 = [&](const float *ptr_mapx, const float *ptr_mapy) { + MapVectorType x = vld1q_f32(ptr_mapx); + MapVectorType y = vld1q_f32(ptr_mapy); + // Truncating convert to int + uint32x4_t x0 = vminq_u32(vcvtmq_u32_f32(x), v_xmax_); + uint32x4_t y0 = vminq_u32(vcvtmq_u32_f32(y), v_ymax_); + + // Get fractional part, or 0 if out of range + float32x4_t zero = vdupq_n_f32(0.F); + uint32x4_t x_in_range = + vandq_u32(vcgeq_f32(x, zero), vcltq_u32(x0, v_xmax_)); + uint32x4_t y_in_range = + vandq_u32(vcgeq_f32(y, zero), vcltq_u32(y0, v_ymax_)); + float32x4_t xfrac = + vbslq_f32(x_in_range, vsubq_f32(x, vrndmq_f32(x)), zero); + float32x4_t yfrac = + vbslq_f32(y_in_range, vsubq_f32(y, vrndmq_f32(y)), zero); + + // x1 = x0 + 1, except if it's already xmax or out of range + uint32x4_t x1 = vsubq_u32(x0, x_in_range); + uint32x4_t y1 = vsubq_u32(y0, y_in_range); + + // Calculate offsets from coordinates (y * stride + x) + // a: top left, b: top right, c: bottom left, d: bottom right + float32x4_t a = load(x0, y0); + float32x4_t b = load(x1, y0); + float32x4_t line0 = vmlaq_f32(a, vsubq_f32(b, a), xfrac); + float32x4_t c = load(x0, y1); + float32x4_t d = load(x1, y1); + float32x4_t line1 = vmlaq_f32(c, vsubq_f32(d, c), xfrac); + float32x4_t result = vmlaq_f32(line0, vsubq_f32(line1, line0), yfrac); + return vminq_u32(vdupq_n_u32(0xFF), vcvtaq_u32_f32(result)); + }; + + auto vector_path_4 = [&](size_t step) { // step = 4*4 = 16 + const float *ptr_mapx = &mapx[0]; + const float *ptr_mapy = &mapy[0]; + uint32x4_t res0 = vector_path_1(ptr_mapx, ptr_mapy); + + ptr_mapx += kStep; + ptr_mapy += kStep; + uint32x4_t res1 = vector_path_1(ptr_mapx, ptr_mapy); + uint16x8_t result16_0 = vuzp1q_u16(res0, res1); + + ptr_mapx += kStep; + ptr_mapy += kStep; + res0 = vector_path_1(ptr_mapx, ptr_mapy); + + ptr_mapx += kStep; + ptr_mapy += kStep; + res1 = vector_path_1(ptr_mapx, ptr_mapy); + uint16x8_t result16_1 = vuzp1q_u16(res0, res1); + vst1q_u8(&dst[0], vuzp1q_u8(result16_0, result16_1)); + mapx += ptrdiff_t(step); + mapy += ptrdiff_t(step); + dst += ptrdiff_t(step); + }; + + LoopUnroll loop{width, kStep}; + loop.unroll_four_times(vector_path_4); + loop.unroll_once([&](size_t step) { + const float *ptr_mapx = &mapx[0]; + const float *ptr_mapy = &mapy[0]; + uint32x4_t result = vector_path_1(ptr_mapx, ptr_mapy); + dst[0] = vgetq_lane_u32(result, 0); + dst[1] = vgetq_lane_u32(result, 1); + dst[2] = vgetq_lane_u32(result, 2); + dst[3] = vgetq_lane_u32(result, 3); + mapx += ptrdiff_t(step); + mapy += ptrdiff_t(step); + dst += ptrdiff_t(step); + }); + ptrdiff_t back_step = static_cast(loop.step()) - + static_cast(loop.remaining_length()); + mapx -= back_step; + mapy -= back_step; + dst -= back_step; + loop.remaining([&](size_t, size_t) { + const float *ptr_mapx = &mapx[0]; + const float *ptr_mapy = &mapy[0]; + uint32x4_t result = vector_path_1(ptr_mapx, ptr_mapy); + dst[0] = vgetq_lane_u32(result, 0); + dst[1] = vgetq_lane_u32(result, 1); + dst[2] = vgetq_lane_u32(result, 2); + dst[3] = vgetq_lane_u32(result, 3); + }); + } + + private: + Rows src_rows_; + uint32x2_t v_src_stride_; // load_large + uint32x4_t vq_src_stride_; // load_small + uint32x4_t v_xmax_; + uint32x4_t v_ymax_; +}; // end of class RemapF32Replicate + +template +class RemapF32ConstantBorder; + +// TODO: Need to refactor to reduce the complexity +// NOLINTBEGIN(readability-function-cognitive-complexity) +template +class RemapF32ConstantBorder { + public: + using ScalarType = uint8_t; + using MapVecTraits = neon::VecTraits; + using MapVectorType = typename MapVecTraits::VectorType; // float32x4_t + + RemapF32ConstantBorder(Rows src_rows, size_t src_width, + size_t src_height, const ScalarType *border_value) + : src_rows_{src_rows}, + src_width_{src_width}, + src_height_{src_height}, + border_value_{border_value} {} + + void process_row(size_t width, Columns mapx, + Columns mapy, Columns dst) { + const size_t kStep = VecTraits::num_lanes(); + + auto get_edge_pixels = + [&](unsigned &a_result, unsigned &b_result, unsigned &c_result, + unsigned &d_result, int x0, int y0, ptrdiff_t offset, + Rows src_rows, int src_width, int src_height) { + if (y0 >= 0) { + if (x0 >= 0) { + a_result = src_rows[offset]; + } + if (x0 + 1 < src_width) { + b_result = src_rows[offset + 1]; + } + } + if (y0 + 1 < src_height) { + offset += static_cast(src_rows.stride()); + if (x0 >= 0) { + c_result = src_rows[offset]; + } + if (x0 + 1 < src_width) { + d_result = src_rows[offset + 1]; + } + } + }; + + auto vector_path_1 = [&](const float *ptr_mapx, const float *ptr_mapy) { + MapVectorType xf = vld1q_f32(ptr_mapx); + MapVectorType yf = vld1q_f32(ptr_mapy); + // Convert obviously out-of-range coordinates to values that are just + // beyond the largest permitted image width & height. This avoids the need + // for special case handling elsewhere. + float32x4_t big = vdupq_n_f32(1 << 24); + xf = vbslq_f32(vcleq_f32(vabsq_f32(xf), big), xf, big); + yf = vbslq_f32(vcleq_f32(vabsq_f32(yf), big), yf, big); + + int32x4_t x0 = vcvtmq_s32_f32(xf); + int32x4_t y0 = vcvtmq_s32_f32(yf); + int x0_array[4], y0_array[4]; + unsigned a_array[4], b_array[4], c_array[4], d_array[4]; + vst1q_s32(x0_array, x0); + vst1q_s32(y0_array, y0); + for (int i = 0; i < 4; ++i) { + int x0i = x0_array[i]; + int y0i = y0_array[i]; + ptrdiff_t offset = x0i + y0i * src_rows_.stride(); + + // src_width < (1ULL << 24) && src_height_ < (1ULL << 24) is guaranteed + if (x0i < 0 || x0i + 1 >= static_cast(src_width_) || y0i < 0 || + y0i + 1 >= static_cast(src_height_)) { + // Not entirely within the source image + + a_array[i] = b_array[i] = c_array[i] = d_array[i] = border_value_[0]; + + if (x0i < -1 || x0i >= static_cast(src_width_) || y0i < -1 || + y0i >= static_cast(src_height_)) { + // Completely outside the source image + continue; + } + + get_edge_pixels(a_array[i], b_array[i], c_array[i], d_array[i], x0i, + y0i, offset, src_rows_, src_width_, src_height_); + continue; + } + + // Completely inside the source image + a_array[i] = src_rows_[offset]; + b_array[i] = src_rows_[offset + 1]; + offset += src_rows_.stride(); + c_array[i] = src_rows_[offset]; + d_array[i] = src_rows_[offset + 1]; + } + + float32x4_t xfrac = vsubq_f32(xf, vrndmq_f32(xf)); + float32x4_t yfrac = vsubq_f32(yf, vrndmq_f32(yf)); + float32x4_t a = vcvtq_f32_u32(vld1q_u32(a_array)); + float32x4_t b = vcvtq_f32_u32(vld1q_u32(b_array)); + float32x4_t line0 = vmlaq_f32(a, vsubq_f32(b, a), xfrac); + float32x4_t c = vcvtq_f32_u32(vld1q_u32(c_array)); + float32x4_t d = vcvtq_f32_u32(vld1q_u32(d_array)); + float32x4_t line1 = vmlaq_f32(c, vsubq_f32(d, c), xfrac); + float32x4_t result = vmlaq_f32(line0, vsubq_f32(line1, line0), yfrac); + return vcvtaq_u32_f32(result); + }; + + auto vector_path_4 = [&](size_t step) { // step = 4*4 = 16 + const float *ptr_mapx = &mapx[0]; + const float *ptr_mapy = &mapy[0]; + uint32x4_t res0 = vector_path_1(ptr_mapx, ptr_mapy); + + ptr_mapx += kStep; + ptr_mapy += kStep; + uint32x4_t res1 = vector_path_1(ptr_mapx, ptr_mapy); + uint16x8_t result16_0 = vuzp1q_u16(res0, res1); + + ptr_mapx += kStep; + ptr_mapy += kStep; + res0 = vector_path_1(ptr_mapx, ptr_mapy); + + ptr_mapx += kStep; + ptr_mapy += kStep; + res1 = vector_path_1(ptr_mapx, ptr_mapy); + uint16x8_t result16_1 = vuzp1q_u16(res0, res1); + vst1q_u8(&dst[0], vuzp1q_u8(result16_0, result16_1)); + mapx += ptrdiff_t(step); + mapy += ptrdiff_t(step); + dst += ptrdiff_t(step); + }; + + LoopUnroll loop{width, kStep}; + loop.unroll_four_times(vector_path_4); + loop.unroll_once([&](size_t step) { + const float *ptr_mapx = &mapx[0]; + const float *ptr_mapy = &mapy[0]; + uint32x4_t result = vector_path_1(ptr_mapx, ptr_mapy); + dst[0] = vgetq_lane_u32(result, 0); + dst[1] = vgetq_lane_u32(result, 1); + dst[2] = vgetq_lane_u32(result, 2); + dst[3] = vgetq_lane_u32(result, 3); + mapx += ptrdiff_t(step); + mapy += ptrdiff_t(step); + dst += ptrdiff_t(step); + }); + ptrdiff_t back_step = static_cast(loop.step()) - + static_cast(loop.remaining_length()); + mapx -= back_step; + mapy -= back_step; + dst -= back_step; + loop.remaining([&](size_t, size_t) { + const float *ptr_mapx = &mapx[0]; + const float *ptr_mapy = &mapy[0]; + uint32x4_t result = vector_path_1(ptr_mapx, ptr_mapy); + dst[0] = vgetq_lane_u32(result, 0); + dst[1] = vgetq_lane_u32(result, 1); + dst[2] = vgetq_lane_u32(result, 2); + dst[3] = vgetq_lane_u32(result, 3); + }); + } + + private: + Rows src_rows_; + size_t src_width_; + size_t src_height_; + const ScalarType *border_value_; +}; // end of class RemapF32ConstantBorder +// NOLINTEND(readability-function-cognitive-complexity) + +// Most of the complexity comes from parameter checking. +// NOLINTBEGIN(readability-function-cognitive-complexity) +template +kleidicv_error_t remap_f32(const T *src, size_t src_stride, size_t src_width, + size_t src_height, T *dst, size_t dst_stride, + size_t dst_width, size_t dst_height, size_t channels, + float *mapx, size_t mapx_stride, float *mapy, + size_t mapy_stride, + kleidicv_interpolation_type_t interpolation, + kleidicv_border_type_t border_type, + [[maybe_unused]] const T *border_value) { + // may need to remove the maybe_unused + CHECK_POINTER_AND_STRIDE(src, src_stride, src_height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, dst_height); + CHECK_POINTER_AND_STRIDE(mapx, mapx_stride, dst_height); + CHECK_POINTER_AND_STRIDE(mapy, mapy_stride, dst_height); + CHECK_IMAGE_SIZE(src_width, src_height); + CHECK_IMAGE_SIZE(dst_width, dst_height); + if (border_type == KLEIDICV_BORDER_TYPE_CONSTANT && nullptr == border_value) { + return KLEIDICV_ERROR_NULL_POINTER; + } + + if (!remap_f32_is_implemented(src_stride, src_width, src_height, dst_width, + border_type, channels, interpolation)) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + + // Calculating in float32_t will only be precise until 24 bits + if (src_width >= (1ULL << 24) || src_height >= (1ULL << 24) || + dst_width >= (1ULL << 24) || dst_height >= (1ULL << 24)) { + return KLEIDICV_ERROR_RANGE; + } + + Rows src_rows{src, src_stride, channels}; + Rows mapx_rows{mapx, mapx_stride, 1}; + Rows mapy_rows{mapy, mapy_stride, 1}; + Rows dst_rows{dst, dst_stride, channels}; + Rectangle rect{dst_width, dst_height}; + + if (border_type == KLEIDICV_BORDER_TYPE_CONSTANT) { + if (KLEIDICV_UNLIKELY(src_rows.stride() * src_height >= (1ULL << 32))) { + RemapF32ConstantBorder operation{src_rows, src_width, src_height, + border_value}; + zip_rows(operation, rect, mapx_rows, mapy_rows, dst_rows); + } else { + RemapF32ConstantBorder operation{src_rows, src_width, + src_height, border_value}; + zip_rows(operation, rect, mapx_rows, mapy_rows, dst_rows); + } + } else { + assert(border_type == KLEIDICV_BORDER_TYPE_REPLICATE); + if (KLEIDICV_UNLIKELY(src_rows.stride() * src_height >= (1ULL << 32))) { + RemapF32Replicate operation{src_rows, src_width, src_height}; + zip_rows(operation, rect, mapx_rows, mapy_rows, dst_rows); + } else { + RemapF32Replicate operation{src_rows, src_width, src_height}; + zip_rows(operation, rect, mapx_rows, mapy_rows, dst_rows); + } + } + + return KLEIDICV_OK; +} +// NOLINTEND(readability-function-cognitive-complexity) + #define KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_S16(type) \ template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t remap_s16( \ const type *src, size_t src_stride, size_t src_width, size_t src_height, \ @@ -908,4 +1297,14 @@ KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_S16(uint16_t); KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_S16Point5(uint8_t); KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_S16Point5(uint16_t); +#define KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_F32(type) \ + template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t remap_f32( \ + const type *src, size_t src_stride, size_t src_width, size_t src_height, \ + type *dst, size_t dst_stride, size_t dst_width, size_t dst_height, \ + size_t channels, float *mapx, size_t mapx_stride, float *mapy, \ + size_t mapy_stride, kleidicv_interpolation_type_t interpolation, \ + kleidicv_border_type_t border_type, const type *border_value) + +KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_F32(uint8_t); + } // namespace kleidicv::neon diff --git a/kleidicv/src/transform/remap_sc.h b/kleidicv/src/transform/remap_sc.h index 65c5afe1191b41db63103bc776daebe89ca439d7..8cf574b91c0aa3b5579a1a2396406c187e7b18ae 100644 --- a/kleidicv/src/transform/remap_sc.h +++ b/kleidicv/src/transform/remap_sc.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 @@ -15,6 +15,7 @@ #include #include +#include "common_sc.h" #include "kleidicv/sve2.h" #include "kleidicv/transform/remap.h" @@ -822,6 +823,150 @@ kleidicv_error_t remap_s16point5_sc( } // NOLINTEND(readability-function-cognitive-complexity) +template +void remap32f_process_rows(Rows src_rows, size_t src_width, + size_t src_height, const ScalarType* border_value, + Rows dst_rows, size_t dst_width, + size_t y_begin, size_t y_end, + Rows mapx_rows, + Rows mapy_rows) { + svbool_t pg_all32 = svptrue_b32(); + svuint32_t sv_xmax = svdup_n_u32(src_width - 1); + svuint32_t sv_ymax = svdup_n_u32(src_height - 1); + svuint32_t sv_src_stride = svdup_n_u32(src_rows.stride()); + svuint32_t sv_border; + // sv_border is only used if the border type is constant. + // If the border type is not constant then border_value is permitted to be + // null and must not be read. + if constexpr (Border == KLEIDICV_BORDER_TYPE_CONSTANT) { + sv_border = svdup_n_u32(border_value[0]); + } + + svfloat32_t xmaxf = svdup_n_f32(static_cast(src_width - 1)); + svfloat32_t ymaxf = svdup_n_f32(static_cast(src_height - 1)); + + const size_t kStep = VecTraits::num_lanes(); + + // auto get_coordinates = [&](svbool_t pg, size_t xs) { + auto coordinate_getter = [&](svbool_t pg, size_t xs) { + auto x = static_cast(xs); + return svcreate2(svld1_f32(pg, &mapx_rows.as_columns()[x]), + svld1_f32(pg, &mapy_rows.as_columns()[x])); + }; + + auto calculate_linear = [&](svbool_t pg, uint32_t x) { + if constexpr (Border == KLEIDICV_BORDER_TYPE_REPLICATE) { + svfloat32x2_t coords = coordinate_getter(pg, x); + return calculate_linear_replicate( + pg, coords, xmaxf, ymaxf, sv_src_stride, src_rows); + } else { + static_assert(Border == KLEIDICV_BORDER_TYPE_CONSTANT); + svfloat32x2_t coords = coordinate_getter(pg, x); + return calculate_linear_constant_border( + pg, coords, sv_border, sv_xmax, sv_ymax, sv_src_stride, src_rows); + } + }; + + auto process_row = [&]() { + Columns dst = dst_rows.as_columns(); + LoopUnroll2 loop{dst_width, kStep}; + // GCOVR_EXCL_START + if constexpr (Inter == KLEIDICV_INTERPOLATION_NEAREST) { + assert(!"INTER_NEAREST not implemented for RemapF32"); + // GCOVR_EXCL_STOP + } else if constexpr (Inter == KLEIDICV_INTERPOLATION_LINEAR) { + loop.unroll_four_times([&](size_t x) { + ScalarType* p_dst = &dst[static_cast(x)]; + svuint32_t res0 = calculate_linear(pg_all32, x); + x += kStep; + svuint32_t res1 = calculate_linear(pg_all32, x); + svuint16_t result16_0 = svuzp1_u16(svreinterpret_u16_u32(res0), + svreinterpret_u16_u32(res1)); + x += kStep; + res0 = calculate_linear(pg_all32, x); + x += kStep; + res1 = calculate_linear(pg_all32, x); + svuint16_t result16_1 = svuzp1_u16(svreinterpret_u16_u32(res0), + svreinterpret_u16_u32(res1)); + svst1_u8(svptrue_b8(), p_dst, + svuzp1_u8(svreinterpret_u8_u16(result16_0), + svreinterpret_u8_u16(result16_1))); + }); + loop.unroll_once([&](size_t x) { + ScalarType* p_dst = &dst[static_cast(x)]; + svuint32_t result = calculate_linear(pg_all32, x); + svst1b_u32(pg_all32, p_dst, result); + }); + loop.remaining([&](size_t x, size_t x_max) { + ScalarType* p_dst = &dst[static_cast(x)]; + svbool_t pg32 = svwhilelt_b32(x, x_max); + svuint32_t result = calculate_linear(pg32, x); + svst1b_u32(pg32, p_dst, result); + }); + } else { + static_assert(Inter == KLEIDICV_INTERPOLATION_NEAREST || + Inter == KLEIDICV_INTERPOLATION_LINEAR, + ": Unknown interpolation type!"); + } + ++mapx_rows; + ++mapy_rows; + }; + + for (size_t y = y_begin; y < y_end; ++y) { + process_row(); + ++dst_rows; + } +} + +// Most of the complexity comes from parameter checking. +// NOLINTBEGIN(readability-function-cognitive-complexity) +template +kleidicv_error_t remap_f32_sc(const T* src, size_t src_stride, size_t src_width, + size_t src_height, T* dst, size_t dst_stride, + size_t dst_width, size_t dst_height, + size_t channels, float* mapx, size_t mapx_stride, + float* mapy, size_t mapy_stride, + kleidicv_interpolation_type_t interpolation, + kleidicv_border_type_t border_type, + [[maybe_unused]] const T* border_value) { + // may need to remove the maybe_unused + CHECK_POINTER_AND_STRIDE(src, src_stride, src_height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, dst_height); + CHECK_POINTER_AND_STRIDE(mapx, mapx_stride, dst_height); + CHECK_POINTER_AND_STRIDE(mapy, mapy_stride, dst_height); + CHECK_IMAGE_SIZE(src_width, src_height); + CHECK_IMAGE_SIZE(dst_width, dst_height); + if (border_type == KLEIDICV_BORDER_TYPE_CONSTANT && nullptr == border_value) { + return KLEIDICV_ERROR_NULL_POINTER; + } + + if (!remap_f32_is_implemented(src_stride, src_width, src_height, dst_width, + border_type, channels, interpolation)) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + + // Calculating in float32_t will only be precise until 24 bits + if (src_width >= (1ULL << 24) || src_height >= (1ULL << 24) || + dst_width >= (1ULL << 24) || dst_height >= (1ULL << 24)) { + return KLEIDICV_ERROR_RANGE; + } + + Rows src_rows{src, src_stride, channels}; + Rows mapx_rows{mapx, mapx_stride, 1}; + Rows mapy_rows{mapy, mapy_stride, 1}; + Rows dst_rows{dst, dst_stride, channels}; + Rectangle rect{dst_width, dst_height}; + + remap32f_process_rows(remap_image_is_large(src_rows, src_height), + interpolation, border_type, src_rows, src_width, + src_height, border_value, dst_rows, dst_width, 0, + dst_height, mapx_rows, mapy_rows); + + return KLEIDICV_OK; +} +// NOLINTEND(readability-function-cognitive-complexity) + } // namespace KLEIDICV_TARGET_NAMESPACE #endif // KLEIDICV_REMAP_SC_H diff --git a/kleidicv/src/transform/remap_sve2.cpp b/kleidicv/src/transform/remap_sve2.cpp index 0777091d701349b6b91e9ce98d28ac91a4194655..624b7ce2479616e6ed86a4a73ceec7e160cb9582 100644 --- a/kleidicv/src/transform/remap_sve2.cpp +++ b/kleidicv/src/transform/remap_sve2.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 @@ -33,6 +33,21 @@ kleidicv_error_t remap_s16point5(const T *src, size_t src_stride, border_type, border_value); } +template +kleidicv_error_t remap_f32(const T *src, size_t src_stride, size_t src_width, + size_t src_height, T *dst, size_t dst_stride, + size_t dst_width, size_t dst_height, size_t channels, + float *mapx, size_t mapx_stride, float *mapy, + size_t mapy_stride, + kleidicv_interpolation_type_t interpolation, + kleidicv_border_type_t border_type, + const T *border_value) { + return remap_f32_sc(src, src_stride, src_width, src_height, dst, + dst_stride, dst_width, dst_height, channels, + mapx, mapx_stride, mapy, mapy_stride, + interpolation, border_type, border_value); +} + #define KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_S16(type) \ template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t remap_s16( \ const type *src, size_t src_stride, size_t src_width, size_t src_height, \ @@ -54,4 +69,14 @@ KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_S16(uint16_t); KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_S16Point5(uint8_t); KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_S16Point5(uint16_t); +#define KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_F32(type) \ + template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t remap_f32( \ + const type *src, size_t src_stride, size_t src_width, size_t src_height, \ + type *dst, size_t dst_stride, size_t dst_width, size_t dst_height, \ + size_t channels, float *mapx, size_t mapx_stride, float *mapy, \ + size_t mapy_stride, kleidicv_interpolation_type_t interpolation, \ + kleidicv_border_type_t border_type, const type *border_value) + +KLEIDICV_INSTANTIATE_TEMPLATE_REMAP_F32(uint8_t); + } // namespace kleidicv::sve2 diff --git a/kleidicv/src/transform/warp_perspective_sc.h b/kleidicv/src/transform/warp_perspective_sc.h index 20cda8d79922baf44c761f9983563df2ed2dc6f9..9656d318c5eb28f8181d5c51b9e8e14542cb97f0 100644 --- a/kleidicv/src/transform/warp_perspective_sc.h +++ b/kleidicv/src/transform/warp_perspective_sc.h @@ -8,6 +8,7 @@ #include #include +#include "common_sc.h" #include "kleidicv/ctypes.h" #include "kleidicv/kleidicv.h" #include "kleidicv/sve2.h" @@ -41,14 +42,12 @@ namespace KLEIDICV_TARGET_NAMESPACE { template -void warp_perspective_operation(Rows src_rows, - size_t src_width, size_t src_height, - const float transform[9], - const ScalarType *border_value, - Rows dst_rows, size_t dst_width, - size_t y_begin, size_t y_end) { +void remap32f_process_rows(Rows src_rows, size_t src_width, + size_t src_height, const ScalarType *border_value, + Rows dst_rows, size_t dst_width, + size_t y_begin, size_t y_end, + const float transform[9]) { svbool_t pg_all32 = svptrue_b32(); - svfloat32_t sv_0123 = svcvt_f32_u32_z(pg_all32, svindex_u32(0, 1)); svuint32_t sv_xmax = svdup_n_u32(src_width - 1); svuint32_t sv_ymax = svdup_n_u32(src_height - 1); svuint32_t sv_src_stride = svdup_n_u32(src_rows.stride()); @@ -60,16 +59,18 @@ void warp_perspective_operation(Rows src_rows, sv_border = svdup_n_u32(border_value[0]); } - svfloat32_t T0 = svdup_n_f32(transform[0]); - svfloat32_t T3 = svdup_n_f32(transform[3]); - svfloat32_t T6 = svdup_n_f32(transform[6]); - svfloat32_t tx0, ty0, tw0; svfloat32_t xmaxf = svdup_n_f32(static_cast(src_width - 1)); svfloat32_t ymaxf = svdup_n_f32(static_cast(src_height - 1)); const size_t kStep = VecTraits::num_lanes(); - auto calculate_coordinates = [&](size_t x) { + svfloat32_t sv_0123 = svcvt_f32_u32_z(pg_all32, svindex_u32(0, 1)); + svfloat32_t T0 = svdup_n_f32(transform[0]); + svfloat32_t T3 = svdup_n_f32(transform[3]); + svfloat32_t T6 = svdup_n_f32(transform[6]); + svfloat32_t tx0, ty0, tw0; + + auto coordinate_getter = [&](svbool_t, size_t x) { svfloat32_t vx = svadd_n_f32_x(pg_all32, sv_0123, static_cast(x)); // Calculate half-transformed values from the first few pixel values, // plus Tn*x, similarly to the one above @@ -84,8 +85,8 @@ void warp_perspective_operation(Rows src_rows, svmul_f32_x(pg_all32, ty, iw)); }; - auto calculate_nearest_coordinates = [&](size_t x) { - svfloat32x2_t coords = calculate_coordinates(x); + auto calculate_nearest_coordinates = [&](svbool_t pg32, size_t x) { + svfloat32x2_t coords = coordinate_getter(pg32, x); svfloat32_t xf = svget2(coords, 0); svfloat32_t yf = svget2(coords, 1); @@ -109,32 +110,11 @@ void warp_perspective_operation(Rows src_rows, return svcreate2(xi, yi); }; - auto load = [&](svbool_t pg, svuint32_t x, svuint32_t y) { - if constexpr (IsLarge) { - svbool_t pg_b = pg; - svbool_t pg_t = svtrn2_b32(pg, svpfalse()); - - // Calculate offsets from coordinates (y * stride + x) - // To avoid losing precision, the final offsets should be in 64 bits - svuint64_t offsets_b = svmlalb(svmovlb(x), y, sv_src_stride); - svuint64_t offsets_t = svmlalt(svmovlt(x), y, sv_src_stride); - // Copy pixels from source - svuint64_t result_b = - svld1ub_gather_offset_u64(pg_b, &src_rows[0], offsets_b); - svuint64_t result_t = - svld1ub_gather_offset_u64(pg_t, &src_rows[0], offsets_t); - return svtrn1_u32(svreinterpret_u32_u64(result_b), - svreinterpret_u32_u64(result_t)); - } else { - svuint32_t offsets = svmla_x(pg, x, y, sv_src_stride); - return svld1ub_gather_offset_u32(pg, &src_rows[0], offsets); - } - }; - auto get_pixels_or_border = [&](svbool_t pg, svuint32_t x, svuint32_t y) { svbool_t in_range = svand_b_z(pg, svcmple_u32(pg, x, sv_xmax), svcmple_u32(pg, y, sv_ymax)); - svuint32_t result = load(in_range, x, y); + svuint32_t result = load_common( + in_range, x, y, sv_src_stride, src_rows); // Select between source pixels and border colour return svsel_u32(in_range, result, sv_border); }; @@ -146,19 +126,22 @@ void warp_perspective_operation(Rows src_rows, if constexpr (Border == KLEIDICV_BORDER_TYPE_CONSTANT) { return get_pixels_or_border(pg_all32, x, y); } else { - return load(pg_all32, x, y); + return load_common(pg_all32, x, y, sv_src_stride, + src_rows); } }; ScalarType *p_dst = &dst[static_cast(x)]; - svuint32_t res32_0 = load_source(calculate_nearest_coordinates(x)); + svuint32_t res32_0 = + load_source(calculate_nearest_coordinates(pg_all32, x)); x += kStep; - svuint32_t res32_1 = load_source(calculate_nearest_coordinates(x)); + svuint32_t res32_1 = + load_source(calculate_nearest_coordinates(pg_all32, x)); svuint16_t result0 = svuzp1_u16(svreinterpret_u16_u32(res32_0), svreinterpret_u16_u32(res32_1)); x += kStep; - res32_0 = load_source(calculate_nearest_coordinates(x)); + res32_0 = load_source(calculate_nearest_coordinates(pg_all32, x)); x += kStep; - res32_1 = load_source(calculate_nearest_coordinates(x)); + res32_1 = load_source(calculate_nearest_coordinates(pg_all32, x)); svuint16_t result1 = svuzp1_u16(svreinterpret_u16_u32(res32_0), svreinterpret_u16_u32(res32_1)); svuint8_t result = @@ -171,7 +154,7 @@ void warp_perspective_operation(Rows src_rows, size_t length = x_max - x; svbool_t pg32 = svwhilelt_b32(0ULL, length); - svuint32x2_t coords = calculate_nearest_coordinates(x); + svuint32x2_t coords = calculate_nearest_coordinates(pg32, x); svuint32_t xi = svget2(coords, 0); svuint32_t yi = svget2(coords, 1); @@ -179,109 +162,26 @@ void warp_perspective_operation(Rows src_rows, if constexpr (Border == KLEIDICV_BORDER_TYPE_CONSTANT) { result = get_pixels_or_border(pg32, xi, yi); } else { - result = load(pg32, xi, yi); + result = load_common(pg32, xi, yi, sv_src_stride, + src_rows); } svst1b_u32(pg32, &dst[static_cast(x)], result); }; - auto calculate_linear_replicate = [&](svbool_t pg, uint32_t x) { - auto load_source = [&](svuint32_t x, svuint32_t y) { - return load(pg, x, y); - }; - - svfloat32x2_t coords = calculate_coordinates(x); - svfloat32_t xf = svget2(coords, 0); - svfloat32_t yf = svget2(coords, 1); - // Take the integer part, clamp it to within the dimensions of the - // source image (negative values are already saturated to 0) - svuint32_t x0 = svcvt_u32_f32_x(pg_all32, svmin_x(pg_all32, xf, xmaxf)); - svuint32_t y0 = svcvt_u32_f32_x(pg_all32, svmin_x(pg_all32, yf, ymaxf)); - - // Get fractional part, or 0 if out of range - svbool_t x_in_range = svand_z(pg_all32, svcmpge_n_f32(pg_all32, xf, 0.F), - svcmplt_f32(pg_all32, xf, xmaxf)); - svbool_t y_in_range = svand_z(pg_all32, svcmpge_n_f32(pg_all32, yf, 0.F), - svcmplt_f32(pg_all32, yf, ymaxf)); - svfloat32_t xfrac = svsel_f32( - x_in_range, svsub_f32_x(pg_all32, xf, svrintm_x(pg_all32, xf)), - svdup_n_f32(0.F)); - svfloat32_t yfrac = svsel_f32( - y_in_range, svsub_f32_x(pg_all32, yf, svrintm_x(pg_all32, yf)), - svdup_n_f32(0.F)); - - // x1 = x0 + 1, except if it's already xmax or out of range - svuint32_t x1 = svsel_u32(x_in_range, svadd_n_u32_x(pg_all32, x0, 1), x0); - svuint32_t y1 = svsel_u32(y_in_range, svadd_n_u32_x(pg_all32, y0, 1), y0); - - // Calculate offsets from coordinates (y * stride + x) - // a: top left, b: top right, c: bottom left, d: bottom right - svfloat32_t a = svcvt_f32_u32_x(pg_all32, load_source(x0, y0)); - svfloat32_t b = svcvt_f32_u32_x(pg_all32, load_source(x1, y0)); - svfloat32_t line0 = - svmla_f32_x(pg_all32, a, svsub_f32_x(pg_all32, b, a), xfrac); - svfloat32_t c = svcvt_f32_u32_x(pg_all32, load_source(x0, y1)); - svfloat32_t d = svcvt_f32_u32_x(pg_all32, load_source(x1, y1)); - svfloat32_t line1 = - svmla_f32_x(pg_all32, c, svsub_f32_x(pg_all32, d, c), xfrac); - svfloat32_t result = svmla_f32_x( - pg_all32, line0, svsub_f32_x(pg_all32, line1, line0), yfrac); - return svmin_u32_x( - pg_all32, svdup_n_u32(0xFF), - svcvt_u32_f32_x(pg_all32, svadd_n_f32_x(pg_all32, result, 0.5F))); - }; - - auto calculate_linear_constant_border = [&](svbool_t pg, uint32_t x) { - svfloat32x2_t coords = calculate_coordinates(x); - svfloat32_t xf = svget2(coords, 0); - svfloat32_t yf = svget2(coords, 1); - - // Convert obviously out-of-range coordinates to values that are just beyond - // the largest permitted image width & height. This avoids the need for - // special case handling elsewhere. - svfloat32_t big = svdup_n_f32(1 << 24); - xf = svsel_f32(svcmple_f32(pg, svabs_f32_x(pg, xf), big), xf, big); - yf = svsel_f32(svcmple_f32(pg, svabs_f32_x(pg, yf), big), yf, big); - - svfloat32_t xf0 = svrintm_f32_x(pg, xf); - svfloat32_t yf0 = svrintm_f32_x(pg, yf); - - svint32_t x0 = svcvt_s32_x(pg, xf0); - svint32_t y0 = svcvt_s32_x(pg, yf0); - svint32_t x1 = svadd_s32_x(pg, x0, svdup_n_s32(1)); - svint32_t y1 = svadd_s32_x(pg, y0, svdup_n_s32(1)); - - svfloat32_t xfrac = svsub_f32_x(pg, xf, xf0); - svfloat32_t yfrac = svsub_f32_x(pg, yf, yf0); - - svfloat32_t a = - svcvt_f32_u32_x(pg, get_pixels_or_border(pg, svreinterpret_u32_s32(x0), - svreinterpret_u32_s32(y0))); - svfloat32_t b = - svcvt_f32_u32_x(pg, get_pixels_or_border(pg, svreinterpret_u32_s32(x1), - svreinterpret_u32_s32(y0))); - svfloat32_t line0 = svmla_f32_x(pg, a, svsub_f32_x(pg, b, a), xfrac); - svfloat32_t c = - svcvt_f32_u32_x(pg, get_pixels_or_border(pg, svreinterpret_u32_s32(x0), - svreinterpret_u32_s32(y1))); - svfloat32_t d = - svcvt_f32_u32_x(pg, get_pixels_or_border(pg, svreinterpret_u32_s32(x1), - svreinterpret_u32_s32(y1))); - svfloat32_t line1 = svmla_f32_x(pg, c, svsub_f32_x(pg, d, c), xfrac); - svfloat32_t result = - svmla_f32_x(pg, line0, svsub_f32_x(pg, line1, line0), yfrac); - return svcvt_u32_f32_x(pg, svrinta_f32_x(pg, result)); - }; - auto calculate_linear = [&](svbool_t pg, uint32_t x) { if constexpr (Border == KLEIDICV_BORDER_TYPE_REPLICATE) { - return calculate_linear_replicate(pg, x); + svfloat32x2_t coords = coordinate_getter(pg, x); + return calculate_linear_replicate( + pg, coords, xmaxf, ymaxf, sv_src_stride, src_rows); } else { static_assert(Border == KLEIDICV_BORDER_TYPE_CONSTANT); - return calculate_linear_constant_border(pg, x); + svfloat32x2_t coords = coordinate_getter(pg, x); + return calculate_linear_constant_border( + pg, coords, sv_border, sv_xmax, sv_ymax, sv_src_stride, src_rows); } }; - auto process_row = [&](size_t y, Columns dst) { + auto process_row = [&](size_t y) { float fy = static_cast(y); // Calculate half-transformed values at the first pixel (nominators) // tw = T6*x + T7*y + T8 @@ -291,6 +191,7 @@ void warp_perspective_operation(Rows src_rows, ty0 = svdup_n_f32(fmaf(transform[4], fy, transform[5])); tw0 = svdup_n_f32(fmaf(transform[7], fy, transform[8])); + Columns dst = dst_rows.as_columns(); LoopUnroll2 loop{dst_width, kStep}; if constexpr (Inter == KLEIDICV_INTERPOLATION_NEAREST) { loop.unroll_four_times([&](size_t x) { vector_path_nearest_4x(x, dst); }); @@ -336,54 +237,23 @@ void warp_perspective_operation(Rows src_rows, }; for (size_t y = y_begin; y < y_end; ++y) { - process_row(y, dst_rows.as_columns()); + process_row(y); ++dst_rows; } } -// Convert border_type to a template argument. -template -void warp_perspective_operation(kleidicv_border_type_t border_type, - Args &&...args) { - if (border_type == KLEIDICV_BORDER_TYPE_REPLICATE) { - warp_perspective_operation( - std::forward(args)...); - } else { - warp_perspective_operation( - std::forward(args)...); - } -} - -// Convert interpolation_type to a template argument. -template -void warp_perspective_operation( - kleidicv_interpolation_type_t interpolation_type, Args &&...args) { - if (interpolation_type == KLEIDICV_INTERPOLATION_NEAREST) { - warp_perspective_operation( - std::forward(args)...); - } else { - warp_perspective_operation( - std::forward(args)...); - } -} - // Most of the complexity comes from parameter checking. // NOLINTBEGIN(readability-function-cognitive-complexity) template KLEIDICV_LOCALLY_STREAMING kleidicv_error_t warp_perspective_stripe_sc( const T *src, size_t src_stride, size_t src_width, size_t src_height, T *dst, size_t dst_stride, size_t dst_width, size_t dst_height, - size_t y_begin, size_t y_end, const float transformation[9], - size_t channels, kleidicv_interpolation_type_t interpolation, + size_t y_begin, size_t y_end, const float transform[9], size_t channels, + kleidicv_interpolation_type_t interpolation, kleidicv_border_type_t border_type, const T *border_value) { CHECK_POINTER_AND_STRIDE(src, src_stride, src_height); CHECK_POINTER_AND_STRIDE(dst, dst_stride, dst_height); - CHECK_POINTERS(transformation); + CHECK_POINTERS(transform); CHECK_IMAGE_SIZE(src_width, src_height); CHECK_IMAGE_SIZE(dst_width, dst_height); if (border_type == KLEIDICV_BORDER_TYPE_CONSTANT && nullptr == border_value) { @@ -404,15 +274,10 @@ KLEIDICV_LOCALLY_STREAMING kleidicv_error_t warp_perspective_stripe_sc( dst_rows += y_begin; - if (KLEIDICV_UNLIKELY(src_rows.stride() * src_height >= (1ULL << 32))) { - warp_perspective_operation( - interpolation, border_type, src_rows, src_width, src_height, - transformation, border_value, dst_rows, dst_width, y_begin, y_end); - } else { - warp_perspective_operation( - interpolation, border_type, src_rows, src_width, src_height, - transformation, border_value, dst_rows, dst_width, y_begin, y_end); - } + remap32f_process_rows(remap_image_is_large(src_rows, src_height), + interpolation, border_type, src_rows, src_width, + src_height, border_value, dst_rows, dst_width, + y_begin, y_end, transform); return KLEIDICV_OK; } diff --git a/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h b/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h index 52eb762e60c8ccace05df93c01193f87ae0c4c8c..dfcdc7caec97bbfca6615946e88b1302d75c4dc8 100644 --- a/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h +++ b/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 @@ -428,6 +428,18 @@ kleidicv_error_t kleidicv_thread_remap_s16point5_u16( kleidicv_border_type_t border_type, const uint16_t *border_value, kleidicv_thread_multithreading); +/// Internal - not part of the public API and its direct use is not supported. +/// +/// Multithreaded implementation of kleidicv_remap_f32_u8 - see the +/// documentation of that function for more details. +kleidicv_error_t kleidicv_thread_remap_f32_u8( + const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, size_t dst_width, size_t dst_height, + size_t channels, float *mapx, size_t mapx_stride, float *mapy, + size_t mapy_stride, kleidicv_interpolation_type_t interpolation, + kleidicv_border_type_t border_type, const uint8_t *border_value, + kleidicv_thread_multithreading); + /// Internal - not part of the public API and its direct use is not supported. /// /// Multithreaded implementation of kleidicv_warp_perspective_u8 - see the diff --git a/kleidicv_thread/src/kleidicv_thread.cpp b/kleidicv_thread/src/kleidicv_thread.cpp index d3c77db979b33d0a31f8921fc6e9f53dd865050c..2304946d457d8711a4dc2271235510aa6318c7e7 100644 --- a/kleidicv_thread/src/kleidicv_thread.cpp +++ b/kleidicv_thread/src/kleidicv_thread.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 @@ -757,6 +757,31 @@ kleidicv_error_t kleidicv_thread_remap_s16point5_u16( return parallel_batches(callback, mt, dst_height); } +kleidicv_error_t kleidicv_thread_remap_f32_u8( + const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, size_t dst_width, size_t dst_height, + size_t channels, float *mapx, size_t mapx_stride, float *mapy, + size_t mapy_stride, kleidicv_interpolation_type_t interpolation, + kleidicv_border_type_t border_type, const uint8_t *border_value, + kleidicv_thread_multithreading mt) { + if (!kleidicv::remap_f32_is_implemented( + src_stride, src_width, src_height, dst_width, border_type, channels, + interpolation)) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + auto callback = [=](unsigned begin, unsigned end) { + return kleidicv_remap_f32_u8( + src, src_stride, src_width, src_height, + dst + static_cast(begin * dst_stride / sizeof(uint8_t)), + dst_stride, dst_width, end - begin, channels, + mapx + static_cast(begin * mapx_stride / sizeof(float)), + mapx_stride, + mapy + static_cast(begin * mapy_stride / sizeof(float)), + mapy_stride, interpolation, border_type, border_value); + }; + return parallel_batches(callback, mt, dst_height); +} + kleidicv_error_t kleidicv_thread_warp_perspective_u8( const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, uint8_t *dst, size_t dst_stride, size_t dst_width, size_t dst_height, diff --git a/scripts/benchmark/benchmarks.txt b/scripts/benchmark/benchmarks.txt index 5e8ec82971105e09ef22826f0ed3b1f8d597916a..fa7514a17148e676b8083f76440693857758d7d6 100755 --- a/scripts/benchmark/benchmarks.txt +++ b/scripts/benchmark/benchmarks.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +# SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates # # SPDX-License-Identifier: Apache-2.0 @@ -82,10 +82,14 @@ Remap_S16_U8_Replicate: opencv_perf_imgproc '*Remap/*' '($PIXEL_FORMAT, 8UC1, 16 Remap_S16_U16_Replicate: opencv_perf_imgproc '*Remap/*' '($PIXEL_FORMAT, 16UC1, 16SC2, INTER_NEAREST, BORDER_REPLICATE)' Remap_S16Point5_U8_Replicate: opencv_perf_imgproc '*Remap/*' '($PIXEL_FORMAT, 8UC1, 16SC2, INTER_LINEAR, BORDER_REPLICATE)' Remap_S16Point5_U16_Replicate: opencv_perf_imgproc '*Remap/*' '($PIXEL_FORMAT, 16UC1, 16SC2, INTER_LINEAR, BORDER_REPLICATE)' +Remap_F32_U8_Replicate: opencv_perf_imgproc '*Remap/*' '($PIXEL_FORMAT, 8UC1, 32FC1, INTER_LINEAR, BORDER_REPLICATE)' + Remap_S16_U8_Constant: opencv_perf_imgproc '*Remap/*' '($PIXEL_FORMAT, 8UC1, 16SC2, INTER_NEAREST, BORDER_CONSTANT)' Remap_S16_U16_Constant: opencv_perf_imgproc '*Remap/*' '($PIXEL_FORMAT, 16UC1, 16SC2, INTER_NEAREST, BORDER_CONSTANT)' Remap_S16Point5_U8_Constant: opencv_perf_imgproc '*Remap/*' '($PIXEL_FORMAT, 8UC1, 16SC2, INTER_LINEAR, BORDER_CONSTANT)' Remap_S16Point5_U16_Constant: opencv_perf_imgproc '*Remap/*' '($PIXEL_FORMAT, 16UC1, 16SC2, INTER_LINEAR, BORDER_CONSTANT)' +Remap_F32_U8_Constant: opencv_perf_imgproc '*Remap/*' '($PIXEL_FORMAT, 8UC1, 32FC1, INTER_LINEAR, BORDER_CONSTANT)' + WarpPerspective_Nearest: opencv_perf_imgproc '*WarpPerspective/*' '($PIXEL_FORMAT, INTER_NEAREST, BORDER_REPLICATE, 1)' WarpPerspective_Linear: opencv_perf_imgproc '*WarpPerspective/*' '($PIXEL_FORMAT, INTER_LINEAR, BORDER_REPLICATE, 1)' WarpPerspective_Nearest_Constant: opencv_perf_imgproc '*WarpPerspective/*' '($PIXEL_FORMAT, INTER_NEAREST, BORDER_CONSTANT, 1)' diff --git a/scripts/ci-opencv.sh b/scripts/ci-opencv.sh index 8482cbc9808d5b7c35795c9ed1ac0826e6bbdbdd..d98b8c9c4111510eba651572f65a293eee565051 100755 --- a/scripts/ci-opencv.sh +++ b/scripts/ci-opencv.sh @@ -104,7 +104,8 @@ IMGPROC_TEST_PATTERNS=( '*Imgproc_Dilate*' '*Imgproc_Erode*' '*Imgproc_PyramidDown*' - '*Imgproc_Remap*' + '*Imgproc_Remap.*' + '*Imgproc_Remap_Test*' '*Imgproc_Warp*' ) IMGPROC_TEST_PATTERNS_STR="$(join_strings_with_colon "${IMGPROC_TEST_PATTERNS[*]}")" diff --git a/test/api/test_remap.cpp b/test/api/test_remap.cpp index 8dd36012799bb5535a3b7f4ada96f8d19568e749..72de4a4bb0431909dee8a543f80c451bd65bc0ab 100644 --- a/test/api/test_remap.cpp +++ b/test/api/test_remap.cpp @@ -1,9 +1,13 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 #include +#include +#include +#include + #include "framework/array.h" #include "framework/generator.h" #include "framework/utils.h" @@ -524,6 +528,26 @@ class RemapS16Point5 : public testing::Test { test::Array2D expected{dst_total_width, dst_h, padding, channels}; + const int64_t kMaxVal = std::numeric_limits::max(); + auto generateSource = [&](size_t x, size_t y) { + return static_cast((x + y) % 2 ? kMaxVal : 0); + }; + for (size_t y = 0; y < src_h; ++y) { + *source.at(y, 0) = generateSource(y, 0); + *source.at(y, 1) = generateSource(y, 1); + *source.at(y, 2) = generateSource(y, 2); + *source.at(y, src_w - 3) = generateSource(y, src_w - 3); + *source.at(y, src_w - 2) = generateSource(y, src_w - 2); + *source.at(y, src_w - 1) = generateSource(y, src_w - 1); + } + for (size_t x = 0; x < src_w; ++x) { + *source.at(0, x) = generateSource(0, x); + *source.at(1, x) = generateSource(1, x); + *source.at(2, x) = generateSource(2, x); + *source.at(src_h - 3, x) = generateSource(src_h - 3, x); + *source.at(src_h - 2, x) = generateSource(src_h - 2, x); + *source.at(src_h - 1, x) = generateSource(src_h - 1, x); + } actual.fill(42); calculate_expected(source, mapxy, mapfrac, border_type, border_value, @@ -826,3 +850,534 @@ TYPED_TEST(RemapS16Point5, UnsupportedTooSmallImage) { src, 1 * sizeof(TypeParam), 1, 1, dst, 8 * sizeof(TypeParam), 7, 1, 1, mapxy, 4, mapfrac, 2, KLEIDICV_BORDER_TYPE_REPLICATE, nullptr)); } + +template +class RemapF32 : public testing::Test { + public: + static void test_random(size_t src_w, size_t src_h, size_t dst_w, + size_t dst_h, size_t channels, + kleidicv_border_type_t border_type, + const ScalarType *border_value, size_t padding) { + test::Array2D mapx(dst_w, dst_h, padding); + test::PseudoRandomNumberGenerator xcoord_generator; + mapx.fill(xcoord_generator); + test::Array2D mapy(dst_w, dst_h, padding); + test::PseudoRandomNumberGenerator ycoord_generator; + mapy.fill(ycoord_generator); + execute_test(mapx, mapy, src_w, src_h, dst_w, dst_h, channels, border_type, + border_value, padding); + } + + static void test_outside_random(size_t src_w, size_t src_h, size_t dst_w, + size_t dst_h, size_t channels, + kleidicv_border_type_t border_type, + const ScalarType *border_value, + size_t padding) { + test::Array2D mapx(dst_w, dst_h, padding); + test::PseudoRandomNumberGeneratorFloatRange xcoord_generator{ + // static_cast(-src_w), static_cast(2 * src_w)}; -src_w + // overflow + static_cast(-static_cast(src_w)), + static_cast(2 * src_w)}; + mapx.fill(xcoord_generator); + test::Array2D mapy(dst_w, dst_h, padding); + test::PseudoRandomNumberGeneratorFloatRange ycoord_generator{ + static_cast(-static_cast(src_h)), + static_cast(2 * src_h)}; + mapy.fill(ycoord_generator); + execute_test(mapx, mapy, src_w, src_h, dst_w, dst_h, channels, border_type, + border_value, padding); + } + + static void test_blend(size_t src_w, size_t src_h, size_t dst_w, size_t dst_h, + size_t channels, kleidicv_border_type_t border_type, + const ScalarType *border_value, size_t padding) { + test::Array2D mapx(dst_w, dst_h, padding); + test::Array2D mapy(dst_w, dst_h, padding); + for (size_t row = 0; row < dst_h; ++row) { + for (size_t column = 0; column < dst_w; ++column) { + // Use a second degree function to add a nonlinear blend to the image + auto r = static_cast(row), c = static_cast(column), + w = static_cast(dst_w), h = static_cast(dst_h); + double x = c * 2 - c * c / w; + double y = r * (w - c) / w + 4 * r / h; + *mapx.at(row, column) = std::max( + 0, std::min(static_cast(src_w - 1), static_cast(x))); + *mapy.at(row, column) = std::max( + 0, std::min(static_cast(src_h - 1), static_cast(y))); + } + } + execute_test(mapx, mapy, src_w, src_h, dst_w, dst_h, channels, border_type, + border_value, padding); + } + + // Test coordinates with edge values that may easily overflow + static void test_corner_cases(size_t src_w, size_t src_h, size_t dst_w, + size_t dst_h, size_t channels, + kleidicv_border_type_t border_type, + const ScalarType *border_value, + size_t padding) { + test::Array2D mapx(dst_w, dst_h, padding); + test::Array2D mapy(dst_w, dst_h, padding); + const float corner_x_values[] = {std::numeric_limits::min(), + -1.8, + -0.3, + 0.2, + static_cast(src_w) - 1.5F, + static_cast(src_w) - 0.93F, + static_cast(src_w) + 0.1F, + static_cast(src_w) + 1.2F, + std::numeric_limits::max()}; + const float corner_y_values[] = {std::numeric_limits::min(), + -1.3, + -0.7, + 0.33, + 1.1, + static_cast(src_h) - 1.8F, + static_cast(src_h) - 0.77F, + static_cast(src_h) + 0.17F, + static_cast(src_h) + 1.06F, + std::numeric_limits::max()}; + const size_t nx = sizeof(corner_x_values) / sizeof(float); + const size_t ny = sizeof(corner_y_values) / sizeof(float); + size_t counter = 0; + for (size_t row = 0; row < dst_h; ++row) { + for (size_t column = 0; column < dst_w; ++column) { + *mapx.at(row, column) = corner_x_values[counter % nx]; + *mapy.at(row, column) = corner_y_values[counter % ny]; + ++counter; + } + } + + // This part is the same as execute_test() but without initializing source. + // Corner Cases use the biggest possible source. + size_t src_total_width = channels * src_w; + size_t dst_total_width = channels * dst_w; + + test::Array2D source{src_total_width, src_h, padding, channels}; + test::Array2D actual{dst_total_width, dst_h, padding, channels}; + test::Array2D expected{dst_total_width, dst_h, padding, + channels}; + + const int64_t kMaxVal = std::numeric_limits::max(); + auto generateSource = [&](size_t x, size_t y) { + return static_cast((x + y) % 2 ? kMaxVal : 27); + }; + for (size_t y = 0; y < src_h; ++y) { + *source.at(y, 0) = generateSource(y, 0); + *source.at(y, 1) = generateSource(y, 1); + *source.at(y, 2) = generateSource(y, 2); + *source.at(y, src_w - 3) = generateSource(y, src_w - 3); + *source.at(y, src_w - 2) = generateSource(y, src_w - 2); + *source.at(y, src_w - 1) = generateSource(y, src_w - 1); + } + for (size_t x = 0; x < src_w; ++x) { + *source.at(0, x) = generateSource(0, x); + *source.at(1, x) = generateSource(1, x); + *source.at(2, x) = generateSource(2, x); + *source.at(src_h - 3, x) = generateSource(src_h - 3, x); + *source.at(src_h - 2, x) = generateSource(src_h - 2, x); + *source.at(src_h - 1, x) = generateSource(src_h - 1, x); + } + + test::PseudoRandomNumberGenerator generator; + actual.fill(42); + + calculate_expected(source, mapx, mapy, border_type, border_value, expected); + + ASSERT_EQ( + KLEIDICV_OK, + kleidicv_remap_f32_u8( + source.data(), source.stride(), source.width(), source.height(), + actual.data(), actual.stride(), actual.width(), actual.height(), + channels, mapx.data(), mapx.stride(), mapy.data(), mapy.stride(), + KLEIDICV_INTERPOLATION_LINEAR, border_type, border_value)); + + EXPECT_EQ_ARRAY2D(actual, expected); + } + + private: + static void execute_test(test::Array2D &mapx, + test::Array2D &mapy, size_t src_w, + size_t src_h, size_t dst_w, size_t dst_h, + size_t channels, kleidicv_border_type_t border_type, + const ScalarType *border_value, size_t padding) { + size_t src_total_width = channels * src_w; + size_t dst_total_width = channels * dst_w; + + test::Array2D source{src_total_width, src_h, padding, channels}; + test::Array2D actual{dst_total_width, dst_h, padding, channels}; + test::Array2D expected{dst_total_width, dst_h, padding, + channels}; + test::PseudoRandomNumberGenerator generator; + source.fill(generator); + actual.fill(42); + + calculate_expected(source, mapx, mapy, border_type, border_value, expected); + + ASSERT_EQ( + KLEIDICV_OK, + kleidicv_remap_f32_u8( + source.data(), source.stride(), source.width(), source.height(), + actual.data(), actual.stride(), actual.width(), actual.height(), + channels, mapx.data(), mapx.stride(), mapy.data(), mapy.stride(), + KLEIDICV_INTERPOLATION_LINEAR, border_type, border_value)); + + EXPECT_EQ_ARRAY2D(actual, expected); + } + + static void calculate_expected(test::Array2D &src, + test::Array2D &mapx, + test::Array2D &mapy, + kleidicv_border_type_t border_type, + const ScalarType *border_value, + test::Array2D &expected) { + auto get_src = [&](ptrdiff_t x, ptrdiff_t y) { + return get_array2d_element_or_border(src, x, y, border_type, + border_value); + }; + + for (size_t row = 0; row < expected.height(); row++) { + for (size_t column = 0; column < expected.width() / src.channels(); + ++column) { + for (size_t ch = 0; ch < src.channels(); ++ch) { + float x = *mapx.at(row, column); + float y = *mapy.at(row, column); + // ptrdiff_t ix = std::floor(x); + // ptrdiff_t iy = std::floor(y); + ptrdiff_t ix = static_cast(std::max( + INT_MIN, + std::min(std::floor(x), + static_cast(KLEIDICV_MAX_IMAGE_PIXELS)))); + ptrdiff_t iy = static_cast(std::max( + INT_MIN, + std::min(std::floor(y), + static_cast(KLEIDICV_MAX_IMAGE_PIXELS)))); + float xfrac = x - std::floor(x); + float yfrac = y - std::floor(y); + float a = get_src(ix, iy)[ch]; + float b = get_src(ix + 1, iy)[ch]; + float c = get_src(ix, iy + 1)[ch]; + float d = get_src(ix + 1, iy + 1)[ch]; + float line1 = (b - a) * xfrac + a; + float line2 = (d - c) * xfrac + c; + float float_result = (line2 - line1) * yfrac + line1; + *expected.at(row, column * src.channels() + ch) = + static_cast(std::lround(float_result)); + } + } + } + } +}; + +using RemapF32ElementTypes = ::testing::Types; +TYPED_TEST_SUITE(RemapF32, RemapF32ElementTypes); + +TYPED_TEST(RemapF32, RandomNoPadding) { + size_t src_w = 3 * test::Options::vector_lanes() - 1; + size_t src_h = 4; + size_t dst_w = src_w; + size_t dst_h = src_h; + size_t channels = 1; + size_t padding = 0; + for (auto [border_type, border_value] : get_borders()) { + TestFixture::test_random(src_w, src_h, dst_w, dst_h, channels, border_type, + border_value, padding); + } +} + +TYPED_TEST(RemapF32, BlendPadding) { + size_t src_w = 3 * test::Options::vector_lanes() - 1; + size_t src_h = 4; + size_t dst_w = src_w; + size_t dst_h = src_h; + size_t channels = 1; + size_t padding = 13; + for (auto [border_type, border_value] : get_borders()) { + TestFixture::test_blend(src_w, src_h, dst_w, dst_h, channels, border_type, + border_value, padding); + } +} + +TYPED_TEST(RemapF32, OutsideRandomPadding) { + size_t src_w = 3 * test::Options::vector_lanes() - 1; + size_t src_h = 4; + size_t dst_w = src_w; + size_t dst_h = src_h; + size_t channels = 1; + size_t padding = 13; + for (auto [border_type, border_value] : get_borders()) { + TestFixture::test_outside_random(src_w, src_h, dst_w, dst_h, channels, + border_type, border_value, padding); + } +} + +TYPED_TEST(RemapF32, BlendBigStride) { + size_t src_w = 3 * test::Options::vector_lanes() - 1; + size_t src_h = 2; + size_t dst_w = src_w; + size_t dst_h = src_h; + size_t channels = 1; + size_t padding = 1 << 16; + for (auto [border_type, border_value] : get_borders()) { + TestFixture::test_blend(src_w, src_h, dst_w, dst_h, channels, border_type, + border_value, padding); + } +} + +TYPED_TEST(RemapF32, CornerCases) { + size_t src_w = (1ULL << 12) - 1; + size_t src_h = (1ULL << 12) - 1; + size_t dst_w = 4; + size_t dst_h = 3 * test::Options::vector_lanes() - 1; + size_t channels = 1; + size_t padding = 17; + for (auto [border_type, border_value] : get_borders()) { + TestFixture::test_corner_cases(src_w, src_h, dst_w, dst_h, channels, + border_type, border_value, padding); + } +} + +// Test for load_src_into_floats_large +TYPED_TEST(RemapF32, CornerCasesLargeLoad) { + // TODO: It takes long to run! + size_t src_w = 1ULL << 18; + size_t src_h = 1ULL << 14; + size_t dst_w = 3 * test::Options::vector_lanes() - 1; + size_t dst_h = 4; + size_t channels = 1; + size_t padding = 1; + for (auto [border_type, border_value] : get_borders()) { + TestFixture::test_corner_cases(src_w, src_h, dst_w, dst_h, channels, + border_type, border_value, padding); + } +} + +TYPED_TEST(RemapF32, NullPointer) { + const size_t element_size = sizeof(TypeParam); + const size_t src_width = 2; + const size_t src_height = 2; + const size_t src_stride = src_width * element_size; + const size_t dst_width = 1; + const size_t dst_height = 1; + const size_t dst_stride = dst_width * element_size; + const TypeParam src[4] = {}; + TypeParam dst[1]; + const size_t channels = 1; + float mapx[1] = {}; + const size_t mapx_stride = dst_width * sizeof(float); + float mapy[1] = {}; + const size_t mapy_stride = dst_width * sizeof(float); + const TypeParam border_value[1] = {}; + test::test_null_args(kleidicv_remap_f32_u8, src, src_stride, src_width, + src_height, dst, dst_stride, dst_width, dst_height, + channels, mapx, mapx_stride, mapy, mapy_stride, + KLEIDICV_INTERPOLATION_LINEAR, + KLEIDICV_BORDER_TYPE_CONSTANT, border_value); +} + +TYPED_TEST(RemapF32, ZeroImageSize) { + const TypeParam src[1] = {}; + TypeParam dst[1]; + const size_t src_stride = sizeof(TypeParam); + const size_t dst_stride = sizeof(TypeParam); + float mapx[1] = {}; + float mapy[1] = {}; + const size_t mapx_stride = sizeof(float); + const size_t mapy_stride = sizeof(float); + + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + kleidicv_remap_f32_u8(src, src_stride, 0, 1, dst, dst_stride, 0, 1, + 1, mapx, mapx_stride, mapy, mapy_stride, + KLEIDICV_INTERPOLATION_LINEAR, + KLEIDICV_BORDER_TYPE_REPLICATE, nullptr)); + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + kleidicv_remap_f32_u8(src, src_stride, 1, 0, dst, dst_stride, 1, 0, + 1, mapx, mapx_stride, mapy, mapy_stride, + KLEIDICV_INTERPOLATION_LINEAR, + KLEIDICV_BORDER_TYPE_REPLICATE, nullptr)); +} + +TYPED_TEST(RemapF32, InvalidImageSize) { + const size_t element_size = sizeof(TypeParam); + const TypeParam src[1] = {}; + TypeParam dst[1]; + float mapx[1] = {}; + float mapy[1] = {}; + + EXPECT_EQ( + KLEIDICV_ERROR_RANGE, + kleidicv_remap_f32_u8(src, element_size, KLEIDICV_MAX_IMAGE_PIXELS + 1, 1, + dst, element_size, 1, 1, 1, mapx, sizeof(float), + mapy, sizeof(float), KLEIDICV_INTERPOLATION_LINEAR, + KLEIDICV_BORDER_TYPE_REPLICATE, nullptr)); + + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + kleidicv_remap_f32_u8(src, element_size, KLEIDICV_MAX_IMAGE_PIXELS, + KLEIDICV_MAX_IMAGE_PIXELS, dst, element_size, + 1, 1, 1, mapx, sizeof(float), mapy, + sizeof(float), KLEIDICV_INTERPOLATION_LINEAR, + KLEIDICV_BORDER_TYPE_REPLICATE, nullptr)); + + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + kleidicv_remap_f32_u8(src, element_size, 1, 1, dst, element_size, + KLEIDICV_MAX_IMAGE_PIXELS + 1, 1, 1, mapx, + sizeof(float), mapy, sizeof(float), + KLEIDICV_INTERPOLATION_LINEAR, + KLEIDICV_BORDER_TYPE_REPLICATE, nullptr)); + + EXPECT_EQ( + KLEIDICV_ERROR_RANGE, + kleidicv_remap_f32_u8(src, element_size, 1, 1, dst, element_size, + KLEIDICV_MAX_IMAGE_PIXELS, + KLEIDICV_MAX_IMAGE_PIXELS, 1, mapx, sizeof(float), + mapy, sizeof(float), KLEIDICV_INTERPOLATION_LINEAR, + KLEIDICV_BORDER_TYPE_REPLICATE, nullptr)); +} + +TYPED_TEST(RemapF32, UnsupportedTwoChannels) { + const size_t element_size = sizeof(TypeParam); + const TypeParam src[1] = {}; + TypeParam dst[16]; + float mapx[16] = {}; + float mapy[16] = {}; + const size_t channels = 2; + + EXPECT_EQ( + KLEIDICV_ERROR_NOT_IMPLEMENTED, + kleidicv_remap_f32_u8(src, element_size, 1, 1, dst, 16 * element_size, 16, + 1, channels, mapx, 16 * sizeof(float), mapy, + 16 * sizeof(float), KLEIDICV_INTERPOLATION_LINEAR, + KLEIDICV_BORDER_TYPE_REPLICATE, nullptr)); +} + +TYPED_TEST(RemapF32, UnsupportedInterpolationTypeNEAREST) { + const size_t element_size = sizeof(TypeParam); + const TypeParam src[1] = {}; + TypeParam dst[16]; + float mapx[16] = {}; + float mapy[16] = {}; + + EXPECT_EQ( + KLEIDICV_ERROR_NOT_IMPLEMENTED, + kleidicv_remap_f32_u8(src, element_size, 1, 1, dst, 16 * element_size, 16, + 1, 1, mapx, 16 * sizeof(float), mapy, + 16 * sizeof(float), KLEIDICV_INTERPOLATION_NEAREST, + KLEIDICV_BORDER_TYPE_REPLICATE, nullptr)); +} + +TYPED_TEST(RemapF32, UnsupportedTooSmallImage) { + const size_t element_size = sizeof(TypeParam); + const TypeParam src[1] = {}; + TypeParam dst[16]; + float mapx[16] = {}; + float mapy[16] = {}; + + EXPECT_EQ( + KLEIDICV_ERROR_NOT_IMPLEMENTED, + kleidicv_remap_f32_u8(src, element_size, 1, 1, dst, 16 * element_size, 3, + 1, 1, mapx, 16 * sizeof(float), mapy, + 16 * sizeof(float), KLEIDICV_INTERPOLATION_LINEAR, + KLEIDICV_BORDER_TYPE_REPLICATE, nullptr)); +} + +TYPED_TEST(RemapF32, UnsupportedBigStride) { + const size_t element_size = sizeof(TypeParam); + const TypeParam src[1] = {}; + TypeParam dst[16]; + float mapx[16] = {}; + float mapy[16] = {}; + const uint64_t src_stride = + static_cast(std::numeric_limits::max()) + 1; + + EXPECT_EQ( + KLEIDICV_ERROR_NOT_IMPLEMENTED, + kleidicv_remap_f32_u8(src, src_stride, 1, 1, dst, 16 * element_size, 16, + 1, 1, mapx, 16 * sizeof(float), mapy, + 16 * sizeof(float), KLEIDICV_INTERPOLATION_LINEAR, + KLEIDICV_BORDER_TYPE_REPLICATE, nullptr)); +} + +TYPED_TEST(RemapF32, UnsupportedBigSourceWidth) { + const size_t element_size = sizeof(TypeParam); + const TypeParam src[1] = {}; + TypeParam dst[16]; + float mapx[16] = {}; + float mapy[16] = {}; + + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + kleidicv_remap_f32_u8(src, element_size, 1ULL << 24, 1, dst, + 16 * element_size, 16, 1, 1, mapx, + 16 * sizeof(float), mapy, 16 * sizeof(float), + KLEIDICV_INTERPOLATION_LINEAR, + KLEIDICV_BORDER_TYPE_REPLICATE, nullptr)); + + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + kleidicv_remap_f32_u8(src, element_size, (1ULL << 32) + 1, 1, dst, + 16 * element_size, 16, 1, 1, mapx, + 16 * sizeof(float), mapy, 16 * sizeof(float), + KLEIDICV_INTERPOLATION_LINEAR, + KLEIDICV_BORDER_TYPE_REPLICATE, nullptr)); +} + +TYPED_TEST(RemapF32, UnsupportedBigSourceHeight) { + const size_t element_size = sizeof(TypeParam); + const TypeParam src[1] = {}; + TypeParam dst[16]; + float mapx[16] = {}; + float mapy[16] = {}; + + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + kleidicv_remap_f32_u8(src, element_size, 1, 1ULL << 24, dst, + 16 * element_size, 16, 1, 1, mapx, + 16 * sizeof(float), mapy, 16 * sizeof(float), + KLEIDICV_INTERPOLATION_LINEAR, + KLEIDICV_BORDER_TYPE_REPLICATE, nullptr)); + + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + kleidicv_remap_f32_u8(src, element_size, 1, (1ULL << 32) + 1, dst, + 16 * element_size, 16, 1, 1, mapx, + 16 * sizeof(float), mapy, 16 * sizeof(float), + KLEIDICV_INTERPOLATION_LINEAR, + KLEIDICV_BORDER_TYPE_REPLICATE, nullptr)); +} + +TYPED_TEST(RemapF32, UnsupportedBigDestinationWidth) { + const size_t element_size = sizeof(TypeParam); + const TypeParam src[1] = {}; + TypeParam dst[16]; + float mapx[16] = {}; + float mapy[16] = {}; + + EXPECT_EQ( + KLEIDICV_ERROR_RANGE, + kleidicv_remap_f32_u8(src, element_size, 1, 1, dst, 16 * element_size, + 1ULL << 24, 1, 1, mapx, 16 * sizeof(float), mapy, + 16 * sizeof(float), KLEIDICV_INTERPOLATION_LINEAR, + KLEIDICV_BORDER_TYPE_REPLICATE, nullptr)); +} + +TYPED_TEST(RemapF32, UnsupportedBigDestinationHeight) { + const size_t element_size = sizeof(TypeParam); + const TypeParam src[1] = {}; + TypeParam dst[16]; + float mapx[16] = {}; + float mapy[16] = {}; + + EXPECT_EQ( + KLEIDICV_ERROR_RANGE, + kleidicv_remap_f32_u8(src, element_size, 1, 1, dst, 16 * element_size, 16, + 1ULL << 24, 1, mapx, 16 * sizeof(float), mapy, + 16 * sizeof(float), KLEIDICV_INTERPOLATION_LINEAR, + KLEIDICV_BORDER_TYPE_REPLICATE, nullptr)); +} + +TYPED_TEST(RemapF32, Misalignment) { + const size_t element_size = sizeof(TypeParam); + if (element_size == 1) { + // misalignment impossible + GTEST_SKIP(); + } + + // Will be needed when supporting uint16_t +} diff --git a/test/api/test_thread.cpp b/test/api/test_thread.cpp index 3861d8028065cc2b26b6837fc1c0b0c95ee3f398..c930b4757678e9e452636c4bf9731bcab6bbc29f 100644 --- a/test/api/test_thread.cpp +++ b/test/api/test_thread.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 @@ -244,6 +244,80 @@ class Thread : public testing::TestWithParam

{ EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, result); } + template + void check_remap_f32(SingleThreadedFunc single_threaded_func, + MultithreadedFunc multithreaded_func, size_t channels, + Args... args) { + unsigned test_width = 0, height = 0, thread_count = 0; + std::tie(test_width, height, thread_count) = GetParam(); + const unsigned src_width = 300, src_height = 300; + // width < 4 are not supported, that's not tested here + size_t width = test_width + 4; + test::Array2D src(size_t{src_width} * channels, src_height); + test::Array2D mapx(width * 2, height); + test::Array2D mapy(width, height); + test::Array2D dst_single(width * channels, height), + dst_multi(width * channels, height); + + test::PseudoRandomNumberGenerator src_generator; + src.fill(src_generator); + test::PseudoRandomNumberGeneratorFloatRange xcoord_generator{ + 0, std::min(static_cast(src_height - 1), + static_cast(src_width - 1))}; + mapx.fill(xcoord_generator); + test::PseudoRandomNumberGeneratorFloatRange ycoord_generator{ + 0, std::min(static_cast(src_height - 1), + static_cast(src_width - 1))}; + mapy.fill(ycoord_generator); + + kleidicv_error_t single_result = single_threaded_func( + src.data(), src.stride(), src_width, src_height, dst_single.data(), + dst_single.stride(), width, height, channels, mapx.data(), + mapx.stride(), mapy.data(), mapy.stride(), args...); + + kleidicv_error_t multi_result = multithreaded_func( + src.data(), src.stride(), src_width, src_height, dst_multi.data(), + dst_multi.stride(), width, height, channels, mapx.data(), mapx.stride(), + mapy.data(), mapy.stride(), args..., + get_multithreading_fake(thread_count)); + + EXPECT_EQ(KLEIDICV_OK, single_result); + EXPECT_EQ(KLEIDICV_OK, multi_result); + EXPECT_EQ_ARRAY2D(dst_multi, dst_single); + } + + template + void check_remap_f32_not_implemented(MultithreadedFunc multithreaded_func, + size_t channels, Args... args) { + unsigned test_width = 0, height = 0, thread_count = 0; + std::tie(test_width, height, thread_count) = GetParam(); + const unsigned src_width = 300, src_height = 300; + // width < 4 are not supported, that's not tested here + size_t width = test_width + 4; + test::Array2D src(size_t{src_width} * channels, src_height); + test::Array2D mapx(width * 2, height); + test::Array2D mapy(width, height); + test::Array2D dst_small(test_width * channels, height), + dst(width * channels, height); + + kleidicv_error_t result = multithreaded_func( + src.data(), src.stride(), src_width, src_height, dst.data(), + dst.stride(), width, height, channels, mapx.data(), mapx.stride(), + mapy.data(), mapy.stride(), args..., + get_multithreading_fake(thread_count)); + + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, result); + + result = multithreaded_func(src.data(), src.stride(), src_width, src_height, + dst_small.data(), dst_small.stride(), + test_width, height, channels, mapx.data(), + mapx.stride(), mapy.data(), mapy.stride(), + args..., get_multithreading_fake(thread_count)); + + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, result); + } + template void check_warp_perspective(SingleThreadedFunc single_threaded_func, @@ -688,6 +762,32 @@ TEST_P(Thread, remap_s16point5_u16_not_implemented) { &border_value); } +TEST_P(Thread, remap_f32_u8_border_replicate) { + check_remap_f32(kleidicv_remap_f32_u8, kleidicv_thread_remap_f32_u8, + 1, KLEIDICV_INTERPOLATION_LINEAR, + KLEIDICV_BORDER_TYPE_REPLICATE, nullptr); +} + +TEST_P(Thread, remap_f32_u8_border_constant) { + const uint8_t border_value = 0; + check_remap_f32(kleidicv_remap_f32_u8, kleidicv_thread_remap_f32_u8, + 1, KLEIDICV_INTERPOLATION_LINEAR, + KLEIDICV_BORDER_TYPE_CONSTANT, &border_value); +} + +TEST_P(Thread, remap_f32_u8_not_implemented) { + const uint8_t border_value = 0; + check_remap_f32_not_implemented( + kleidicv_thread_remap_f32_u8, 2, KLEIDICV_INTERPOLATION_LINEAR, + KLEIDICV_BORDER_TYPE_REPLICATE, &border_value); + check_remap_f32_not_implemented( + kleidicv_thread_remap_f32_u8, 1, KLEIDICV_INTERPOLATION_NEAREST, + KLEIDICV_BORDER_TYPE_REPLICATE, &border_value); + check_remap_f32_not_implemented( + kleidicv_thread_remap_f32_u8, 1, KLEIDICV_INTERPOLATION_LINEAR, + KLEIDICV_BORDER_TYPE_REFLECT, &border_value); +} + TEST_P(Thread, warp_perspective_u8_border_replicate) { const uint8_t border_value = 0; check_warp_perspective(