diff --git a/CHANGELOG.md b/CHANGELOG.md index 87be149c01a407430642063093cd953eb5186dda..4d3213a9bcf96834767c50593e15681d6b472457 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,7 +20,7 @@ This changelog aims to follow the guiding principles of - Remap implementations with - Integer coordinates with nearest neighbour method - Fixed-point coordinates with linear interpolation - - Floating-point coordinates with linear interpolation + - Floating-point coordinates with nearest neighbour and linear interpolation - Replicated and constant borders - 1-channel only - u8 and u16 images diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index c61ef14a4a2b4e59c7c1bfaa4c1ed415600deb12..94f8dd46b5fcdbd7143b347e3ff28e573e8602e1 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -857,30 +857,60 @@ static void remap_f32(Function f, MapFuncX mfx, MapFuncY mfy, size_t channels, } \ BENCHMARK(benchname) -BENCH_REMAP_F32(remap_f32_u8_random, remap_f32_u8, get_random_mapx, - get_random_mapy, 1, KLEIDICV_INTERPOLATION_LINEAR, - KLEIDICV_BORDER_TYPE_REPLICATE, uint8_t); +BENCH_REMAP_F32(remap_f32_u8_linear_random, remap_f32_u8, + get_random_mapx, get_random_mapy, 1, + KLEIDICV_INTERPOLATION_LINEAR, KLEIDICV_BORDER_TYPE_REPLICATE, + uint8_t); -BENCH_REMAP_F32(remap_f32_u8_blend, remap_f32_u8, get_blend_mapx, +BENCH_REMAP_F32(remap_f32_u8_linear_blend, remap_f32_u8, get_blend_mapx, get_blend_mapy, 1, KLEIDICV_INTERPOLATION_LINEAR, KLEIDICV_BORDER_TYPE_REPLICATE, uint8_t); -BENCH_REMAP_F32(remap_f32_u8_flip, remap_f32_u8, get_flip_mapx, +BENCH_REMAP_F32(remap_f32_u8_linear_flip, remap_f32_u8, get_flip_mapx, get_flip_mapy, 1, KLEIDICV_INTERPOLATION_LINEAR, KLEIDICV_BORDER_TYPE_REPLICATE, uint8_t); -BENCH_REMAP_F32(remap_f32_u16_random, remap_f32_u16, get_random_mapx, - get_random_mapy, 1, KLEIDICV_INTERPOLATION_LINEAR, - KLEIDICV_BORDER_TYPE_REPLICATE, uint16_t); +BENCH_REMAP_F32(remap_f32_u16_linear_random, remap_f32_u16, + get_random_mapx, get_random_mapy, 1, + KLEIDICV_INTERPOLATION_LINEAR, KLEIDICV_BORDER_TYPE_REPLICATE, + uint16_t); -BENCH_REMAP_F32(remap_f32_u16_blend, remap_f32_u16, get_blend_mapx, - get_blend_mapy, 1, KLEIDICV_INTERPOLATION_LINEAR, - KLEIDICV_BORDER_TYPE_REPLICATE, uint16_t); +BENCH_REMAP_F32(remap_f32_u16_linear_blend, remap_f32_u16, + get_blend_mapx, get_blend_mapy, 1, + KLEIDICV_INTERPOLATION_LINEAR, KLEIDICV_BORDER_TYPE_REPLICATE, + uint16_t); -BENCH_REMAP_F32(remap_f32_u16_flip, remap_f32_u16, get_flip_mapx, +BENCH_REMAP_F32(remap_f32_u16_linear_flip, remap_f32_u16, get_flip_mapx, get_flip_mapy, 1, KLEIDICV_INTERPOLATION_LINEAR, KLEIDICV_BORDER_TYPE_REPLICATE, uint16_t); +BENCH_REMAP_F32(remap_f32_u8_nearest_random, remap_f32_u8, + get_random_mapx, get_random_mapy, 1, + KLEIDICV_INTERPOLATION_NEAREST, KLEIDICV_BORDER_TYPE_REPLICATE, + uint8_t); + +BENCH_REMAP_F32(remap_f32_u8_nearest_blend, remap_f32_u8, get_blend_mapx, + get_blend_mapy, 1, KLEIDICV_INTERPOLATION_NEAREST, + KLEIDICV_BORDER_TYPE_REPLICATE, uint8_t); + +BENCH_REMAP_F32(remap_f32_u8_nearest_flip, remap_f32_u8, get_flip_mapx, + get_flip_mapy, 1, KLEIDICV_INTERPOLATION_NEAREST, + KLEIDICV_BORDER_TYPE_REPLICATE, uint8_t); + +BENCH_REMAP_F32(remap_f32_u16_nearest_random, remap_f32_u16, + get_random_mapx, get_random_mapy, 1, + KLEIDICV_INTERPOLATION_NEAREST, KLEIDICV_BORDER_TYPE_REPLICATE, + uint16_t); + +BENCH_REMAP_F32(remap_f32_u16_nearest_blend, remap_f32_u16, + get_blend_mapx, get_blend_mapy, 1, + KLEIDICV_INTERPOLATION_NEAREST, KLEIDICV_BORDER_TYPE_REPLICATE, + uint16_t); + +BENCH_REMAP_F32(remap_f32_u16_nearest_flip, remap_f32_u16, get_flip_mapx, + get_flip_mapy, 1, KLEIDICV_INTERPOLATION_NEAREST, + KLEIDICV_BORDER_TYPE_REPLICATE, uint16_t); + // clang-format off static const float transform_identity[] = { 1.0, 0, 0, diff --git a/conformity/opencv/test_remap.cpp b/conformity/opencv/test_remap.cpp index 316ebfdfe2077dc916357be11de6b1fc04b2e294..face699ec1f7f0080db7d6bda130a782bbad2808 100644 --- a/conformity/opencv/test_remap.cpp +++ b/conformity/opencv/test_remap.cpp @@ -161,10 +161,29 @@ bool test_remap_f32(int index, RecreatedMessageQueue& request_queue, for (size_t h = 5; h <= kMaxHeight * 2; h += 2) { cv::Mat map_mat(h * 2, w, CV_32FC1); cv::Mat mapx_mat = map_mat.rowRange(0, h); - rng.fill(mapx_mat, cv::RNG::UNIFORM, -3, kMaxWidth + 3); - cv::Mat mapy_mat = map_mat.rowRange(h, map_mat.rows); - rng.fill(mapy_mat, cv::RNG::UNIFORM, -3, kMaxHeight + 3); + for (size_t y = 0; y < h; ++y) { + for (size_t x = 0; x < w; ++x) { + // Values from -0.49 to 0.49, so exactly 0.5 is excluded + + // Reason: When rounding floating point values to integer, OpenCV does + // scalar rounding that works differently based on the rounding + // environment. E.g. it can use "Rounding to nearest, ties to even", + // while KleidiCV always uses "Rounding to nearest, towards plus + // infinity". To prevent these differences, values with exactly 0.5 + // fractional part are excluded. + float divisor = (1.01 * 0x1p32); + float epsilon = 0x1p-16; + float fractionX = rng.next() / divisor - 0.5F + epsilon; + float fractionY = rng.next() / divisor - 0.5F + epsilon; + mapx_mat.at(y, x) = + (static_cast(rng.next() % (kMaxWidth + 6)) - 3) + + fractionX; + mapy_mat.at(y, x) = + (static_cast(rng.next() % (kMaxHeight + 6)) - 3) + + fractionY; + } + } cv::Mat actual_mat = exec_remap_f32(map_mat); @@ -235,10 +254,14 @@ std::vector& remap_tests_get() { TEST("RemapS16Point5 uint8 Constant", (test_remap_s16point5), (exec_remap_s16point5)), TEST("RemapS16Point5 uint16 Constant", (test_remap_s16point5), (exec_remap_s16point5)), - TEST("RemapF32 uint8 Replicate", (test_remap_f32), (exec_remap_f32)), - TEST("RemapF32 uint16 Replicate", (test_remap_f32), (exec_remap_f32)), - TEST("RemapF32 uint8 Constant", (test_remap_f32), (exec_remap_f32)), - TEST("RemapF32 uint16 Constant", (test_remap_f32), (exec_remap_f32)), + TEST("RemapF32 uint8 Replicate Linear", (test_remap_f32), (exec_remap_f32)), + TEST("RemapF32 uint16 Replicate Linear", (test_remap_f32), (exec_remap_f32)), + TEST("RemapF32 uint8 Constant Linear", (test_remap_f32), (exec_remap_f32)), + TEST("RemapF32 uint16 Constant Linear", (test_remap_f32), (exec_remap_f32)), + TEST("RemapF32 uint8 Replicate Nearest", (test_remap_f32), (exec_remap_f32)), + TEST("RemapF32 uint16 Replicate Nearest", (test_remap_f32), (exec_remap_f32)), + TEST("RemapF32 uint8 Constant Nearest", (test_remap_f32), (exec_remap_f32)), + TEST("RemapF32 uint16 Constant Nearest", (test_remap_f32), (exec_remap_f32)), }; // clang-format on return tests; diff --git a/doc/opencv.md b/doc/opencv.md index 2ddae12c6a463fd4781273cd20641abf06ae6fdb..a972dbdacd953e1beca50d51e906df08ac914b16 100644 --- a/doc/opencv.md +++ b/doc/opencv.md @@ -223,7 +223,7 @@ Supported map configurations: * `map1` is 32FC1 and `map2` is 32FC1: * `map1` is x coordinates (column) * `map2` is y coordinates (row) - * supported `interpolation`: `INTER_LINEAR` only + * supported `interpolation`: `INTER_NEAREST` and `INTER_LINEAR` ### [`cv::warpPerspective()`](https://docs.opencv.org/4.10.0/da/d54/group__imgproc__transform.html#gaf73673a7e8e18ec6963e3774e6a94b87) Performs a perspective transformation on an image. diff --git a/kleidicv/include/kleidicv/kleidicv.h b/kleidicv/include/kleidicv/kleidicv.h index 094cc8c41b33b703bb908df280fc528fbbdcdf28..294414163f20314982b5b31d4fdd90bb1e2f9e98 100644 --- a/kleidicv/include/kleidicv/kleidicv.h +++ b/kleidicv/include/kleidicv/kleidicv.h @@ -1873,6 +1873,7 @@ KLEIDICV_API_DECLARATION(kleidicv_remap_s16point5_u16, const uint16_t *src, /// sizeof(float)`, except for single-row images. /// @param interpolation Interpolation algorithm. Supported types: \n /// - @ref KLEIDICV_INTERPOLATION_LINEAR +/// - @ref KLEIDICV_INTERPOLATION_NEAREST /// @param border_type Way of handling the border. The supported border types /// are: \n /// - @ref KLEIDICV_BORDER_TYPE_REPLICATE diff --git a/kleidicv/include/kleidicv/transform/remap.h b/kleidicv/include/kleidicv/transform/remap.h index 6ef3447152dae74e16403d7808cbc9d99dc6e84d..dcf43bd6401eb902417ae003810a5aefbc19e506 100644 --- a/kleidicv/include/kleidicv/transform/remap.h +++ b/kleidicv/include/kleidicv/transform/remap.h @@ -65,7 +65,9 @@ inline bool remap_f32_is_implemented( static_cast(std::numeric_limits::max()) + 1 && (border_type == KLEIDICV_BORDER_TYPE_REPLICATE || border_type == KLEIDICV_BORDER_TYPE_CONSTANT) && - channels == 1 && interpolation == KLEIDICV_INTERPOLATION_LINEAR); + channels == 1 && + (interpolation == KLEIDICV_INTERPOLATION_LINEAR || + interpolation == KLEIDICV_INTERPOLATION_NEAREST)); } else { return false; } diff --git a/kleidicv/src/transform/remap_neon.cpp b/kleidicv/src/transform/remap_neon.cpp index d61561777d79a75d5966324fa814b432855188f9..936d42139b8768a48823fa5c83f6f7842aee579f 100644 --- a/kleidicv/src/transform/remap_neon.cpp +++ b/kleidicv/src/transform/remap_neon.cpp @@ -1510,6 +1510,465 @@ class RemapF32ConstantBorder { }; // end of class RemapF32ConstantBorder // NOLINTEND(readability-function-cognitive-complexity) +template +class RemapF32NearestReplicate; + +template +class RemapF32NearestReplicate { + public: + using ScalarType = uint8_t; + using MapVecTraits = neon::VecTraits; + using MapVectorType = typename MapVecTraits::VectorType; // float32x4_t + + RemapF32NearestReplicate(Rows src_rows, size_t src_width, + size_t src_height) + : src_rows_{src_rows}, + v_src_stride_{vdupq_n_u32(static_cast(src_rows_.stride()))}, + v_xmax_{vdupq_n_u32(static_cast(src_width - 1))}, + v_ymax_{vdupq_n_u32(static_cast(src_height - 1))} {} + + void get_map_coordinates(Columns mapx, Columns mapy, + uint32x4_t &x, uint32x4_t &y) { + MapVectorType x_raw = vld1q_f32(&mapx[0]); + MapVectorType y_raw = vld1q_f32(&mapy[0]); + + MapVectorType bias = vdupq_n_f32(0.5F); + // Round to nearest positive value + uint32x4_t x_nearest = vcvtmq_u32_f32(vaddq_f32(x_raw, bias)); + uint32x4_t y_nearest = vcvtmq_u32_f32(vaddq_f32(y_raw, bias)); + + // Clamp coordinates to within the dimensions of the source image + x = vmaxq_u32(vdupq_n_u32(0), vminq_u32(x_nearest, v_xmax_)); + y = vmaxq_u32(vdupq_n_u32(0), vminq_u32(y_nearest, v_ymax_)); + } + + uint8x8_t load_pixels_large(uint32x4_t x, uint32x4_t y) { + // Calculate offsets from coordinates (y * stride + x) + uint64x2_t indices_low = + vmlal_u32(vmovl_u32(vget_low_u32(x)), vget_low_u32(y), + vget_low_u32(v_src_stride_)); + uint64x2_t indices_high = + vmlal_high_u32(vmovl_high_u32(x), y, v_src_stride_); + + uint8x8_t pixels = {src_rows_[vgetq_lane_u64(indices_low, 0)], + src_rows_[vgetq_lane_u64(indices_low, 1)], + src_rows_[vgetq_lane_u64(indices_high, 0)], + src_rows_[vgetq_lane_u64(indices_high, 1)], + 0, + 0, + 0, + 0}; + return pixels; + } + + uint8x8_t load_pixels_small(uint32x4_t x, uint32x4_t y) { + // Calculate offsets from coordinates (y * stride + x) + uint32x4_t indices = vmlaq_u32(x, y, v_src_stride_); + + uint8x8_t pixels = {src_rows_[vgetq_lane_u32(indices, 0)], + src_rows_[vgetq_lane_u32(indices, 1)], + src_rows_[vgetq_lane_u32(indices, 2)], + src_rows_[vgetq_lane_u32(indices, 3)], + 0, + 0, + 0, + 0}; + return pixels; + } + + void store_pixels(uint8x8_t pixels, Columns dst) { + dst[0] = vget_lane_u8(pixels, 0); + dst[1] = vget_lane_u8(pixels, 1); + dst[2] = vget_lane_u8(pixels, 2); + dst[3] = vget_lane_u8(pixels, 3); + } + + void process_row(size_t width, Columns mapx, + Columns mapy, Columns dst) { + const size_t kStep = VecTraits::num_lanes(); + + auto vector_path = [&](size_t step) { + uint32x4_t x, y; + get_map_coordinates(mapx, mapy, x, y); + + uint8x8_t pixels; + if constexpr (IsLarge) { + pixels = load_pixels_large(x, y); + } else { + pixels = load_pixels_small(x, y); + } + + store_pixels(pixels, dst); + + mapx += ptrdiff_t(step); + mapy += ptrdiff_t(step); + dst += ptrdiff_t(step); + }; + + LoopUnroll loop{width, kStep}; + loop.unroll_once(vector_path); + ptrdiff_t back_step = static_cast(loop.step()) - + static_cast(loop.remaining_length()); + mapx -= back_step; + mapy -= back_step; + dst -= back_step; + loop.remaining([&](size_t, size_t step) { vector_path(step); }); + } + + private: + Rows src_rows_; + uint32x4_t v_src_stride_; + uint32x4_t v_xmax_; + uint32x4_t v_ymax_; +}; // end of class RemapF32NearestReplicate + +template +class RemapF32NearestReplicate { + public: + using ScalarType = uint16_t; + using MapVecTraits = neon::VecTraits; + using MapVectorType = typename MapVecTraits::VectorType; // float32x4_t + + RemapF32NearestReplicate(Rows src_rows, size_t src_width, + size_t src_height) + : src_rows_{src_rows}, + v_src_element_stride_{vdupq_n_u32( + static_cast(src_rows_.stride() / sizeof(ScalarType)))}, + v_xmax_{vdupq_n_u32(static_cast(src_width - 1))}, + v_ymax_{vdupq_n_u32(static_cast(src_height - 1))} {} + + void get_map_coordinates(Columns mapx, Columns mapy, + uint32x4_t &x, uint32x4_t &y) { + MapVectorType x_raw = vld1q_f32(&mapx[0]); + MapVectorType y_raw = vld1q_f32(&mapy[0]); + + MapVectorType bias = vdupq_n_f32(0.5F); + // Round to nearest positive value + uint32x4_t x_nearest = vcvtmq_u32_f32(vaddq_f32(x_raw, bias)); + uint32x4_t y_nearest = vcvtmq_u32_f32(vaddq_f32(y_raw, bias)); + + // Clamp coordinates to within the dimensions of the source image + x = vmaxq_u32(vdupq_n_u32(0), vminq_u32(x_nearest, v_xmax_)); + y = vmaxq_u32(vdupq_n_u32(0), vminq_u32(y_nearest, v_ymax_)); + } + + uint16x4_t load_pixels_large(uint32x4_t x, uint32x4_t y) { + // Calculate offsets from coordinates (y * element_stride + x) + uint64x2_t indices_low = + vmlal_u32(vmovl_u32(vget_low_u32(x)), vget_low_u32(y), + vget_low_u32(v_src_element_stride_)); + uint64x2_t indices_high = + vmlal_high_u32(vmovl_high_u32(x), y, v_src_element_stride_); + + uint16x4_t pixels = {src_rows_[vgetq_lane_u64(indices_low, 0)], + src_rows_[vgetq_lane_u64(indices_low, 1)], + src_rows_[vgetq_lane_u64(indices_high, 0)], + src_rows_[vgetq_lane_u64(indices_high, 1)]}; + return pixels; + } + + uint16x4_t load_pixels_small(uint32x4_t x, uint32x4_t y) { + // Calculate offsets from coordinates (y * element_stride + x) + uint32x4_t indices = vmlaq_u32(x, y, v_src_element_stride_); + + uint16x4_t pixels = {src_rows_[vgetq_lane_u32(indices, 0)], + src_rows_[vgetq_lane_u32(indices, 1)], + src_rows_[vgetq_lane_u32(indices, 2)], + src_rows_[vgetq_lane_u32(indices, 3)]}; + return pixels; + } + + void store_pixels(uint16x4_t pixels, Columns dst) { + vst1_u16(&dst[0], pixels); + } + + void process_row(size_t width, Columns mapx, + Columns mapy, Columns dst) { + const size_t kStep = VecTraits::num_lanes(); + + auto vector_path = [&](size_t step) { + uint32x4_t x, y; + get_map_coordinates(mapx, mapy, x, y); + + uint16x4_t pixels; + if constexpr (IsLarge) { + pixels = load_pixels_large(x, y); + } else { + pixels = load_pixels_small(x, y); + } + + store_pixels(pixels, dst); + + mapx += ptrdiff_t(step); + mapy += ptrdiff_t(step); + dst += ptrdiff_t(step); + }; + + LoopUnroll loop{width, kStep}; + loop.unroll_once(vector_path); + ptrdiff_t back_step = static_cast(loop.step()) - + static_cast(loop.remaining_length()); + mapx -= back_step; + mapy -= back_step; + dst -= back_step; + loop.remaining([&](size_t, size_t step) { vector_path(step); }); + } + + private: + Rows src_rows_; + uint32x4_t v_src_element_stride_; + uint32x4_t v_xmax_; + uint32x4_t v_ymax_; +}; // end of class RemapF32NearestReplicate + +template +class RemapF32NearestConstant; + +template +class RemapF32NearestConstant { + public: + using ScalarType = uint8_t; + using MapVecTraits = neon::VecTraits; + using MapVectorType = typename MapVecTraits::VectorType; // float32x4_t + + RemapF32NearestConstant(Rows src_rows, size_t src_width, + size_t src_height, const ScalarType *border_value) + : src_rows_{src_rows}, + v_src_stride_{vdupq_n_u32(static_cast(src_rows_.stride()))}, + v_width_{vdupq_n_u32(static_cast(src_width))}, + v_height_{vdupq_n_u32(static_cast(src_height))}, + v_border_{vdup_n_u8(*border_value)} {} + + void get_map_coordinates(Columns mapx, Columns mapy, + uint32x4_t &x, uint32x4_t &y, uint32x4_t &in_range) { + MapVectorType x_raw = vld1q_f32(&mapx[0]); + MapVectorType y_raw = vld1q_f32(&mapy[0]); + + MapVectorType bias = vdupq_n_f32(0.5F); + float32x4_t x_biased = vaddq_f32(x_raw, bias); + float32x4_t y_biased = vaddq_f32(y_raw, bias); + + // Round to nearest positive value + uint32x4_t x_nearest = vcvtmq_u32_f32(x_biased); + uint32x4_t y_nearest = vcvtmq_u32_f32(y_biased); + + // Find whether coordinates are within the image dimensions. + uint32x4_t above_zero = + vandq_u32(vcgezq_f32(x_biased), vcgezq_f32(y_biased)); + uint32x4_t below_limits = vandq_u32(vcltq_u32(x_nearest, v_width_), + vcltq_u32(y_nearest, v_height_)); + in_range = vandq_u32(above_zero, below_limits); + + // Zero out-of-range coordinates. + x = vandq_u32(in_range, x_nearest); + y = vandq_u32(in_range, y_nearest); + } + + uint8x8_t load_pixels_large(uint32x4_t x, uint32x4_t y) { + // Calculate offsets from coordinates (y * stride + x) + uint64x2_t indices_low = + vmlal_u32(vmovl_u32(vget_low_u32(x)), vget_low_u32(y), + vget_low_u32(v_src_stride_)); + uint64x2_t indices_high = + vmlal_high_u32(vmovl_high_u32(x), y, v_src_stride_); + + uint8x8_t pixels = {src_rows_[vgetq_lane_u64(indices_low, 0)], + src_rows_[vgetq_lane_u64(indices_low, 1)], + src_rows_[vgetq_lane_u64(indices_high, 0)], + src_rows_[vgetq_lane_u64(indices_high, 1)], + 0, + 0, + 0, + 0}; + return pixels; + } + + uint8x8_t load_pixels_small(uint32x4_t x, uint32x4_t y) { + // Calculate offsets from coordinates (y * stride + x) + uint32x4_t indices = vmlaq_u32(x, y, v_src_stride_); + + uint8x8_t pixels = {src_rows_[vgetq_lane_u32(indices, 0)], + src_rows_[vgetq_lane_u32(indices, 1)], + src_rows_[vgetq_lane_u32(indices, 2)], + src_rows_[vgetq_lane_u32(indices, 3)], + 0, + 0, + 0, + 0}; + return pixels; + } + + void store_pixels(uint8x8_t pixels, Columns dst) { + dst[0] = vget_lane_u8(pixels, 0); + dst[1] = vget_lane_u8(pixels, 1); + dst[2] = vget_lane_u8(pixels, 2); + dst[3] = vget_lane_u8(pixels, 3); + } + + void process_row(size_t width, Columns mapx, + Columns mapy, Columns dst) { + const size_t kStep = VecTraits::num_lanes(); + + auto vector_path = [&](size_t step) { + uint32x4_t x, y; + uint32x4_t in_range; + get_map_coordinates(mapx, mapy, x, y, in_range); + + uint8x8_t pixels; + if constexpr (IsLarge) { + pixels = load_pixels_large(x, y); + } else { + pixels = load_pixels_small(x, y); + } + + // Select between source pixels and border colour + uint8x8_t in_range_narrowed = + vmovn_u16(vcombine_u16(vmovn_u32(in_range), vdup_n_u16(0))); + uint8x8_t pixels_or_border = + vbsl_u8(in_range_narrowed, pixels, v_border_); + + store_pixels(pixels_or_border, dst); + + mapx += ptrdiff_t(step); + mapy += ptrdiff_t(step); + dst += ptrdiff_t(step); + }; + + LoopUnroll loop{width, kStep}; + loop.unroll_once(vector_path); + ptrdiff_t back_step = static_cast(loop.step()) - + static_cast(loop.remaining_length()); + mapx -= back_step; + mapy -= back_step; + dst -= back_step; + loop.remaining([&](size_t, size_t step) { vector_path(step); }); + } + + private: + Rows src_rows_; + uint32x4_t v_src_stride_; + uint32x4_t v_width_; + uint32x4_t v_height_; + uint8x8_t v_border_; +}; // end of class RemapF32NearestConstant + +template +class RemapF32NearestConstant { + public: + using ScalarType = uint16_t; + using MapVecTraits = neon::VecTraits; + using MapVectorType = typename MapVecTraits::VectorType; // float32x4_t + + RemapF32NearestConstant(Rows src_rows, size_t src_width, + size_t src_height, const ScalarType *border_value) + : src_rows_{src_rows}, + v_src_element_stride_{vdupq_n_u32( + static_cast(src_rows_.stride() / sizeof(ScalarType)))}, + v_width_{vdupq_n_u32(static_cast(src_width))}, + v_height_{vdupq_n_u32(static_cast(src_height))}, + v_border_{vdup_n_u16(*border_value)} {} + + void get_map_coordinates(Columns mapx, Columns mapy, + uint32x4_t &x, uint32x4_t &y, uint32x4_t &in_range) { + MapVectorType x_raw = vld1q_f32(&mapx[0]); + MapVectorType y_raw = vld1q_f32(&mapy[0]); + + MapVectorType bias = vdupq_n_f32(0.5F); + float32x4_t x_biased = vaddq_f32(x_raw, bias); + float32x4_t y_biased = vaddq_f32(y_raw, bias); + + // Round to nearest positive value + uint32x4_t x_nearest = vcvtmq_u32_f32(x_biased); + uint32x4_t y_nearest = vcvtmq_u32_f32(y_biased); + + // Find whether coordinates are within the image dimensions. + uint32x4_t above_zero = + vandq_u32(vcgezq_f32(x_biased), vcgezq_f32(y_biased)); + uint32x4_t below_limits = vandq_u32(vcltq_u32(x_nearest, v_width_), + vcltq_u32(y_nearest, v_height_)); + in_range = vandq_u32(above_zero, below_limits); + + // Zero out-of-range coordinates. + x = vandq_u32(in_range, x_nearest); + y = vandq_u32(in_range, y_nearest); + } + + uint16x4_t load_pixels_large(uint32x4_t x, uint32x4_t y) { + // Calculate offsets from coordinates (y * stride + x) + uint64x2_t indices_low = + vmlal_u32(vmovl_u32(vget_low_u32(x)), vget_low_u32(y), + vget_low_u32(v_src_element_stride_)); + uint64x2_t indices_high = + vmlal_high_u32(vmovl_high_u32(x), y, v_src_element_stride_); + + uint16x4_t pixels = {src_rows_[vgetq_lane_u64(indices_low, 0)], + src_rows_[vgetq_lane_u64(indices_low, 1)], + src_rows_[vgetq_lane_u64(indices_high, 0)], + src_rows_[vgetq_lane_u64(indices_high, 1)]}; + return pixels; + } + + uint16x4_t load_pixels_small(uint32x4_t x, uint32x4_t y) { + // Calculate offsets from coordinates (y * stride + x) + uint32x4_t indices = vmlaq_u32(x, y, v_src_element_stride_); + + uint16x4_t pixels = {src_rows_[vgetq_lane_u32(indices, 0)], + src_rows_[vgetq_lane_u32(indices, 1)], + src_rows_[vgetq_lane_u32(indices, 2)], + src_rows_[vgetq_lane_u32(indices, 3)]}; + return pixels; + } + + void store_pixels(uint16x4_t pixels, Columns dst) { + vst1_u16(&dst[0], pixels); + } + + void process_row(size_t width, Columns mapx, + Columns mapy, Columns dst) { + const size_t kStep = VecTraits::num_lanes(); + + auto vector_path = [&](size_t step) { + uint32x4_t x, y; + uint32x4_t in_range; + get_map_coordinates(mapx, mapy, x, y, in_range); + + uint16x4_t pixels; + if constexpr (IsLarge) { + pixels = load_pixels_large(x, y); + } else { + pixels = load_pixels_small(x, y); + } + + // Select between source pixels and border colour + uint16x4_t in_range_narrowed = vmovn_u32(in_range); + uint16x4_t pixels_or_border = + vbsl_u16(in_range_narrowed, pixels, v_border_); + + store_pixels(pixels_or_border, dst); + + mapx += ptrdiff_t(step); + mapy += ptrdiff_t(step); + dst += ptrdiff_t(step); + }; + + LoopUnroll loop{width, kStep}; + loop.unroll_once(vector_path); + ptrdiff_t back_step = static_cast(loop.step()) - + static_cast(loop.remaining_length()); + mapx -= back_step; + mapy -= back_step; + dst -= back_step; + loop.remaining([&](size_t, size_t step) { vector_path(step); }); + } + + private: + Rows src_rows_; + uint32x4_t v_src_element_stride_; + uint32x4_t v_width_; + uint32x4_t v_height_; + uint16x4_t v_border_; +}; // end of class RemapF32NearestConstant + // Most of the complexity comes from parameter checking. // NOLINTBEGIN(readability-function-cognitive-complexity) template @@ -1549,24 +2008,50 @@ kleidicv_error_t remap_f32(const T *src, size_t src_stride, size_t src_width, Rows dst_rows{dst, dst_stride, channels}; Rectangle rect{dst_width, dst_height}; - if (border_type == KLEIDICV_BORDER_TYPE_CONSTANT) { - if (KLEIDICV_UNLIKELY(src_rows.stride() * src_height >= (1ULL << 32))) { - RemapF32ConstantBorder operation{src_rows, src_width, src_height, - border_value}; - zip_rows(operation, rect, mapx_rows, mapy_rows, dst_rows); + if (interpolation == KLEIDICV_INTERPOLATION_LINEAR) { + if (border_type == KLEIDICV_BORDER_TYPE_CONSTANT) { + if (KLEIDICV_UNLIKELY(src_rows.stride() * src_height >= (1ULL << 32))) { + RemapF32ConstantBorder operation{src_rows, src_width, + src_height, border_value}; + zip_rows(operation, rect, mapx_rows, mapy_rows, dst_rows); + } else { + RemapF32ConstantBorder operation{src_rows, src_width, + src_height, border_value}; + zip_rows(operation, rect, mapx_rows, mapy_rows, dst_rows); + } } else { - RemapF32ConstantBorder operation{src_rows, src_width, - src_height, border_value}; - zip_rows(operation, rect, mapx_rows, mapy_rows, dst_rows); + assert(border_type == KLEIDICV_BORDER_TYPE_REPLICATE); + if (KLEIDICV_UNLIKELY(src_rows.stride() * src_height >= (1ULL << 32))) { + RemapF32Replicate operation{src_rows, src_width, src_height}; + zip_rows(operation, rect, mapx_rows, mapy_rows, dst_rows); + } else { + RemapF32Replicate operation{src_rows, src_width, src_height}; + zip_rows(operation, rect, mapx_rows, mapy_rows, dst_rows); + } } } else { - assert(border_type == KLEIDICV_BORDER_TYPE_REPLICATE); - if (KLEIDICV_UNLIKELY(src_rows.stride() * src_height >= (1ULL << 32))) { - RemapF32Replicate operation{src_rows, src_width, src_height}; - zip_rows(operation, rect, mapx_rows, mapy_rows, dst_rows); + assert(interpolation == KLEIDICV_INTERPOLATION_NEAREST); + if (border_type == KLEIDICV_BORDER_TYPE_CONSTANT) { + if (KLEIDICV_UNLIKELY(src_rows.stride() * src_height >= (1ULL << 32))) { + RemapF32NearestConstant operation{src_rows, src_width, + src_height, border_value}; + zip_rows(operation, rect, mapx_rows, mapy_rows, dst_rows); + } else { + RemapF32NearestConstant operation{src_rows, src_width, + src_height, border_value}; + zip_rows(operation, rect, mapx_rows, mapy_rows, dst_rows); + } } else { - RemapF32Replicate operation{src_rows, src_width, src_height}; - zip_rows(operation, rect, mapx_rows, mapy_rows, dst_rows); + assert(border_type == KLEIDICV_BORDER_TYPE_REPLICATE); + if (KLEIDICV_UNLIKELY(src_rows.stride() * src_height >= (1ULL << 32))) { + RemapF32NearestReplicate operation{src_rows, src_width, + src_height}; + zip_rows(operation, rect, mapx_rows, mapy_rows, dst_rows); + } else { + RemapF32NearestReplicate operation{src_rows, src_width, + src_height}; + zip_rows(operation, rect, mapx_rows, mapy_rows, dst_rows); + } } } diff --git a/kleidicv/src/transform/remap_sc.h b/kleidicv/src/transform/remap_sc.h index dc52a2931fac060de7e058df5991108366080cea..a0e27f1696bb2b418d7def0c95c478ada2664520 100644 --- a/kleidicv/src/transform/remap_sc.h +++ b/kleidicv/src/transform/remap_sc.h @@ -822,6 +822,127 @@ kleidicv_error_t remap_s16point5_sc( return KLEIDICV_OK; } +template +void remap32f_nearest(svuint32_t sv_xmax, svuint32_t sv_ymax, + svuint32_t sv_src_stride, Rows src_rows, + svuint32_t sv_border, Columns dst, + size_t kStep, size_t dst_width, + Rows mapx_rows, + Rows mapy_rows) { + svbool_t pg_all32 = svptrue_b32(); + auto load_coords = [&](svbool_t pg, size_t xs) { + auto x = static_cast(xs); + return svcreate2(svld1_f32(pg, &mapx_rows.as_columns()[x]), + svld1_f32(pg, &mapy_rows.as_columns()[x])); + }; + + auto get_pixels = [&](svbool_t pg, svuint32x2_t coords) { + svuint32_t x = svget2(coords, 0); + svuint32_t y = svget2(coords, 1); + if constexpr (Border == KLEIDICV_BORDER_TYPE_CONSTANT) { + svbool_t in_range = svand_b_z(pg, svcmple_u32(pg, x, sv_xmax), + svcmple_u32(pg, y, sv_ymax)); + svuint32_t result = load_common( + in_range, x, y, sv_src_stride, src_rows); + // Select between source pixels and border colour + return svsel_u32(in_range, result, sv_border); + } else { + static_assert(Border == KLEIDICV_BORDER_TYPE_REPLICATE); + return load_common(pg, x, y, sv_src_stride, + src_rows); + } + }; + + auto calculate_nearest_coordinates = [&](svbool_t pg32, size_t x) { + svfloat32x2_t coords = load_coords(pg32, x); + svfloat32_t xf = svget2(coords, 0); + svfloat32_t yf = svget2(coords, 1); + + svuint32_t xi, yi; + if constexpr (Border == KLEIDICV_BORDER_TYPE_CONSTANT) { + // Round to the nearest integer + xi = svreinterpret_u32_s32( + svcvt_s32_f32_x(pg_all32, svrinta_f32_x(pg_all32, xf))); + yi = svreinterpret_u32_s32( + svcvt_s32_f32_x(pg_all32, svrinta_f32_x(pg_all32, yf))); + } else { + // Round to the nearest integer, clamp it to within the dimensions of + // the source image (negative values are already saturated to 0) + xi = svmin_x(pg_all32, + svcvt_u32_f32_x(pg_all32, svadd_n_f32_x(pg_all32, xf, 0.5F)), + sv_xmax); + yi = svmin_x(pg_all32, + svcvt_u32_f32_x(pg_all32, svadd_n_f32_x(pg_all32, yf, 0.5F)), + sv_ymax); + } + return svcreate2(xi, yi); + }; + + LoopUnroll2 loop{dst_width, kStep}; + + if constexpr (std::is_same::value) { + auto vector_path_generic = [&](size_t x, size_t x_max, + Columns dst) { + size_t length = x_max - x; + svbool_t pg32 = svwhilelt_b32(0ULL, length); + svuint32_t result = + get_pixels(pg32, calculate_nearest_coordinates(pg32, x)); + svst1b_u32(pg32, &dst[static_cast(x)], result); + }; + + loop.unroll_four_times([&](size_t x) { + ScalarType* p_dst = &dst[static_cast(x)]; + svuint32_t res32_0 = + get_pixels(pg_all32, calculate_nearest_coordinates(pg_all32, x)); + x += kStep; + svuint32_t res32_1 = + get_pixels(pg_all32, calculate_nearest_coordinates(pg_all32, x)); + svuint16_t result0 = svuzp1_u16(svreinterpret_u16_u32(res32_0), + svreinterpret_u16_u32(res32_1)); + x += kStep; + res32_0 = + get_pixels(pg_all32, calculate_nearest_coordinates(pg_all32, x)); + x += kStep; + res32_1 = + get_pixels(pg_all32, calculate_nearest_coordinates(pg_all32, x)); + svuint16_t result1 = svuzp1_u16(svreinterpret_u16_u32(res32_0), + svreinterpret_u16_u32(res32_1)); + svuint8_t result = svuzp1_u8(svreinterpret_u8_u16(result0), + svreinterpret_u8_u16(result1)); + svst1(svptrue_b8(), p_dst, result); + }); + loop.unroll_once([&](size_t x) { vector_path_generic(x, x + kStep, dst); }); + loop.remaining( + [&](size_t x, size_t length) { vector_path_generic(x, length, dst); }); + } + + if constexpr (std::is_same::value) { + auto vector_path_generic = [&](size_t x, size_t x_max, + Columns dst) { + size_t length = x_max - x; + svbool_t pg32 = svwhilelt_b32(0ULL, length); + svuint32_t result = + get_pixels(pg32, calculate_nearest_coordinates(pg32, x)); + svst1h_u32(pg32, &dst[static_cast(x)], result); + }; + + loop.unroll_twice([&](size_t x) { + ScalarType* p_dst = &dst[static_cast(x)]; + svuint32_t res32_0 = + get_pixels(pg_all32, calculate_nearest_coordinates(pg_all32, x)); + x += kStep; + svuint32_t res32_1 = + get_pixels(pg_all32, calculate_nearest_coordinates(pg_all32, x)); + svuint16_t result = svuzp1_u16(svreinterpret_u16_u32(res32_0), + svreinterpret_u16_u32(res32_1)); + svst1(svptrue_b16(), p_dst, result); + }); + loop.unroll_once([&](size_t x) { vector_path_generic(x, x + kStep, dst); }); + loop.remaining( + [&](size_t x, size_t length) { vector_path_generic(x, length, dst); }); + } +} + // TODO reduce functional complexity template @@ -835,10 +956,8 @@ void remap32f_process_rows(Rows src_rows, size_t src_width, svuint32_t sv_xmax = svdup_n_u32(src_width - 1); svuint32_t sv_ymax = svdup_n_u32(src_height - 1); svuint32_t sv_src_stride = svdup_n_u32(src_rows.stride()); - svuint32_t sv_border; - // sv_border is only used if the border type is constant. - // If the border type is not constant then border_value is permitted to be - // null and must not be read. + svuint32_t sv_border = svdup_n_u32(0); + if constexpr (Border == KLEIDICV_BORDER_TYPE_CONSTANT) { sv_border = svdup_n_u32(border_value[0]); } @@ -868,13 +987,13 @@ void remap32f_process_rows(Rows src_rows, size_t src_width, } }; - auto process_row = [&]() { + for (size_t y = y_begin; y < y_end; ++y) { Columns dst = dst_rows.as_columns(); LoopUnroll2 loop{dst_width, kStep}; - // GCOVR_EXCL_START if constexpr (Inter == KLEIDICV_INTERPOLATION_NEAREST) { - assert(!"INTER_NEAREST not implemented for RemapF32"); - // GCOVR_EXCL_STOP + remap32f_nearest( + sv_xmax, sv_ymax, sv_src_stride, src_rows, sv_border, dst, kStep, + dst_width, mapx_rows, mapy_rows); } else if constexpr (Inter == KLEIDICV_INTERPOLATION_LINEAR) { if constexpr (std::is_same::value) { loop.unroll_four_times([&](size_t x) { @@ -933,10 +1052,6 @@ void remap32f_process_rows(Rows src_rows, size_t src_width, } ++mapx_rows; ++mapy_rows; - }; - - for (size_t y = y_begin; y < y_end; ++y) { - process_row(); ++dst_rows; } } diff --git a/scripts/benchmark/benchmarks.txt b/scripts/benchmark/benchmarks.txt index a8dc74e71b77a5c8d5341ab932c665242e3430c1..0625b6bd37e0eff38df8fcb15fa7a874b728551e 100755 --- a/scripts/benchmark/benchmarks.txt +++ b/scripts/benchmark/benchmarks.txt @@ -86,10 +86,14 @@ Remap_S16Point5_U8_Replicate: opencv_perf_imgproc '*Remap/*' '($PIXEL_FORMAT, 8U Remap_S16Point5_U8_Constant: opencv_perf_imgproc '*Remap/*' '($PIXEL_FORMAT, 8UC1, 16SC2, INTER_LINEAR, BORDER_CONSTANT)' Remap_S16Point5_U16_Replicate: opencv_perf_imgproc '*Remap/*' '($PIXEL_FORMAT, 16UC1, 16SC2, INTER_LINEAR, BORDER_REPLICATE)' Remap_S16Point5_U16_Constant: opencv_perf_imgproc '*Remap/*' '($PIXEL_FORMAT, 16UC1, 16SC2, INTER_LINEAR, BORDER_CONSTANT)' -Remap_F32_U8_Replicate: opencv_perf_imgproc '*Remap/*' '($PIXEL_FORMAT, 8UC1, 32FC1, INTER_LINEAR, BORDER_REPLICATE)' -Remap_F32_U8_Constant: opencv_perf_imgproc '*Remap/*' '($PIXEL_FORMAT, 8UC1, 32FC1, INTER_LINEAR, BORDER_CONSTANT)' -Remap_F32_U16_Replicate: opencv_perf_imgproc '*Remap/*' '($PIXEL_FORMAT, 16UC1, 32FC1, INTER_LINEAR, BORDER_REPLICATE)' -Remap_F32_U16_Constant: opencv_perf_imgproc '*Remap/*' '($PIXEL_FORMAT, 16UC1, 32FC1, INTER_LINEAR, BORDER_CONSTANT)' +Remap_F32_U8_Replicate_Linear: opencv_perf_imgproc '*Remap/*' '($PIXEL_FORMAT, 8UC1, 32FC1, INTER_LINEAR, BORDER_REPLICATE)' +Remap_F32_U8_Constant_Linear: opencv_perf_imgproc '*Remap/*' '($PIXEL_FORMAT, 8UC1, 32FC1, INTER_LINEAR, BORDER_CONSTANT)' +Remap_F32_U16_Replicate_Linear: opencv_perf_imgproc '*Remap/*' '($PIXEL_FORMAT, 16UC1, 32FC1, INTER_LINEAR, BORDER_REPLICATE)' +Remap_F32_U16_Constant_Linear: opencv_perf_imgproc '*Remap/*' '($PIXEL_FORMAT, 16UC1, 32FC1, INTER_LINEAR, BORDER_CONSTANT)' +Remap_F32_U8_Replicate_Nearest: opencv_perf_imgproc '*Remap/*' '($PIXEL_FORMAT, 8UC1, 32FC1, INTER_NEAREST, BORDER_REPLICATE)' +Remap_F32_U8_Constant_Nearest: opencv_perf_imgproc '*Remap/*' '($PIXEL_FORMAT, 8UC1, 32FC1, INTER_NEAREST, BORDER_CONSTANT)' +Remap_F32_U16_Replicate_Nearest: opencv_perf_imgproc '*Remap/*' '($PIXEL_FORMAT, 16UC1, 32FC1, INTER_NEAREST, BORDER_REPLICATE)' +Remap_F32_U16_Constant_Nearest: opencv_perf_imgproc '*Remap/*' '($PIXEL_FORMAT, 16UC1, 32FC1, INTER_NEAREST, BORDER_CONSTANT)' WarpPerspective_Nearest: opencv_perf_imgproc '*WarpPerspective/*' '($PIXEL_FORMAT, INTER_NEAREST, BORDER_REPLICATE, 1)' WarpPerspective_Linear: opencv_perf_imgproc '*WarpPerspective/*' '($PIXEL_FORMAT, INTER_LINEAR, BORDER_REPLICATE, 1)' diff --git a/test/api/test_remap.cpp b/test/api/test_remap.cpp index 5966dd191bc76db3f4f91bbc86d937c9783ff2fb..7a18a0bcf113bde7e967345fc3dd263d57ef25d2 100644 --- a/test/api/test_remap.cpp +++ b/test/api/test_remap.cpp @@ -863,20 +863,23 @@ class RemapF32 : public testing::Test { static void test_random(size_t src_w, size_t src_h, size_t dst_w, size_t dst_h, size_t channels, kleidicv_border_type_t border_type, - const ScalarType *border_value, size_t padding) { + const ScalarType *border_value, + kleidicv_interpolation_type_t interpolation, + size_t padding) { test::PseudoRandomNumberGenerator coord_generator; test::Array2D mapx(dst_w, dst_h, padding); test::Array2D mapy(dst_w, dst_h, padding); mapx.fill(coord_generator); mapy.fill(coord_generator); execute_test(mapx, mapy, src_w, src_h, dst_w, dst_h, channels, border_type, - border_value, padding); + border_value, interpolation, padding); } static void test_outside_random(size_t src_w, size_t src_h, size_t dst_w, size_t dst_h, size_t channels, kleidicv_border_type_t border_type, const ScalarType *border_value, + kleidicv_interpolation_type_t interpolation, size_t padding) { test::Array2D mapx(dst_w, dst_h, padding); test::PseudoRandomNumberGeneratorFloatRange xcoord_generator{ @@ -891,12 +894,14 @@ class RemapF32 : public testing::Test { static_cast(2 * src_h)}; mapy.fill(ycoord_generator); execute_test(mapx, mapy, src_w, src_h, dst_w, dst_h, channels, border_type, - border_value, padding); + border_value, interpolation, padding); } static void test_blend(size_t src_w, size_t src_h, size_t dst_w, size_t dst_h, size_t channels, kleidicv_border_type_t border_type, - const ScalarType *border_value, size_t padding) { + const ScalarType *border_value, + kleidicv_interpolation_type_t interpolation, + size_t padding) { test::Array2D mapx(dst_w, dst_h, padding); test::Array2D mapy(dst_w, dst_h, padding); for (size_t row = 0; row < dst_h; ++row) { @@ -913,7 +918,7 @@ class RemapF32 : public testing::Test { } } execute_test(mapx, mapy, src_w, src_h, dst_w, dst_h, channels, border_type, - border_value, padding); + border_value, interpolation, padding); } // Test coordinates with edge values that may easily overflow @@ -921,6 +926,7 @@ class RemapF32 : public testing::Test { size_t dst_h, size_t channels, kleidicv_border_type_t border_type, const ScalarType *border_value, + kleidicv_interpolation_type_t interpolation, size_t padding) { test::Array2D mapx(dst_w, dst_h, padding); test::Array2D mapy(dst_w, dst_h, padding); @@ -991,7 +997,8 @@ class RemapF32 : public testing::Test { test::PseudoRandomNumberGenerator generator; actual.fill(42); - calculate_expected(source, mapx, mapy, border_type, border_value, expected); + calculate_expected(source, mapx, mapy, border_type, border_value, + interpolation, expected); ASSERT_EQ( KLEIDICV_OK, @@ -999,7 +1006,7 @@ class RemapF32 : public testing::Test { source.data(), source.stride(), source.width(), source.height(), actual.data(), actual.stride(), actual.width(), actual.height(), channels, mapx.data(), mapx.stride(), mapy.data(), mapy.stride(), - KLEIDICV_INTERPOLATION_LINEAR, border_type, border_value)); + interpolation, border_type, border_value)); if (expected.compare_to(actual, 1)) { if (source.width() < 100 && source.height() < 100) { @@ -1024,7 +1031,9 @@ class RemapF32 : public testing::Test { test::Array2D &mapy, size_t src_w, size_t src_h, size_t dst_w, size_t dst_h, size_t channels, kleidicv_border_type_t border_type, - const ScalarType *border_value, size_t padding) { + const ScalarType *border_value, + kleidicv_interpolation_type_t interpolation, + size_t padding) { size_t src_total_width = channels * src_w; size_t dst_total_width = channels * dst_w; @@ -1036,7 +1045,8 @@ class RemapF32 : public testing::Test { source.fill(generator); actual.fill(42); - calculate_expected(source, mapx, mapy, border_type, border_value, expected); + calculate_expected(source, mapx, mapy, border_type, border_value, + interpolation, expected); ASSERT_EQ( KLEIDICV_OK, @@ -1044,7 +1054,7 @@ class RemapF32 : public testing::Test { source.data(), source.stride(), source.width(), source.height(), actual.data(), actual.stride(), actual.width(), actual.height(), channels, mapx.data(), mapx.stride(), mapy.data(), mapy.stride(), - KLEIDICV_INTERPOLATION_LINEAR, border_type, border_value)); + interpolation, border_type, border_value)); if (expected.compare_to(actual, 1)) { if (source.width() < 100 && source.height() < 100) { @@ -1069,6 +1079,7 @@ class RemapF32 : public testing::Test { test::Array2D &mapy, kleidicv_border_type_t border_type, const ScalarType *border_value, + kleidicv_interpolation_type_t interpolation, test::Array2D &expected) { auto get_src = [&](ptrdiff_t x, ptrdiff_t y) { return get_array2d_element_or_border(src, x, y, border_type, @@ -1078,9 +1089,10 @@ class RemapF32 : public testing::Test { for (size_t row = 0; row < expected.height(); row++) { for (size_t column = 0; column < expected.width() / src.channels(); ++column) { - for (size_t ch = 0; ch < src.channels(); ++ch) { - float x = *mapx.at(row, column); - float y = *mapy.at(row, column); + float x = *mapx.at(row, column); + float y = *mapy.at(row, column); + + if (interpolation == KLEIDICV_INTERPOLATION_LINEAR) { ptrdiff_t ix = static_cast(std::max( INT_MIN, std::min(std::floor(x), @@ -1091,15 +1103,31 @@ class RemapF32 : public testing::Test { static_cast(KLEIDICV_MAX_IMAGE_PIXELS)))); float xfrac = x - std::floor(x); float yfrac = y - std::floor(y); - float a = get_src(ix, iy)[ch]; - float b = get_src(ix + 1, iy)[ch]; - float c = get_src(ix, iy + 1)[ch]; - float d = get_src(ix + 1, iy + 1)[ch]; - float line1 = (b - a) * xfrac + a; - float line2 = (d - c) * xfrac + c; - float float_result = (line2 - line1) * yfrac + line1; - *expected.at(row, column * src.channels() + ch) = - static_cast(std::lround(float_result)); + for (size_t ch = 0; ch < src.channels(); ++ch) { + float a = get_src(ix, iy)[ch]; + float b = get_src(ix + 1, iy)[ch]; + float c = get_src(ix, iy + 1)[ch]; + float d = get_src(ix + 1, iy + 1)[ch]; + float line1 = (b - a) * xfrac + a; + float line2 = (d - c) * xfrac + c; + float float_result = (line2 - line1) * yfrac + line1; + *expected.at(row, column * src.channels() + ch) = + static_cast(std::lround(float_result)); + } + } else { + assert(interpolation == KLEIDICV_INTERPOLATION_NEAREST); + ptrdiff_t ix = static_cast(std::max( + INT_MIN, + std::min(std::round(x), + static_cast(KLEIDICV_MAX_IMAGE_PIXELS)))); + ptrdiff_t iy = static_cast(std::max( + INT_MIN, + std::min(std::round(y), + static_cast(KLEIDICV_MAX_IMAGE_PIXELS)))); + for (size_t ch = 0; ch < src.channels(); ++ch) { + *expected.at(row, column * src.channels() + ch) = + get_src(ix, iy)[ch]; + } } } } @@ -1117,8 +1145,12 @@ TYPED_TEST(RemapF32, RandomNoPadding) { size_t channels = 1; size_t padding = 0; for (auto [border_type, border_value] : get_borders()) { - TestFixture::test_random(src_w, src_h, dst_w, dst_h, channels, border_type, - border_value, padding); + for (auto interpolation : + {KLEIDICV_INTERPOLATION_LINEAR, KLEIDICV_INTERPOLATION_NEAREST}) { + TestFixture::test_random(src_w, src_h, dst_w, dst_h, channels, + border_type, border_value, interpolation, + padding); + } } } @@ -1130,8 +1162,11 @@ TYPED_TEST(RemapF32, BlendPadding) { size_t channels = 1; size_t padding = 13; for (auto [border_type, border_value] : get_borders()) { - TestFixture::test_blend(src_w, src_h, dst_w, dst_h, channels, border_type, - border_value, padding); + for (auto interpolation : + {KLEIDICV_INTERPOLATION_LINEAR, KLEIDICV_INTERPOLATION_NEAREST}) { + TestFixture::test_blend(src_w, src_h, dst_w, dst_h, channels, border_type, + border_value, interpolation, padding); + } } } @@ -1143,8 +1178,12 @@ TYPED_TEST(RemapF32, OutsideRandomPadding) { size_t channels = 1; size_t padding = 13; for (auto [border_type, border_value] : get_borders()) { - TestFixture::test_outside_random(src_w, src_h, dst_w, dst_h, channels, - border_type, border_value, padding); + for (auto interpolation : + {KLEIDICV_INTERPOLATION_LINEAR, KLEIDICV_INTERPOLATION_NEAREST}) { + TestFixture::test_outside_random(src_w, src_h, dst_w, dst_h, channels, + border_type, border_value, interpolation, + padding); + } } } @@ -1156,8 +1195,11 @@ TYPED_TEST(RemapF32, BlendBigStride) { size_t channels = 1; size_t padding = 1 << 16; for (auto [border_type, border_value] : get_borders()) { - TestFixture::test_blend(src_w, src_h, dst_w, dst_h, channels, border_type, - border_value, padding); + for (auto interpolation : + {KLEIDICV_INTERPOLATION_LINEAR, KLEIDICV_INTERPOLATION_NEAREST}) { + TestFixture::test_blend(src_w, src_h, dst_w, dst_h, channels, border_type, + border_value, interpolation, padding); + } } } @@ -1169,8 +1211,12 @@ TYPED_TEST(RemapF32, CornerCases) { size_t channels = 1; size_t padding = 17; for (auto [border_type, border_value] : get_borders()) { - TestFixture::test_corner_cases(src_w, src_h, dst_w, dst_h, channels, - border_type, border_value, padding); + for (auto interpolation : + {KLEIDICV_INTERPOLATION_LINEAR, KLEIDICV_INTERPOLATION_NEAREST}) { + TestFixture::test_corner_cases(src_w, src_h, dst_w, dst_h, channels, + border_type, border_value, interpolation, + padding); + } } } @@ -1184,8 +1230,12 @@ TYPED_TEST(RemapF32, CornerCasesLargeLoad) { size_t channels = 1; size_t padding = 1; for (auto [border_type, border_value] : get_borders()) { - TestFixture::test_corner_cases(src_w, src_h, dst_w, dst_h, channels, - border_type, border_value, padding); + for (auto interpolation : + {KLEIDICV_INTERPOLATION_LINEAR, KLEIDICV_INTERPOLATION_NEAREST}) { + TestFixture::test_corner_cases(src_w, src_h, dst_w, dst_h, channels, + border_type, border_value, interpolation, + padding); + } } } @@ -1224,6 +1274,7 @@ TYPED_TEST(RemapF32, ZeroHeightImage) { const size_t mapx_stride = kW * sizeof(float); const size_t mapy_stride = kW * sizeof(float); + // TODO: Why these sets of parameters? for (auto [border_type, border_value] : get_borders()) { EXPECT_EQ(KLEIDICV_OK, remap_f32()(src, src_stride, kW, 1, dst, dst_stride, @@ -1251,6 +1302,7 @@ TYPED_TEST(RemapF32, InvalidImageSize) { float mapx[1] = {}; float mapy[1] = {}; + // TODO: Why these sets of parameters? EXPECT_EQ( KLEIDICV_ERROR_RANGE, remap_f32()(src, element_size, KLEIDICV_MAX_IMAGE_PIXELS + 1, @@ -1297,21 +1349,6 @@ TYPED_TEST(RemapF32, UnsupportedTwoChannels) { KLEIDICV_BORDER_TYPE_REPLICATE, nullptr)); } -TYPED_TEST(RemapF32, UnsupportedInterpolationTypeNEAREST) { - const size_t element_size = sizeof(TypeParam); - const TypeParam src[1] = {}; - TypeParam dst[16]; - float mapx[16] = {}; - float mapy[16] = {}; - - EXPECT_EQ( - KLEIDICV_ERROR_NOT_IMPLEMENTED, - remap_f32()(src, element_size, 1, 1, dst, 16 * element_size, - 16, 1, 1, mapx, 16 * sizeof(float), mapy, - 16 * sizeof(float), KLEIDICV_INTERPOLATION_NEAREST, - KLEIDICV_BORDER_TYPE_REPLICATE, nullptr)); -} - TYPED_TEST(RemapF32, UnsupportedTooSmallImage) { const size_t element_size = sizeof(TypeParam); const TypeParam src[1] = {}; diff --git a/test/api/test_thread.cpp b/test/api/test_thread.cpp index c1a53f6daeb1b9d815bbea307f99049a34a8870a..128cf7af7209275f708135dd0f1d263008231654 100644 --- a/test/api/test_thread.cpp +++ b/test/api/test_thread.cpp @@ -780,9 +780,6 @@ TEST_P(Thread, remap_f32_u8_not_implemented) { check_remap_f32_not_implemented( kleidicv_thread_remap_f32_u8, 2, KLEIDICV_INTERPOLATION_LINEAR, KLEIDICV_BORDER_TYPE_REPLICATE, &border_value); - check_remap_f32_not_implemented( - kleidicv_thread_remap_f32_u8, 1, KLEIDICV_INTERPOLATION_NEAREST, - KLEIDICV_BORDER_TYPE_REPLICATE, &border_value); check_remap_f32_not_implemented( kleidicv_thread_remap_f32_u8, 1, KLEIDICV_INTERPOLATION_LINEAR, KLEIDICV_BORDER_TYPE_REFLECT, &border_value); @@ -807,9 +804,6 @@ TEST_P(Thread, remap_f32_u16_not_implemented) { check_remap_f32_not_implemented( kleidicv_thread_remap_f32_u16, 2, KLEIDICV_INTERPOLATION_LINEAR, KLEIDICV_BORDER_TYPE_REPLICATE, &border_value); - check_remap_f32_not_implemented( - kleidicv_thread_remap_f32_u16, 1, KLEIDICV_INTERPOLATION_NEAREST, - KLEIDICV_BORDER_TYPE_REPLICATE, &border_value); check_remap_f32_not_implemented( kleidicv_thread_remap_f32_u16, 1, KLEIDICV_INTERPOLATION_LINEAR, KLEIDICV_BORDER_TYPE_REFLECT, &border_value);