diff --git a/kleidicv/src/transform/common_sc.h b/kleidicv/src/transform/common_sc.h index f7a7eb01825dbc8bd578cc786f6ceba5a28c15a3..9285105500632fafde3fc5c604cc2c1d0061b917 100644 --- a/kleidicv/src/transform/common_sc.h +++ b/kleidicv/src/transform/common_sc.h @@ -90,14 +90,14 @@ svuint32_t inline load_common(svbool_t pg, svuint32_t x, svuint32_t y, } template -svuint32_t inline calculate_linear_replicate(svbool_t pg, svfloat32x2_t coords, - svfloat32_t xmaxf, - svfloat32_t ymaxf, - svuint32_t sv_src_stride, - Rows &src_rows) { +svuint32_t inline calculate_linear_replicated_border( + svbool_t pg, svfloat32x2_t coords, svfloat32_t xmaxf, svfloat32_t ymaxf, + svuint32_t sv_src_stride, Rows &src_rows) { auto load_source = [&](svuint32_t x, svuint32_t y) { return load_common(pg, x, y, sv_src_stride, src_rows); }; + const float MULTIPLIER = 1 << 8; + svuint32_t bias = svdup_n_u32(MULTIPLIER * MULTIPLIER / 2); svbool_t pg_all32 = svptrue_b32(); svfloat32_t xf = svget2(coords, 0); svfloat32_t yf = svget2(coords, 1); @@ -111,12 +111,25 @@ svuint32_t inline calculate_linear_replicate(svbool_t pg, svfloat32x2_t coords, svcmplt_f32(pg_all32, xf, xmaxf)); svbool_t y_in_range = svand_z(pg_all32, svcmpge_n_f32(pg_all32, yf, 0.F), svcmplt_f32(pg_all32, yf, ymaxf)); - svfloat32_t xfrac = - svsel_f32(x_in_range, svsub_f32_x(pg_all32, xf, svrintm_x(pg_all32, xf)), - svdup_n_f32(0.F)); - svfloat32_t yfrac = - svsel_f32(y_in_range, svsub_f32_x(pg_all32, yf, svrintm_x(pg_all32, yf)), - svdup_n_f32(0.F)); + svuint32_t xfrac = svsel_u32( + x_in_range, + svcvt_u32_f32_x( + pg_all32, + svmul_n_f32_x(pg_all32, + svsub_f32_x(pg_all32, xf, svrintm_x(pg_all32, xf)), + MULTIPLIER)), + svdup_n_u32(0)); + svuint32_t yfrac = svsel_u32( + y_in_range, + svcvt_u32_f32_x( + pg_all32, + svmul_n_f32_x(pg_all32, + svsub_f32_x(pg_all32, yf, svrintm_x(pg_all32, yf)), + MULTIPLIER)), + svdup_n_u32(0)); + + svuint32_t nxfrac = svsub_u32_x(pg_all32, svdup_n_u32(MULTIPLIER), xfrac); + svuint32_t nyfrac = svsub_u32_x(pg_all32, svdup_n_u32(MULTIPLIER), yfrac); // x1 = x0 + 1, except if it's already xmax or out of range svuint32_t x1 = svsel_u32(x_in_range, svadd_n_u32_x(pg_all32, x0, 1), x0); @@ -124,17 +137,19 @@ svuint32_t inline calculate_linear_replicate(svbool_t pg, svfloat32x2_t coords, // Calculate offsets from coordinates (y * stride + x) // a: top left, b: top right, c: bottom left, d: bottom right - svfloat32_t a = svcvt_f32_u32_x(pg_all32, load_source(x0, y0)); - svfloat32_t b = svcvt_f32_u32_x(pg_all32, load_source(x1, y0)); - svfloat32_t line0 = - svmla_f32_x(pg_all32, a, svsub_f32_x(pg_all32, b, a), xfrac); - svfloat32_t c = svcvt_f32_u32_x(pg_all32, load_source(x0, y1)); - svfloat32_t d = svcvt_f32_u32_x(pg_all32, load_source(x1, y1)); - svfloat32_t line1 = - svmla_f32_x(pg_all32, c, svsub_f32_x(pg_all32, d, c), xfrac); - svfloat32_t result = - svmla_f32_x(pg_all32, line0, svsub_f32_x(pg_all32, line1, line0), yfrac); - return svcvt_u32_f32_x(pg_all32, svadd_n_f32_x(pg_all32, result, 0.5F)); + + svuint32_t a = load_source(x0, y0); + svuint32_t b = load_source(x1, y0); + svuint32_t line0 = svmla_x(pg_all32, svmul_x(pg_all32, a, nxfrac), b, xfrac); + + svuint32_t c = load_source(x0, y1); + svuint32_t d = load_source(x1, y1); + svuint32_t line1 = svmla_x(pg_all32, svmul_x(pg_all32, c, nxfrac), d, xfrac); + + svuint32_t acc = svmla_x(pg_all32, svmla_u32_x(pg_all32, bias, line0, nyfrac), + line1, yfrac); + + return svlsr_n_u32_x(pg_all32, acc, 16); } template diff --git a/kleidicv/src/transform/remap_sc.h b/kleidicv/src/transform/remap_sc.h index 8cf574b91c0aa3b5579a1a2396406c187e7b18ae..f7ec44e5c8e242b0a67518da8763b84e22667d0d 100644 --- a/kleidicv/src/transform/remap_sc.h +++ b/kleidicv/src/transform/remap_sc.h @@ -858,7 +858,7 @@ void remap32f_process_rows(Rows src_rows, size_t src_width, auto calculate_linear = [&](svbool_t pg, uint32_t x) { if constexpr (Border == KLEIDICV_BORDER_TYPE_REPLICATE) { svfloat32x2_t coords = coordinate_getter(pg, x); - return calculate_linear_replicate( + return calculate_linear_replicated_border( pg, coords, xmaxf, ymaxf, sv_src_stride, src_rows); } else { static_assert(Border == KLEIDICV_BORDER_TYPE_CONSTANT); diff --git a/kleidicv/src/transform/warp_perspective_neon.cpp b/kleidicv/src/transform/warp_perspective_neon.cpp index 1f4c6dbf45af951013aaee4bd77d876cf25efcec..c42bc0ef29d6321917cf3e92253cedde9128d8b6 100644 --- a/kleidicv/src/transform/warp_perspective_neon.cpp +++ b/kleidicv/src/transform/warp_perspective_neon.cpp @@ -299,7 +299,7 @@ void warp_perspective_operation(Rows src_rows, return vcvtq_f32_u32(vreinterpretq_u32_u64(rawsrc)); }; - auto calculate_linear_replicate = [&](uint32_t x) { + auto calculate_linear_replicated_border = [&](uint32_t x) { auto load_floats = [&](uint32x4_t x, uint32x4_t y) { if constexpr (IsLarge) { return load_src_into_floats_large(x, y); @@ -341,7 +341,7 @@ void warp_perspective_operation(Rows src_rows, auto calculate_linear = [&](uint32_t x) { if constexpr (Border == KLEIDICV_BORDER_TYPE_REPLICATE) { - return calculate_linear_replicate(x); + return calculate_linear_replicated_border(x); } else { static_assert(Border == KLEIDICV_BORDER_TYPE_CONSTANT); auto &&[xf, yf] = calculate_coordinates(x); diff --git a/kleidicv/src/transform/warp_perspective_sc.h b/kleidicv/src/transform/warp_perspective_sc.h index 9656d318c5eb28f8181d5c51b9e8e14542cb97f0..d75958b86b12b0e47e2cb6ede2eacc35e17d37ac 100644 --- a/kleidicv/src/transform/warp_perspective_sc.h +++ b/kleidicv/src/transform/warp_perspective_sc.h @@ -171,7 +171,7 @@ void remap32f_process_rows(Rows src_rows, size_t src_width, auto calculate_linear = [&](svbool_t pg, uint32_t x) { if constexpr (Border == KLEIDICV_BORDER_TYPE_REPLICATE) { svfloat32x2_t coords = coordinate_getter(pg, x); - return calculate_linear_replicate( + return calculate_linear_replicated_border( pg, coords, xmaxf, ymaxf, sv_src_stride, src_rows); } else { static_assert(Border == KLEIDICV_BORDER_TYPE_CONSTANT); diff --git a/test/api/test_remap.cpp b/test/api/test_remap.cpp index 72de4a4bb0431909dee8a543f80c451bd65bc0ab..71b9e8f6df67516ff28c8d5dbbbffe673a530973 100644 --- a/test/api/test_remap.cpp +++ b/test/api/test_remap.cpp @@ -993,7 +993,7 @@ class RemapF32 : public testing::Test { channels, mapx.data(), mapx.stride(), mapy.data(), mapy.stride(), KLEIDICV_INTERPOLATION_LINEAR, border_type, border_value)); - EXPECT_EQ_ARRAY2D(actual, expected); + EXPECT_EQ_ARRAY2D_WITH_TOLERANCE(1, actual, expected); } private: @@ -1023,7 +1023,7 @@ class RemapF32 : public testing::Test { channels, mapx.data(), mapx.stride(), mapy.data(), mapy.stride(), KLEIDICV_INTERPOLATION_LINEAR, border_type, border_value)); - EXPECT_EQ_ARRAY2D(actual, expected); + EXPECT_EQ_ARRAY2D_WITH_TOLERANCE(1, actual, expected); } static void calculate_expected(test::Array2D &src,