diff --git a/CHANGELOG.md b/CHANGELOG.md index 106ad00b5027cf138b833881dc9d7d0b1e3e2dbc..f530d2d2a78a0ad9620fae41a782a8be24dbfacb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,8 +17,8 @@ This changelog aims to follow the guiding principles of ### Added - Remap implementation - - 2-channel s16 and s16+u16 fixed-point coordinates and 1-channel u8 input. - - 2-channel s16 + 5+5 bits' fractions fixed-point coordinates and 1-channel u8 input. + - 2-channel s16 and s16+u16 fixed-point coordinates and 1-channel u8 input, replicated borders. + - 2-channel s16 + 5+5 bits' fractions fixed-point coordinates and 1-channel u8 input, replicated borders. - WarpPerspective implementation - Nearest neighbour implementation for replicated borders and 1-channel u8 input. - Implementation for cv::pyrDown in the OpenCV HAL. diff --git a/kleidicv/include/kleidicv/remap/remap.h b/kleidicv/include/kleidicv/transform/remap.h similarity index 100% rename from kleidicv/include/kleidicv/remap/remap.h rename to kleidicv/include/kleidicv/transform/remap.h diff --git a/kleidicv/src/remap/remap_api.cpp b/kleidicv/src/transform/remap_api.cpp similarity index 94% rename from kleidicv/src/remap/remap_api.cpp rename to kleidicv/src/transform/remap_api.cpp index f5778258bc4fa35aa20f4a2526b336c6e379d249..52dda620355845e6d5f6ee4fb8e636f2ed43ce4d 100644 --- a/kleidicv/src/remap/remap_api.cpp +++ b/kleidicv/src/transform/remap_api.cpp @@ -4,7 +4,7 @@ #include "kleidicv/dispatch.h" #include "kleidicv/kleidicv.h" -#include "kleidicv/remap/remap.h" +#include "kleidicv/transform/remap.h" KLEIDICV_MULTIVERSION_C_API(kleidicv_remap_s16_u8, &kleidicv::neon::remap_s16, diff --git a/kleidicv/src/remap/remap_neon.cpp b/kleidicv/src/transform/remap_neon.cpp similarity index 99% rename from kleidicv/src/remap/remap_neon.cpp rename to kleidicv/src/transform/remap_neon.cpp index a4688d6890817543342cc53b7045a3269f530053..3259a5a96a616b9dcd2801b4938ae6af547df722 100644 --- a/kleidicv/src/remap/remap_neon.cpp +++ b/kleidicv/src/transform/remap_neon.cpp @@ -6,7 +6,7 @@ #include "kleidicv/kleidicv.h" #include "kleidicv/neon.h" -#include "kleidicv/remap/remap.h" +#include "kleidicv/transform/remap.h" namespace kleidicv::neon { diff --git a/kleidicv/src/remap/remap_sc.h b/kleidicv/src/transform/remap_sc.h similarity index 95% rename from kleidicv/src/remap/remap_sc.h rename to kleidicv/src/transform/remap_sc.h index cd5718f4f10ff94a4673928d4a1b5f82d6c6d50e..383b4d78fdc766050138ae573fa34347f847ac2b 100644 --- a/kleidicv/src/remap/remap_sc.h +++ b/kleidicv/src/transform/remap_sc.h @@ -12,8 +12,8 @@ #include #include -#include "kleidicv/remap/remap.h" #include "kleidicv/sve2.h" +#include "kleidicv/transform/remap.h" namespace KLEIDICV_TARGET_NAMESPACE { @@ -267,9 +267,8 @@ class RemapS16Point5SVE2 { acc_b = svmlalb_u32(acc_b, line1, yfrac); acc_t = svmlalt_u32(acc_t, line1, yfrac); - svuint16_t result = svshrnt( - svshrnb(acc_b, static_cast(2 * REMAP16POINT5_FRAC_BITS)), - acc_t, static_cast(2 * REMAP16POINT5_FRAC_BITS)); + svuint16_t result = svshrnt(svshrnb(acc_b, 2ULL * REMAP16POINT5_FRAC_BITS), + acc_t, 2ULL * REMAP16POINT5_FRAC_BITS); svst1b_u16(pg, &dst[0], result); mapfrac += step; dst += step; @@ -357,10 +356,9 @@ class RemapS16Point5SME2 : public RemapS16Point5SVE2 { svuint32_t bias = svdup_n_u32(REMAP16POINT5_FRAC_MAX_SQUARE / 2); - auto vector_path = [&](svbool_t pg, + auto vector_path = [&](svbool_t pg16, svbool_t pg8, ptrdiff_t step) KLEIDICV_STREAMING_COMPATIBLE { // Deinterleave abcd into two vectors, ac and bd - svbool_t pg8 = svwhilelt_b8(int64_t{0}, 2 * step); svuint8x2_t src = svld2_u8(pg8, &demapped_src[0]); svuint16_t src_a = svmovlb_u16(svget2(src, 0)); @@ -368,20 +366,22 @@ class RemapS16Point5SME2 : public RemapS16Point5SVE2 { svuint16_t src_c = svmovlt_u16(svget2(src, 0)); svuint16_t src_d = svmovlt_u16(svget2(src, 1)); - interpolate_and_store(pg, step, mapfrac, dst, src_a, src_b, src_c, src_d, - bias); + interpolate_and_store(pg16, step, mapfrac, dst, src_a, src_b, src_c, + src_d, bias); demapped_src += step; }; + svbool_t ptrue_16 = FracVecTraits::svptrue(); + svbool_t ptrue_8 = svptrue_b8(); LoopUnroll loop{rowbuffer_width_, FracVecTraits::num_lanes()}; loop.unroll_once([&](size_t step) KLEIDICV_STREAMING_COMPATIBLE { - vector_path(FracVecTraits::svptrue(), static_cast(step)); + vector_path(ptrue_16, ptrue_8, static_cast(step)); + }); + loop.remaining([&](size_t length, size_t) KLEIDICV_STREAMING_COMPATIBLE { + svbool_t pg16 = FracVecTraits::svwhilelt(size_t{0}, length); + svbool_t pg8 = svwhilelt_b8(size_t{0}, 2 * length); + vector_path(pg16, pg8, static_cast(length)); }); - loop.remaining( - [&](size_t length, size_t step) KLEIDICV_STREAMING_COMPATIBLE { - svbool_t pg = FracVecTraits::svwhilelt(step - length, step); - vector_path(pg, static_cast(length)); - }); } size_t rowbuffer_width_; diff --git a/kleidicv/src/remap/remap_sme2.cpp b/kleidicv/src/transform/remap_sme2.cpp similarity index 100% rename from kleidicv/src/remap/remap_sme2.cpp rename to kleidicv/src/transform/remap_sme2.cpp diff --git a/kleidicv/src/remap/remap_sve2.cpp b/kleidicv/src/transform/remap_sve2.cpp similarity index 100% rename from kleidicv/src/remap/remap_sve2.cpp rename to kleidicv/src/transform/remap_sve2.cpp diff --git a/kleidicv_thread/src/kleidicv_thread.cpp b/kleidicv_thread/src/kleidicv_thread.cpp index 6097dab4cf1f03cbf4af598cdb0e5cce84f20c56..0e273aa42764196adf5f20a8e83beeefe5ede49e 100644 --- a/kleidicv_thread/src/kleidicv_thread.cpp +++ b/kleidicv_thread/src/kleidicv_thread.cpp @@ -15,8 +15,8 @@ #include "kleidicv/filters/separable_filter_2d.h" #include "kleidicv/filters/sobel.h" #include "kleidicv/kleidicv.h" -#include "kleidicv/remap/remap.h" #include "kleidicv/resize/resize_linear.h" +#include "kleidicv/transform/remap.h" #include "kleidicv/transform/warp_perspective.h" typedef std::function FunctionCallback; diff --git a/test/api/test_remap.cpp b/test/api/test_remap.cpp index f1c7863d2e5cfa9d8c1286110d67cf568c096373..bdb5bf9d31da689acfb2e8d884269c626a4b5d21 100644 --- a/test/api/test_remap.cpp +++ b/test/api/test_remap.cpp @@ -371,19 +371,6 @@ class RemapS16Point5 : public testing::Test { channels, mapxy.data(), mapxy.stride(), mapfrac.data(), mapfrac.stride(), KLEIDICV_BORDER_TYPE_REPLICATE, {})); - if (expected.compare_to(actual)) { - std::cout << "source:\n"; - dump(&source); - std::cout << "mapxy:\n"; - dump(&mapxy); - std::cout << "mapfrac:\n"; - dump(&mapfrac); - std::cout << "expected:\n"; - dump(&expected); - std::cout << "actual:\n"; - dump(&actual); - } - EXPECT_EQ_ARRAY2D(actual, expected); }