From 0d2ef70a56db0978e35ef0f66c17af84b460f952 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Wed, 7 Aug 2024 18:14:06 +0200 Subject: [PATCH] Refactor and unify border header files --- .../kleidicv/separable_filter_15x15_neon.h | 154 +++++----- .../kleidicv/separable_filter_15x15_sc.h | 124 ++++---- .../kleidicv/separable_filter_3x3_neon.h | 40 +-- .../kleidicv/separable_filter_3x3_sc.h | 28 +- .../kleidicv/separable_filter_5x5_neon.h | 54 ++-- .../kleidicv/separable_filter_5x5_sc.h | 44 +-- .../kleidicv/separable_filter_7x7_neon.h | 74 ++--- .../kleidicv/separable_filter_7x7_sc.h | 60 ++-- kleidicv/include/kleidicv/workspace/border.h | 265 +++++++++++++++++ .../include/kleidicv/workspace/border_15x15.h | 276 ------------------ .../include/kleidicv/workspace/border_3x3.h | 116 -------- .../include/kleidicv/workspace/border_5x5.h | 162 ---------- .../include/kleidicv/workspace/border_7x7.h | 181 ------------ .../include/kleidicv/workspace/separable.h | 5 +- 14 files changed, 557 insertions(+), 1026 deletions(-) create mode 100644 kleidicv/include/kleidicv/workspace/border.h delete mode 100644 kleidicv/include/kleidicv/workspace/border_15x15.h delete mode 100644 kleidicv/include/kleidicv/workspace/border_3x3.h delete mode 100644 kleidicv/include/kleidicv/workspace/border_5x5.h delete mode 100644 kleidicv/include/kleidicv/workspace/border_7x7.h diff --git a/kleidicv/include/kleidicv/separable_filter_15x15_neon.h b/kleidicv/include/kleidicv/separable_filter_15x15_neon.h index 2475d1db3..7e6a4227f 100644 --- a/kleidicv/include/kleidicv/separable_filter_15x15_neon.h +++ b/kleidicv/include/kleidicv/separable_filter_15x15_neon.h @@ -6,7 +6,7 @@ #define KLEIDICV_SEPARABLE_FILTER_15X15_NEON_H #include "kleidicv/neon.h" -#include "kleidicv/workspace/border_15x15.h" +#include "kleidicv/workspace/border.h" namespace KLEIDICV_TARGET_NAMESPACE { @@ -26,7 +26,7 @@ class SeparableFilter { using BufferVecTraits = typename neon::VecTraits; using BufferVectorType = typename BufferVecTraits::VectorType; using BorderInfoType = - typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo15x15; + typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo; using BorderType = FixedBorderType; using BorderOffsets = typename BorderInfoType::Offsets; @@ -42,41 +42,41 @@ class SeparableFilter { loop.unroll_once([&](size_t index) { SourceVectorType src[15]; - src[0] = vld1q(&src_rows.at(border_offsets.c0())[index]); - src[1] = vld1q(&src_rows.at(border_offsets.c1())[index]); - src[2] = vld1q(&src_rows.at(border_offsets.c2())[index]); - src[3] = vld1q(&src_rows.at(border_offsets.c3())[index]); - src[4] = vld1q(&src_rows.at(border_offsets.c4())[index]); - src[5] = vld1q(&src_rows.at(border_offsets.c5())[index]); - src[6] = vld1q(&src_rows.at(border_offsets.c6())[index]); - src[7] = vld1q(&src_rows.at(border_offsets.c7())[index]); - src[8] = vld1q(&src_rows.at(border_offsets.c8())[index]); - src[9] = vld1q(&src_rows.at(border_offsets.c9())[index]); - src[10] = vld1q(&src_rows.at(border_offsets.c10())[index]); - src[11] = vld1q(&src_rows.at(border_offsets.c11())[index]); - src[12] = vld1q(&src_rows.at(border_offsets.c12())[index]); - src[13] = vld1q(&src_rows.at(border_offsets.c13())[index]); - src[14] = vld1q(&src_rows.at(border_offsets.c14())[index]); + src[0] = vld1q(&src_rows.at(border_offsets.c(0))[index]); + src[1] = vld1q(&src_rows.at(border_offsets.c(1))[index]); + src[2] = vld1q(&src_rows.at(border_offsets.c(2))[index]); + src[3] = vld1q(&src_rows.at(border_offsets.c(3))[index]); + src[4] = vld1q(&src_rows.at(border_offsets.c(4))[index]); + src[5] = vld1q(&src_rows.at(border_offsets.c(5))[index]); + src[6] = vld1q(&src_rows.at(border_offsets.c(6))[index]); + src[7] = vld1q(&src_rows.at(border_offsets.c(7))[index]); + src[8] = vld1q(&src_rows.at(border_offsets.c(8))[index]); + src[9] = vld1q(&src_rows.at(border_offsets.c(9))[index]); + src[10] = vld1q(&src_rows.at(border_offsets.c(10))[index]); + src[11] = vld1q(&src_rows.at(border_offsets.c(11))[index]); + src[12] = vld1q(&src_rows.at(border_offsets.c(12))[index]); + src[13] = vld1q(&src_rows.at(border_offsets.c(13))[index]); + src[14] = vld1q(&src_rows.at(border_offsets.c(14))[index]); filter_.vertical_vector_path(src, &dst_rows[index]); }); loop.tail([&](size_t index) { SourceType src[15]; - src[0] = src_rows.at(border_offsets.c0())[index]; - src[1] = src_rows.at(border_offsets.c1())[index]; - src[2] = src_rows.at(border_offsets.c2())[index]; - src[3] = src_rows.at(border_offsets.c3())[index]; - src[4] = src_rows.at(border_offsets.c4())[index]; - src[5] = src_rows.at(border_offsets.c5())[index]; - src[6] = src_rows.at(border_offsets.c6())[index]; - src[7] = src_rows.at(border_offsets.c7())[index]; - src[8] = src_rows.at(border_offsets.c8())[index]; - src[9] = src_rows.at(border_offsets.c9())[index]; - src[10] = src_rows.at(border_offsets.c10())[index]; - src[11] = src_rows.at(border_offsets.c11())[index]; - src[12] = src_rows.at(border_offsets.c12())[index]; - src[13] = src_rows.at(border_offsets.c13())[index]; - src[14] = src_rows.at(border_offsets.c14())[index]; + src[0] = src_rows.at(border_offsets.c(0))[index]; + src[1] = src_rows.at(border_offsets.c(1))[index]; + src[2] = src_rows.at(border_offsets.c(2))[index]; + src[3] = src_rows.at(border_offsets.c(3))[index]; + src[4] = src_rows.at(border_offsets.c(4))[index]; + src[5] = src_rows.at(border_offsets.c(5))[index]; + src[6] = src_rows.at(border_offsets.c(6))[index]; + src[7] = src_rows.at(border_offsets.c(7))[index]; + src[8] = src_rows.at(border_offsets.c(8))[index]; + src[9] = src_rows.at(border_offsets.c(9))[index]; + src[10] = src_rows.at(border_offsets.c(10))[index]; + src[11] = src_rows.at(border_offsets.c(11))[index]; + src[12] = src_rows.at(border_offsets.c(12))[index]; + src[13] = src_rows.at(border_offsets.c(13))[index]; + src[14] = src_rows.at(border_offsets.c(14))[index]; filter_.vertical_scalar_path(src, &dst_rows[index]); }); } @@ -88,21 +88,21 @@ class SeparableFilter { BufferVecTraits::num_lanes()}; loop.unroll_twice([&](size_t index) { - auto src_0 = &src_rows.at(0, border_offsets.c0())[index]; - auto src_1 = &src_rows.at(0, border_offsets.c1())[index]; - auto src_2 = &src_rows.at(0, border_offsets.c2())[index]; - auto src_3 = &src_rows.at(0, border_offsets.c3())[index]; - auto src_4 = &src_rows.at(0, border_offsets.c4())[index]; - auto src_5 = &src_rows.at(0, border_offsets.c5())[index]; - auto src_6 = &src_rows.at(0, border_offsets.c6())[index]; - auto src_7 = &src_rows.at(0, border_offsets.c7())[index]; - auto src_8 = &src_rows.at(0, border_offsets.c8())[index]; - auto src_9 = &src_rows.at(0, border_offsets.c9())[index]; - auto src_10 = &src_rows.at(0, border_offsets.c10())[index]; - auto src_11 = &src_rows.at(0, border_offsets.c11())[index]; - auto src_12 = &src_rows.at(0, border_offsets.c12())[index]; - auto src_13 = &src_rows.at(0, border_offsets.c13())[index]; - auto src_14 = &src_rows.at(0, border_offsets.c14())[index]; + auto src_0 = &src_rows.at(0, border_offsets.c(0))[index]; + auto src_1 = &src_rows.at(0, border_offsets.c(1))[index]; + auto src_2 = &src_rows.at(0, border_offsets.c(2))[index]; + auto src_3 = &src_rows.at(0, border_offsets.c(3))[index]; + auto src_4 = &src_rows.at(0, border_offsets.c(4))[index]; + auto src_5 = &src_rows.at(0, border_offsets.c(5))[index]; + auto src_6 = &src_rows.at(0, border_offsets.c(6))[index]; + auto src_7 = &src_rows.at(0, border_offsets.c(7))[index]; + auto src_8 = &src_rows.at(0, border_offsets.c(8))[index]; + auto src_9 = &src_rows.at(0, border_offsets.c(9))[index]; + auto src_10 = &src_rows.at(0, border_offsets.c(10))[index]; + auto src_11 = &src_rows.at(0, border_offsets.c(11))[index]; + auto src_12 = &src_rows.at(0, border_offsets.c(12))[index]; + auto src_13 = &src_rows.at(0, border_offsets.c(13))[index]; + auto src_14 = &src_rows.at(0, border_offsets.c(14))[index]; BufferVectorType src_a[15], src_b[15]; src_a[0] = vld1q(&src_0[0]); @@ -143,21 +143,21 @@ class SeparableFilter { loop.unroll_once([&](size_t index) { BufferVectorType src[15]; - src[0] = vld1q(&src_rows.at(0, border_offsets.c0())[index]); - src[1] = vld1q(&src_rows.at(0, border_offsets.c1())[index]); - src[2] = vld1q(&src_rows.at(0, border_offsets.c2())[index]); - src[3] = vld1q(&src_rows.at(0, border_offsets.c3())[index]); - src[4] = vld1q(&src_rows.at(0, border_offsets.c4())[index]); - src[5] = vld1q(&src_rows.at(0, border_offsets.c5())[index]); - src[6] = vld1q(&src_rows.at(0, border_offsets.c6())[index]); - src[7] = vld1q(&src_rows.at(0, border_offsets.c7())[index]); - src[8] = vld1q(&src_rows.at(0, border_offsets.c8())[index]); - src[9] = vld1q(&src_rows.at(0, border_offsets.c9())[index]); - src[10] = vld1q(&src_rows.at(0, border_offsets.c10())[index]); - src[11] = vld1q(&src_rows.at(0, border_offsets.c11())[index]); - src[12] = vld1q(&src_rows.at(0, border_offsets.c12())[index]); - src[13] = vld1q(&src_rows.at(0, border_offsets.c13())[index]); - src[14] = vld1q(&src_rows.at(0, border_offsets.c14())[index]); + src[0] = vld1q(&src_rows.at(0, border_offsets.c(0))[index]); + src[1] = vld1q(&src_rows.at(0, border_offsets.c(1))[index]); + src[2] = vld1q(&src_rows.at(0, border_offsets.c(2))[index]); + src[3] = vld1q(&src_rows.at(0, border_offsets.c(3))[index]); + src[4] = vld1q(&src_rows.at(0, border_offsets.c(4))[index]); + src[5] = vld1q(&src_rows.at(0, border_offsets.c(5))[index]); + src[6] = vld1q(&src_rows.at(0, border_offsets.c(6))[index]); + src[7] = vld1q(&src_rows.at(0, border_offsets.c(7))[index]); + src[8] = vld1q(&src_rows.at(0, border_offsets.c(8))[index]); + src[9] = vld1q(&src_rows.at(0, border_offsets.c(9))[index]); + src[10] = vld1q(&src_rows.at(0, border_offsets.c(10))[index]); + src[11] = vld1q(&src_rows.at(0, border_offsets.c(11))[index]); + src[12] = vld1q(&src_rows.at(0, border_offsets.c(12))[index]); + src[13] = vld1q(&src_rows.at(0, border_offsets.c(13))[index]); + src[14] = vld1q(&src_rows.at(0, border_offsets.c(14))[index]); filter_.horizontal_vector_path(src, &dst_rows[index]); }); @@ -181,21 +181,21 @@ class SeparableFilter { BorderOffsets border_offsets, size_t index) const { BufferType src[15]; - src[0] = src_rows.at(0, border_offsets.c0())[index]; - src[1] = src_rows.at(0, border_offsets.c1())[index]; - src[2] = src_rows.at(0, border_offsets.c2())[index]; - src[3] = src_rows.at(0, border_offsets.c3())[index]; - src[4] = src_rows.at(0, border_offsets.c4())[index]; - src[5] = src_rows.at(0, border_offsets.c5())[index]; - src[6] = src_rows.at(0, border_offsets.c6())[index]; - src[7] = src_rows.at(0, border_offsets.c7())[index]; - src[8] = src_rows.at(0, border_offsets.c8())[index]; - src[9] = src_rows.at(0, border_offsets.c9())[index]; - src[10] = src_rows.at(0, border_offsets.c10())[index]; - src[11] = src_rows.at(0, border_offsets.c11())[index]; - src[12] = src_rows.at(0, border_offsets.c12())[index]; - src[13] = src_rows.at(0, border_offsets.c13())[index]; - src[14] = src_rows.at(0, border_offsets.c14())[index]; + src[0] = src_rows.at(0, border_offsets.c(0))[index]; + src[1] = src_rows.at(0, border_offsets.c(1))[index]; + src[2] = src_rows.at(0, border_offsets.c(2))[index]; + src[3] = src_rows.at(0, border_offsets.c(3))[index]; + src[4] = src_rows.at(0, border_offsets.c(4))[index]; + src[5] = src_rows.at(0, border_offsets.c(5))[index]; + src[6] = src_rows.at(0, border_offsets.c(6))[index]; + src[7] = src_rows.at(0, border_offsets.c(7))[index]; + src[8] = src_rows.at(0, border_offsets.c(8))[index]; + src[9] = src_rows.at(0, border_offsets.c(9))[index]; + src[10] = src_rows.at(0, border_offsets.c(10))[index]; + src[11] = src_rows.at(0, border_offsets.c(11))[index]; + src[12] = src_rows.at(0, border_offsets.c(12))[index]; + src[13] = src_rows.at(0, border_offsets.c(13))[index]; + src[14] = src_rows.at(0, border_offsets.c(14))[index]; filter_.horizontal_scalar_path(src, &dst_rows[index]); } diff --git a/kleidicv/include/kleidicv/separable_filter_15x15_sc.h b/kleidicv/include/kleidicv/separable_filter_15x15_sc.h index f95067a09..b38193b51 100644 --- a/kleidicv/include/kleidicv/separable_filter_15x15_sc.h +++ b/kleidicv/include/kleidicv/separable_filter_15x15_sc.h @@ -6,7 +6,7 @@ #define KLEIDICV_SEPARABLE_FILTER_15X15_SC_H #include "kleidicv/sve2.h" -#include "kleidicv/workspace/border_15x15.h" +#include "kleidicv/workspace/border.h" // It is used by SVE2 and SME2, the actual namespace will reflect it. namespace KLEIDICV_TARGET_NAMESPACE { @@ -29,7 +29,7 @@ class SeparableFilter { typename ::KLEIDICV_TARGET_NAMESPACE::VecTraits; using BufferVectorType = typename BufferVecTraits::VectorType; using BorderInfoType = - typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo15x15; + typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo; using BorderType = FixedBorderType; using BorderOffsets = typename BorderInfoType::Offsets; @@ -95,35 +95,35 @@ class SeparableFilter { BorderOffsets border_offsets, size_t index) const KLEIDICV_STREAMING_COMPATIBLE { SourceVectorType src_0 = - svld1(pg, &src_rows.at(border_offsets.c0())[index]); + svld1(pg, &src_rows.at(border_offsets.c(0))[index]); SourceVectorType src_1 = - svld1(pg, &src_rows.at(border_offsets.c1())[index]); + svld1(pg, &src_rows.at(border_offsets.c(1))[index]); SourceVectorType src_2 = - svld1(pg, &src_rows.at(border_offsets.c2())[index]); + svld1(pg, &src_rows.at(border_offsets.c(2))[index]); SourceVectorType src_3 = - svld1(pg, &src_rows.at(border_offsets.c3())[index]); + svld1(pg, &src_rows.at(border_offsets.c(3))[index]); SourceVectorType src_4 = - svld1(pg, &src_rows.at(border_offsets.c4())[index]); + svld1(pg, &src_rows.at(border_offsets.c(4))[index]); SourceVectorType src_5 = - svld1(pg, &src_rows.at(border_offsets.c5())[index]); + svld1(pg, &src_rows.at(border_offsets.c(5))[index]); SourceVectorType src_6 = - svld1(pg, &src_rows.at(border_offsets.c6())[index]); + svld1(pg, &src_rows.at(border_offsets.c(6))[index]); SourceVectorType src_7 = - svld1(pg, &src_rows.at(border_offsets.c7())[index]); + svld1(pg, &src_rows.at(border_offsets.c(7))[index]); SourceVectorType src_8 = - svld1(pg, &src_rows.at(border_offsets.c8())[index]); + svld1(pg, &src_rows.at(border_offsets.c(8))[index]); SourceVectorType src_9 = - svld1(pg, &src_rows.at(border_offsets.c9())[index]); + svld1(pg, &src_rows.at(border_offsets.c(9))[index]); SourceVectorType src_10 = - svld1(pg, &src_rows.at(border_offsets.c10())[index]); + svld1(pg, &src_rows.at(border_offsets.c(10))[index]); SourceVectorType src_11 = - svld1(pg, &src_rows.at(border_offsets.c11())[index]); + svld1(pg, &src_rows.at(border_offsets.c(11))[index]); SourceVectorType src_12 = - svld1(pg, &src_rows.at(border_offsets.c12())[index]); + svld1(pg, &src_rows.at(border_offsets.c(12))[index]); SourceVectorType src_13 = - svld1(pg, &src_rows.at(border_offsets.c13())[index]); + svld1(pg, &src_rows.at(border_offsets.c(13))[index]); SourceVectorType src_14 = - svld1(pg, &src_rows.at(border_offsets.c14())[index]); + svld1(pg, &src_rows.at(border_offsets.c(14))[index]); filter_.vertical_vector_path(pg, src_0, src_1, src_2, src_3, src_4, src_5, src_6, src_7, src_8, src_9, src_10, src_11, src_12, src_13, src_14, &dst_rows[index]); @@ -133,21 +133,21 @@ class SeparableFilter { svbool_t pg, Rows src_rows, Rows dst_rows, BorderOffsets border_offsets, size_t index) const KLEIDICV_STREAMING_COMPATIBLE { - auto src_0 = &src_rows.at(0, border_offsets.c0())[index]; - auto src_1 = &src_rows.at(0, border_offsets.c1())[index]; - auto src_2 = &src_rows.at(0, border_offsets.c2())[index]; - auto src_3 = &src_rows.at(0, border_offsets.c3())[index]; - auto src_4 = &src_rows.at(0, border_offsets.c4())[index]; - auto src_5 = &src_rows.at(0, border_offsets.c5())[index]; - auto src_6 = &src_rows.at(0, border_offsets.c6())[index]; - auto src_7 = &src_rows.at(0, border_offsets.c7())[index]; - auto src_8 = &src_rows.at(0, border_offsets.c8())[index]; - auto src_9 = &src_rows.at(0, border_offsets.c9())[index]; - auto src_10 = &src_rows.at(0, border_offsets.c10())[index]; - auto src_11 = &src_rows.at(0, border_offsets.c11())[index]; - auto src_12 = &src_rows.at(0, border_offsets.c12())[index]; - auto src_13 = &src_rows.at(0, border_offsets.c13())[index]; - auto src_14 = &src_rows.at(0, border_offsets.c14())[index]; + auto src_0 = &src_rows.at(0, border_offsets.c(0))[index]; + auto src_1 = &src_rows.at(0, border_offsets.c(1))[index]; + auto src_2 = &src_rows.at(0, border_offsets.c(2))[index]; + auto src_3 = &src_rows.at(0, border_offsets.c(3))[index]; + auto src_4 = &src_rows.at(0, border_offsets.c(4))[index]; + auto src_5 = &src_rows.at(0, border_offsets.c(5))[index]; + auto src_6 = &src_rows.at(0, border_offsets.c(6))[index]; + auto src_7 = &src_rows.at(0, border_offsets.c(7))[index]; + auto src_8 = &src_rows.at(0, border_offsets.c(8))[index]; + auto src_9 = &src_rows.at(0, border_offsets.c(9))[index]; + auto src_10 = &src_rows.at(0, border_offsets.c(10))[index]; + auto src_11 = &src_rows.at(0, border_offsets.c(11))[index]; + auto src_12 = &src_rows.at(0, border_offsets.c(12))[index]; + auto src_13 = &src_rows.at(0, border_offsets.c(13))[index]; + auto src_14 = &src_rows.at(0, border_offsets.c(14))[index]; BufferVectorType src_0_0 = svld1(pg, &src_0[0]); BufferVectorType src_1_0 = svld1_vnum(pg, &src_0[0], 1); @@ -195,35 +195,35 @@ class SeparableFilter { BorderOffsets border_offsets, size_t index) const KLEIDICV_STREAMING_COMPATIBLE { BufferVectorType src_0 = - svld1(pg, &src_rows.at(0, border_offsets.c0())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(0))[index]); BufferVectorType src_1 = - svld1(pg, &src_rows.at(0, border_offsets.c1())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(1))[index]); BufferVectorType src_2 = - svld1(pg, &src_rows.at(0, border_offsets.c2())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(2))[index]); BufferVectorType src_3 = - svld1(pg, &src_rows.at(0, border_offsets.c3())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(3))[index]); BufferVectorType src_4 = - svld1(pg, &src_rows.at(0, border_offsets.c4())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(4))[index]); BufferVectorType src_5 = - svld1(pg, &src_rows.at(0, border_offsets.c5())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(5))[index]); BufferVectorType src_6 = - svld1(pg, &src_rows.at(0, border_offsets.c6())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(6))[index]); BufferVectorType src_7 = - svld1(pg, &src_rows.at(0, border_offsets.c7())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(7))[index]); BufferVectorType src_8 = - svld1(pg, &src_rows.at(0, border_offsets.c8())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(8))[index]); BufferVectorType src_9 = - svld1(pg, &src_rows.at(0, border_offsets.c9())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(9))[index]); BufferVectorType src_10 = - svld1(pg, &src_rows.at(0, border_offsets.c10())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(10))[index]); BufferVectorType src_11 = - svld1(pg, &src_rows.at(0, border_offsets.c11())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(11))[index]); BufferVectorType src_12 = - svld1(pg, &src_rows.at(0, border_offsets.c12())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(12))[index]); BufferVectorType src_13 = - svld1(pg, &src_rows.at(0, border_offsets.c13())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(13))[index]); BufferVectorType src_14 = - svld1(pg, &src_rows.at(0, border_offsets.c14())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(14))[index]); filter_.horizontal_vector_path(pg, src_0, src_1, src_2, src_3, src_4, src_5, src_6, src_7, src_8, src_9, src_10, src_11, src_12, src_13, src_14, &dst_rows[index]); @@ -234,21 +234,21 @@ class SeparableFilter { BorderOffsets border_offsets, size_t index) const KLEIDICV_STREAMING_COMPATIBLE { BufferType src[15]; - src[0] = src_rows.at(0, border_offsets.c0())[index]; - src[1] = src_rows.at(0, border_offsets.c1())[index]; - src[2] = src_rows.at(0, border_offsets.c2())[index]; - src[3] = src_rows.at(0, border_offsets.c3())[index]; - src[4] = src_rows.at(0, border_offsets.c4())[index]; - src[5] = src_rows.at(0, border_offsets.c5())[index]; - src[6] = src_rows.at(0, border_offsets.c6())[index]; - src[7] = src_rows.at(0, border_offsets.c7())[index]; - src[8] = src_rows.at(0, border_offsets.c8())[index]; - src[9] = src_rows.at(0, border_offsets.c9())[index]; - src[10] = src_rows.at(0, border_offsets.c10())[index]; - src[11] = src_rows.at(0, border_offsets.c11())[index]; - src[12] = src_rows.at(0, border_offsets.c12())[index]; - src[13] = src_rows.at(0, border_offsets.c13())[index]; - src[14] = src_rows.at(0, border_offsets.c14())[index]; + src[0] = src_rows.at(0, border_offsets.c(0))[index]; + src[1] = src_rows.at(0, border_offsets.c(1))[index]; + src[2] = src_rows.at(0, border_offsets.c(2))[index]; + src[3] = src_rows.at(0, border_offsets.c(3))[index]; + src[4] = src_rows.at(0, border_offsets.c(4))[index]; + src[5] = src_rows.at(0, border_offsets.c(5))[index]; + src[6] = src_rows.at(0, border_offsets.c(6))[index]; + src[7] = src_rows.at(0, border_offsets.c(7))[index]; + src[8] = src_rows.at(0, border_offsets.c(8))[index]; + src[9] = src_rows.at(0, border_offsets.c(9))[index]; + src[10] = src_rows.at(0, border_offsets.c(10))[index]; + src[11] = src_rows.at(0, border_offsets.c(11))[index]; + src[12] = src_rows.at(0, border_offsets.c(12))[index]; + src[13] = src_rows.at(0, border_offsets.c(13))[index]; + src[14] = src_rows.at(0, border_offsets.c(14))[index]; filter_.horizontal_scalar_path(src, &dst_rows[index]); } diff --git a/kleidicv/include/kleidicv/separable_filter_3x3_neon.h b/kleidicv/include/kleidicv/separable_filter_3x3_neon.h index 3fecea047..d26e54d86 100644 --- a/kleidicv/include/kleidicv/separable_filter_3x3_neon.h +++ b/kleidicv/include/kleidicv/separable_filter_3x3_neon.h @@ -6,7 +6,7 @@ #define KLEIDICV_SEPARABLE_FILTER_3X3_NEON_H #include "kleidicv/neon.h" -#include "kleidicv/workspace/border_3x3.h" +#include "kleidicv/workspace/border.h" namespace KLEIDICV_TARGET_NAMESPACE { @@ -26,7 +26,7 @@ class SeparableFilter { using BufferVecTraits = typename neon::VecTraits; using BufferVectorType = typename BufferVecTraits::VectorType; using BorderInfoType = - typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo3x3; + typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo; using BorderType = FixedBorderType; using BorderOffsets = typename BorderInfoType::Offsets; @@ -41,9 +41,9 @@ class SeparableFilter { SourceVecTraits::num_lanes()}; loop.unroll_twice([&](size_t index) { - auto src_0 = &src_rows.at(border_offsets.c0())[index]; - auto src_1 = &src_rows.at(border_offsets.c1())[index]; - auto src_2 = &src_rows.at(border_offsets.c2())[index]; + auto src_0 = &src_rows.at(border_offsets.c(0))[index]; + auto src_1 = &src_rows.at(border_offsets.c(1))[index]; + auto src_2 = &src_rows.at(border_offsets.c(2))[index]; auto src_0_x2 = vld1q_x2(&src_0[0]); auto src_1_x2 = vld1q_x2(&src_1[0]); @@ -64,17 +64,17 @@ class SeparableFilter { loop.unroll_once([&](size_t index) { SourceVectorType src[3]; - src[0] = vld1q(&src_rows.at(border_offsets.c0())[index]); - src[1] = vld1q(&src_rows.at(border_offsets.c1())[index]); - src[2] = vld1q(&src_rows.at(border_offsets.c2())[index]); + src[0] = vld1q(&src_rows.at(border_offsets.c(0))[index]); + src[1] = vld1q(&src_rows.at(border_offsets.c(1))[index]); + src[2] = vld1q(&src_rows.at(border_offsets.c(2))[index]); filter_.vertical_vector_path(src, &dst_rows[index]); }); loop.tail([&](size_t index) { SourceType src[3]; - src[0] = src_rows.at(border_offsets.c0())[index]; - src[1] = src_rows.at(border_offsets.c1())[index]; - src[2] = src_rows.at(border_offsets.c2())[index]; + src[0] = src_rows.at(border_offsets.c(0))[index]; + src[1] = src_rows.at(border_offsets.c(1))[index]; + src[2] = src_rows.at(border_offsets.c(2))[index]; filter_.vertical_scalar_path(src, &dst_rows[index]); }); } @@ -86,9 +86,9 @@ class SeparableFilter { BufferVecTraits::num_lanes()}; loop.unroll_twice([&](size_t index) { - auto src_0 = &src_rows.at(0, border_offsets.c0())[index]; - auto src_1 = &src_rows.at(0, border_offsets.c1())[index]; - auto src_2 = &src_rows.at(0, border_offsets.c2())[index]; + auto src_0 = &src_rows.at(0, border_offsets.c(0))[index]; + auto src_1 = &src_rows.at(0, border_offsets.c(1))[index]; + auto src_2 = &src_rows.at(0, border_offsets.c(2))[index]; auto src_0_x2 = vld1q_x2(&src_0[0]); auto src_1_x2 = vld1q_x2(&src_1[0]); @@ -109,9 +109,9 @@ class SeparableFilter { loop.unroll_once([&](size_t index) { BufferVectorType src[3]; - src[0] = vld1q(&src_rows.at(0, border_offsets.c0())[index]); - src[1] = vld1q(&src_rows.at(0, border_offsets.c1())[index]); - src[2] = vld1q(&src_rows.at(0, border_offsets.c2())[index]); + src[0] = vld1q(&src_rows.at(0, border_offsets.c(0))[index]); + src[1] = vld1q(&src_rows.at(0, border_offsets.c(1))[index]); + src[2] = vld1q(&src_rows.at(0, border_offsets.c(2))[index]); filter_.horizontal_vector_path(src, &dst_rows[index]); }); @@ -135,9 +135,9 @@ class SeparableFilter { BorderOffsets border_offsets, size_t index) const { BufferType src[3]; - src[0] = src_rows.at(0, border_offsets.c0())[index]; - src[1] = src_rows.at(0, border_offsets.c1())[index]; - src[2] = src_rows.at(0, border_offsets.c2())[index]; + src[0] = src_rows.at(0, border_offsets.c(0))[index]; + src[1] = src_rows.at(0, border_offsets.c(1))[index]; + src[2] = src_rows.at(0, border_offsets.c(2))[index]; filter_.horizontal_scalar_path(src, &dst_rows[index]); } diff --git a/kleidicv/include/kleidicv/separable_filter_3x3_sc.h b/kleidicv/include/kleidicv/separable_filter_3x3_sc.h index 6f624ae1c..8c7e092ab 100644 --- a/kleidicv/include/kleidicv/separable_filter_3x3_sc.h +++ b/kleidicv/include/kleidicv/separable_filter_3x3_sc.h @@ -6,7 +6,7 @@ #define KLEIDICV_SEPARABLE_FILTER_3X3_SC_H #include "kleidicv/sve2.h" -#include "kleidicv/workspace/border_3x3.h" +#include "kleidicv/workspace/border.h" // It is used by SVE2 and SME2, the actual namespace will reflect it. namespace KLEIDICV_TARGET_NAMESPACE { @@ -29,7 +29,7 @@ class SeparableFilter { typename ::KLEIDICV_TARGET_NAMESPACE::VecTraits; using BufferVectorType = typename BufferVecTraits::VectorType; using BorderInfoType = - typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo3x3; + typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo; using BorderType = FixedBorderType; using BorderOffsets = typename BorderInfoType::Offsets; @@ -95,11 +95,11 @@ class SeparableFilter { BorderOffsets border_offsets, size_t index) const KLEIDICV_STREAMING_COMPATIBLE { SourceVectorType src_0 = - svld1(pg, &src_rows.at(border_offsets.c0())[index]); + svld1(pg, &src_rows.at(border_offsets.c(0))[index]); SourceVectorType src_1 = - svld1(pg, &src_rows.at(border_offsets.c1())[index]); + svld1(pg, &src_rows.at(border_offsets.c(1))[index]); SourceVectorType src_2 = - svld1(pg, &src_rows.at(border_offsets.c2())[index]); + svld1(pg, &src_rows.at(border_offsets.c(2))[index]); filter_.vertical_vector_path(pg, src_0, src_1, src_2, &dst_rows[index]); } @@ -107,9 +107,9 @@ class SeparableFilter { svbool_t pg, Rows src_rows, Rows dst_rows, BorderOffsets border_offsets, size_t index) const KLEIDICV_STREAMING_COMPATIBLE { - auto src_0 = &src_rows.at(0, border_offsets.c0())[index]; - auto src_1 = &src_rows.at(0, border_offsets.c1())[index]; - auto src_2 = &src_rows.at(0, border_offsets.c2())[index]; + auto src_0 = &src_rows.at(0, border_offsets.c(0))[index]; + auto src_1 = &src_rows.at(0, border_offsets.c(1))[index]; + auto src_2 = &src_rows.at(0, border_offsets.c(2))[index]; BufferVectorType src_0_0 = svld1(pg, &src_0[0]); BufferVectorType src_1_0 = svld1_vnum(pg, &src_0[0], 1); @@ -130,11 +130,11 @@ class SeparableFilter { BorderOffsets border_offsets, size_t index) const KLEIDICV_STREAMING_COMPATIBLE { BufferVectorType src_0 = - svld1(pg, &src_rows.at(0, border_offsets.c0())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(0))[index]); BufferVectorType src_1 = - svld1(pg, &src_rows.at(0, border_offsets.c1())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(1))[index]); BufferVectorType src_2 = - svld1(pg, &src_rows.at(0, border_offsets.c2())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(2))[index]); filter_.horizontal_vector_path(pg, src_0, src_1, src_2, &dst_rows[index]); } @@ -143,9 +143,9 @@ class SeparableFilter { BorderOffsets border_offsets, size_t index) const KLEIDICV_STREAMING_COMPATIBLE { BufferType src[3]; - src[0] = src_rows.at(0, border_offsets.c0())[index]; - src[1] = src_rows.at(0, border_offsets.c1())[index]; - src[2] = src_rows.at(0, border_offsets.c2())[index]; + src[0] = src_rows.at(0, border_offsets.c(0))[index]; + src[1] = src_rows.at(0, border_offsets.c(1))[index]; + src[2] = src_rows.at(0, border_offsets.c(2))[index]; filter_.horizontal_scalar_path(src, &dst_rows[index]); } diff --git a/kleidicv/include/kleidicv/separable_filter_5x5_neon.h b/kleidicv/include/kleidicv/separable_filter_5x5_neon.h index 34f4290d7..46746dbd8 100644 --- a/kleidicv/include/kleidicv/separable_filter_5x5_neon.h +++ b/kleidicv/include/kleidicv/separable_filter_5x5_neon.h @@ -6,7 +6,7 @@ #define KLEIDICV_SEPARABLE_FILTER_5X5_NEON_H #include "kleidicv/neon.h" -#include "kleidicv/workspace/border_5x5.h" +#include "kleidicv/workspace/border.h" namespace KLEIDICV_TARGET_NAMESPACE { @@ -26,7 +26,7 @@ class SeparableFilter { using BufferVecTraits = typename neon::VecTraits; using BufferVectorType = typename BufferVecTraits::VectorType; using BorderInfoType = - typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo5x5; + typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo; using BorderType = FixedBorderType; using BorderOffsets = typename BorderInfoType::Offsets; @@ -42,21 +42,21 @@ class SeparableFilter { loop.unroll_once([&](size_t index) { SourceVectorType src[5]; - src[0] = vld1q(&src_rows.at(border_offsets.c0())[index]); - src[1] = vld1q(&src_rows.at(border_offsets.c1())[index]); - src[2] = vld1q(&src_rows.at(border_offsets.c2())[index]); - src[3] = vld1q(&src_rows.at(border_offsets.c3())[index]); - src[4] = vld1q(&src_rows.at(border_offsets.c4())[index]); + src[0] = vld1q(&src_rows.at(border_offsets.c(0))[index]); + src[1] = vld1q(&src_rows.at(border_offsets.c(1))[index]); + src[2] = vld1q(&src_rows.at(border_offsets.c(2))[index]); + src[3] = vld1q(&src_rows.at(border_offsets.c(3))[index]); + src[4] = vld1q(&src_rows.at(border_offsets.c(4))[index]); filter_.vertical_vector_path(src, &dst_rows[index]); }); loop.tail([&](size_t index) { SourceType src[5]; - src[0] = src_rows.at(border_offsets.c0())[index]; - src[1] = src_rows.at(border_offsets.c1())[index]; - src[2] = src_rows.at(border_offsets.c2())[index]; - src[3] = src_rows.at(border_offsets.c3())[index]; - src[4] = src_rows.at(border_offsets.c4())[index]; + src[0] = src_rows.at(border_offsets.c(0))[index]; + src[1] = src_rows.at(border_offsets.c(1))[index]; + src[2] = src_rows.at(border_offsets.c(2))[index]; + src[3] = src_rows.at(border_offsets.c(3))[index]; + src[4] = src_rows.at(border_offsets.c(4))[index]; filter_.vertical_scalar_path(src, &dst_rows[index]); }); } @@ -68,11 +68,11 @@ class SeparableFilter { BufferVecTraits::num_lanes()}; loop.unroll_twice([&](size_t index) { - auto src_0 = &src_rows.at(0, border_offsets.c0())[index]; - auto src_1 = &src_rows.at(0, border_offsets.c1())[index]; - auto src_2 = &src_rows.at(0, border_offsets.c2())[index]; - auto src_3 = &src_rows.at(0, border_offsets.c3())[index]; - auto src_4 = &src_rows.at(0, border_offsets.c4())[index]; + auto src_0 = &src_rows.at(0, border_offsets.c(0))[index]; + auto src_1 = &src_rows.at(0, border_offsets.c(1))[index]; + auto src_2 = &src_rows.at(0, border_offsets.c(2))[index]; + auto src_3 = &src_rows.at(0, border_offsets.c(3))[index]; + auto src_4 = &src_rows.at(0, border_offsets.c(4))[index]; BufferVectorType src_a[5], src_b[5]; src_a[0] = vld1q(&src_0[0]); @@ -93,11 +93,11 @@ class SeparableFilter { loop.unroll_once([&](size_t index) { BufferVectorType src[5]; - src[0] = vld1q(&src_rows.at(0, border_offsets.c0())[index]); - src[1] = vld1q(&src_rows.at(0, border_offsets.c1())[index]); - src[2] = vld1q(&src_rows.at(0, border_offsets.c2())[index]); - src[3] = vld1q(&src_rows.at(0, border_offsets.c3())[index]); - src[4] = vld1q(&src_rows.at(0, border_offsets.c4())[index]); + src[0] = vld1q(&src_rows.at(0, border_offsets.c(0))[index]); + src[1] = vld1q(&src_rows.at(0, border_offsets.c(1))[index]); + src[2] = vld1q(&src_rows.at(0, border_offsets.c(2))[index]); + src[3] = vld1q(&src_rows.at(0, border_offsets.c(3))[index]); + src[4] = vld1q(&src_rows.at(0, border_offsets.c(4))[index]); filter_.horizontal_vector_path(src, &dst_rows[index]); }); @@ -121,11 +121,11 @@ class SeparableFilter { BorderOffsets border_offsets, size_t index) const { BufferType src[5]; - src[0] = src_rows.at(0, border_offsets.c0())[index]; - src[1] = src_rows.at(0, border_offsets.c1())[index]; - src[2] = src_rows.at(0, border_offsets.c2())[index]; - src[3] = src_rows.at(0, border_offsets.c3())[index]; - src[4] = src_rows.at(0, border_offsets.c4())[index]; + src[0] = src_rows.at(0, border_offsets.c(0))[index]; + src[1] = src_rows.at(0, border_offsets.c(1))[index]; + src[2] = src_rows.at(0, border_offsets.c(2))[index]; + src[3] = src_rows.at(0, border_offsets.c(3))[index]; + src[4] = src_rows.at(0, border_offsets.c(4))[index]; filter_.horizontal_scalar_path(src, &dst_rows[index]); } diff --git a/kleidicv/include/kleidicv/separable_filter_5x5_sc.h b/kleidicv/include/kleidicv/separable_filter_5x5_sc.h index 909e8ce18..3ca4075cf 100644 --- a/kleidicv/include/kleidicv/separable_filter_5x5_sc.h +++ b/kleidicv/include/kleidicv/separable_filter_5x5_sc.h @@ -6,7 +6,7 @@ #define KLEIDICV_SEPARABLE_FILTER_5X5_SC_H #include "kleidicv/sve2.h" -#include "kleidicv/workspace/border_5x5.h" +#include "kleidicv/workspace/border.h" // It is used by SVE2 and SME2, the actual namespace will reflect it. namespace KLEIDICV_TARGET_NAMESPACE { @@ -29,7 +29,7 @@ class SeparableFilter { typename ::KLEIDICV_TARGET_NAMESPACE::VecTraits; using BufferVectorType = typename BufferVecTraits::VectorType; using BorderInfoType = - typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo5x5; + typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo; using BorderType = FixedBorderType; using BorderOffsets = typename BorderInfoType::Offsets; @@ -95,15 +95,15 @@ class SeparableFilter { BorderOffsets border_offsets, size_t index) const KLEIDICV_STREAMING_COMPATIBLE { SourceVectorType src_0 = - svld1(pg, &src_rows.at(border_offsets.c0())[index]); + svld1(pg, &src_rows.at(border_offsets.c(0))[index]); SourceVectorType src_1 = - svld1(pg, &src_rows.at(border_offsets.c1())[index]); + svld1(pg, &src_rows.at(border_offsets.c(1))[index]); SourceVectorType src_2 = - svld1(pg, &src_rows.at(border_offsets.c2())[index]); + svld1(pg, &src_rows.at(border_offsets.c(2))[index]); SourceVectorType src_3 = - svld1(pg, &src_rows.at(border_offsets.c3())[index]); + svld1(pg, &src_rows.at(border_offsets.c(3))[index]); SourceVectorType src_4 = - svld1(pg, &src_rows.at(border_offsets.c4())[index]); + svld1(pg, &src_rows.at(border_offsets.c(4))[index]); filter_.vertical_vector_path(pg, src_0, src_1, src_2, src_3, src_4, &dst_rows[index]); } @@ -112,11 +112,11 @@ class SeparableFilter { svbool_t pg, Rows src_rows, Rows dst_rows, BorderOffsets border_offsets, size_t index) const KLEIDICV_STREAMING_COMPATIBLE { - auto src_0 = &src_rows.at(0, border_offsets.c0())[index]; - auto src_1 = &src_rows.at(0, border_offsets.c1())[index]; - auto src_2 = &src_rows.at(0, border_offsets.c2())[index]; - auto src_3 = &src_rows.at(0, border_offsets.c3())[index]; - auto src_4 = &src_rows.at(0, border_offsets.c4())[index]; + auto src_0 = &src_rows.at(0, border_offsets.c(0))[index]; + auto src_1 = &src_rows.at(0, border_offsets.c(1))[index]; + auto src_2 = &src_rows.at(0, border_offsets.c(2))[index]; + auto src_3 = &src_rows.at(0, border_offsets.c(3))[index]; + auto src_4 = &src_rows.at(0, border_offsets.c(4))[index]; BufferVectorType src_0_0 = svld1(pg, &src_0[0]); BufferVectorType src_1_0 = svld1_vnum(pg, &src_0[0], 1); @@ -141,15 +141,15 @@ class SeparableFilter { BorderOffsets border_offsets, size_t index) const KLEIDICV_STREAMING_COMPATIBLE { BufferVectorType src_0 = - svld1(pg, &src_rows.at(0, border_offsets.c0())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(0))[index]); BufferVectorType src_1 = - svld1(pg, &src_rows.at(0, border_offsets.c1())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(1))[index]); BufferVectorType src_2 = - svld1(pg, &src_rows.at(0, border_offsets.c2())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(2))[index]); BufferVectorType src_3 = - svld1(pg, &src_rows.at(0, border_offsets.c3())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(3))[index]); BufferVectorType src_4 = - svld1(pg, &src_rows.at(0, border_offsets.c4())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(4))[index]); filter_.horizontal_vector_path(pg, src_0, src_1, src_2, src_3, src_4, &dst_rows[index]); } @@ -159,11 +159,11 @@ class SeparableFilter { BorderOffsets border_offsets, size_t index) const KLEIDICV_STREAMING_COMPATIBLE { BufferType src[5]; - src[0] = src_rows.at(0, border_offsets.c0())[index]; - src[1] = src_rows.at(0, border_offsets.c1())[index]; - src[2] = src_rows.at(0, border_offsets.c2())[index]; - src[3] = src_rows.at(0, border_offsets.c3())[index]; - src[4] = src_rows.at(0, border_offsets.c4())[index]; + src[0] = src_rows.at(0, border_offsets.c(0))[index]; + src[1] = src_rows.at(0, border_offsets.c(1))[index]; + src[2] = src_rows.at(0, border_offsets.c(2))[index]; + src[3] = src_rows.at(0, border_offsets.c(3))[index]; + src[4] = src_rows.at(0, border_offsets.c(4))[index]; filter_.horizontal_scalar_path(src, &dst_rows[index]); } diff --git a/kleidicv/include/kleidicv/separable_filter_7x7_neon.h b/kleidicv/include/kleidicv/separable_filter_7x7_neon.h index 4305d9d06..2d804d933 100644 --- a/kleidicv/include/kleidicv/separable_filter_7x7_neon.h +++ b/kleidicv/include/kleidicv/separable_filter_7x7_neon.h @@ -6,7 +6,7 @@ #define KLEIDICV_SEPARABLE_FILTER_7X7_NEON_H #include "kleidicv/neon.h" -#include "kleidicv/workspace/border_7x7.h" +#include "kleidicv/workspace/border.h" namespace KLEIDICV_TARGET_NAMESPACE { @@ -26,7 +26,7 @@ class SeparableFilter { using BufferVecTraits = typename neon::VecTraits; using BufferVectorType = typename BufferVecTraits::VectorType; using BorderInfoType = - typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo7x7; + typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo; using BorderType = FixedBorderType; using BorderOffsets = typename BorderInfoType::Offsets; @@ -42,25 +42,25 @@ class SeparableFilter { loop.unroll_once([&](size_t index) { SourceVectorType src[7]; - src[0] = vld1q(&src_rows.at(border_offsets.c0())[index]); - src[1] = vld1q(&src_rows.at(border_offsets.c1())[index]); - src[2] = vld1q(&src_rows.at(border_offsets.c2())[index]); - src[3] = vld1q(&src_rows.at(border_offsets.c3())[index]); - src[4] = vld1q(&src_rows.at(border_offsets.c4())[index]); - src[5] = vld1q(&src_rows.at(border_offsets.c5())[index]); - src[6] = vld1q(&src_rows.at(border_offsets.c6())[index]); + src[0] = vld1q(&src_rows.at(border_offsets.c(0))[index]); + src[1] = vld1q(&src_rows.at(border_offsets.c(1))[index]); + src[2] = vld1q(&src_rows.at(border_offsets.c(2))[index]); + src[3] = vld1q(&src_rows.at(border_offsets.c(3))[index]); + src[4] = vld1q(&src_rows.at(border_offsets.c(4))[index]); + src[5] = vld1q(&src_rows.at(border_offsets.c(5))[index]); + src[6] = vld1q(&src_rows.at(border_offsets.c(6))[index]); filter_.vertical_vector_path(src, &dst_rows[index]); }); loop.tail([&](size_t index) { SourceType src[7]; - src[0] = src_rows.at(border_offsets.c0())[index]; - src[1] = src_rows.at(border_offsets.c1())[index]; - src[2] = src_rows.at(border_offsets.c2())[index]; - src[3] = src_rows.at(border_offsets.c3())[index]; - src[4] = src_rows.at(border_offsets.c4())[index]; - src[5] = src_rows.at(border_offsets.c5())[index]; - src[6] = src_rows.at(border_offsets.c6())[index]; + src[0] = src_rows.at(border_offsets.c(0))[index]; + src[1] = src_rows.at(border_offsets.c(1))[index]; + src[2] = src_rows.at(border_offsets.c(2))[index]; + src[3] = src_rows.at(border_offsets.c(3))[index]; + src[4] = src_rows.at(border_offsets.c(4))[index]; + src[5] = src_rows.at(border_offsets.c(5))[index]; + src[6] = src_rows.at(border_offsets.c(6))[index]; filter_.vertical_scalar_path(src, &dst_rows[index]); }); } @@ -72,13 +72,13 @@ class SeparableFilter { BufferVecTraits::num_lanes()}; loop.unroll_twice([&](size_t index) { - auto src_0 = &src_rows.at(0, border_offsets.c0())[index]; - auto src_1 = &src_rows.at(0, border_offsets.c1())[index]; - auto src_2 = &src_rows.at(0, border_offsets.c2())[index]; - auto src_3 = &src_rows.at(0, border_offsets.c3())[index]; - auto src_4 = &src_rows.at(0, border_offsets.c4())[index]; - auto src_5 = &src_rows.at(0, border_offsets.c5())[index]; - auto src_6 = &src_rows.at(0, border_offsets.c6())[index]; + auto src_0 = &src_rows.at(0, border_offsets.c(0))[index]; + auto src_1 = &src_rows.at(0, border_offsets.c(1))[index]; + auto src_2 = &src_rows.at(0, border_offsets.c(2))[index]; + auto src_3 = &src_rows.at(0, border_offsets.c(3))[index]; + auto src_4 = &src_rows.at(0, border_offsets.c(4))[index]; + auto src_5 = &src_rows.at(0, border_offsets.c(5))[index]; + auto src_6 = &src_rows.at(0, border_offsets.c(6))[index]; BufferVectorType src_a[7], src_b[7]; src_a[0] = vld1q(&src_0[0]); @@ -103,13 +103,13 @@ class SeparableFilter { loop.unroll_once([&](size_t index) { BufferVectorType src[7]; - src[0] = vld1q(&src_rows.at(0, border_offsets.c0())[index]); - src[1] = vld1q(&src_rows.at(0, border_offsets.c1())[index]); - src[2] = vld1q(&src_rows.at(0, border_offsets.c2())[index]); - src[3] = vld1q(&src_rows.at(0, border_offsets.c3())[index]); - src[4] = vld1q(&src_rows.at(0, border_offsets.c4())[index]); - src[5] = vld1q(&src_rows.at(0, border_offsets.c5())[index]); - src[6] = vld1q(&src_rows.at(0, border_offsets.c6())[index]); + src[0] = vld1q(&src_rows.at(0, border_offsets.c(0))[index]); + src[1] = vld1q(&src_rows.at(0, border_offsets.c(1))[index]); + src[2] = vld1q(&src_rows.at(0, border_offsets.c(2))[index]); + src[3] = vld1q(&src_rows.at(0, border_offsets.c(3))[index]); + src[4] = vld1q(&src_rows.at(0, border_offsets.c(4))[index]); + src[5] = vld1q(&src_rows.at(0, border_offsets.c(5))[index]); + src[6] = vld1q(&src_rows.at(0, border_offsets.c(6))[index]); filter_.horizontal_vector_path(src, &dst_rows[index]); }); @@ -133,13 +133,13 @@ class SeparableFilter { BorderOffsets border_offsets, size_t index) const { BufferType src[7]; - src[0] = src_rows.at(0, border_offsets.c0())[index]; - src[1] = src_rows.at(0, border_offsets.c1())[index]; - src[2] = src_rows.at(0, border_offsets.c2())[index]; - src[3] = src_rows.at(0, border_offsets.c3())[index]; - src[4] = src_rows.at(0, border_offsets.c4())[index]; - src[5] = src_rows.at(0, border_offsets.c5())[index]; - src[6] = src_rows.at(0, border_offsets.c6())[index]; + src[0] = src_rows.at(0, border_offsets.c(0))[index]; + src[1] = src_rows.at(0, border_offsets.c(1))[index]; + src[2] = src_rows.at(0, border_offsets.c(2))[index]; + src[3] = src_rows.at(0, border_offsets.c(3))[index]; + src[4] = src_rows.at(0, border_offsets.c(4))[index]; + src[5] = src_rows.at(0, border_offsets.c(5))[index]; + src[6] = src_rows.at(0, border_offsets.c(6))[index]; filter_.horizontal_scalar_path(src, &dst_rows[index]); } diff --git a/kleidicv/include/kleidicv/separable_filter_7x7_sc.h b/kleidicv/include/kleidicv/separable_filter_7x7_sc.h index 33f204a10..eab3df4b1 100644 --- a/kleidicv/include/kleidicv/separable_filter_7x7_sc.h +++ b/kleidicv/include/kleidicv/separable_filter_7x7_sc.h @@ -6,7 +6,7 @@ #define KLEIDICV_SEPARABLE_FILTER_7X7_SC_H #include "kleidicv/sve2.h" -#include "kleidicv/workspace/border_7x7.h" +#include "kleidicv/workspace/border.h" // It is used by SVE2 and SME2, the actual namespace will reflect it. namespace KLEIDICV_TARGET_NAMESPACE { @@ -29,7 +29,7 @@ class SeparableFilter { typename ::KLEIDICV_TARGET_NAMESPACE::VecTraits; using BufferVectorType = typename BufferVecTraits::VectorType; using BorderInfoType = - typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo7x7; + typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo; using BorderType = FixedBorderType; using BorderOffsets = typename BorderInfoType::Offsets; @@ -95,19 +95,19 @@ class SeparableFilter { BorderOffsets border_offsets, size_t index) const KLEIDICV_STREAMING_COMPATIBLE { SourceVectorType src_0 = - svld1(pg, &src_rows.at(border_offsets.c0())[index]); + svld1(pg, &src_rows.at(border_offsets.c(0))[index]); SourceVectorType src_1 = - svld1(pg, &src_rows.at(border_offsets.c1())[index]); + svld1(pg, &src_rows.at(border_offsets.c(1))[index]); SourceVectorType src_2 = - svld1(pg, &src_rows.at(border_offsets.c2())[index]); + svld1(pg, &src_rows.at(border_offsets.c(2))[index]); SourceVectorType src_3 = - svld1(pg, &src_rows.at(border_offsets.c3())[index]); + svld1(pg, &src_rows.at(border_offsets.c(3))[index]); SourceVectorType src_4 = - svld1(pg, &src_rows.at(border_offsets.c4())[index]); + svld1(pg, &src_rows.at(border_offsets.c(4))[index]); SourceVectorType src_5 = - svld1(pg, &src_rows.at(border_offsets.c5())[index]); + svld1(pg, &src_rows.at(border_offsets.c(5))[index]); SourceVectorType src_6 = - svld1(pg, &src_rows.at(border_offsets.c6())[index]); + svld1(pg, &src_rows.at(border_offsets.c(6))[index]); filter_.vertical_vector_path(pg, src_0, src_1, src_2, src_3, src_4, src_5, src_6, &dst_rows[index]); } @@ -116,13 +116,13 @@ class SeparableFilter { svbool_t pg, Rows src_rows, Rows dst_rows, BorderOffsets border_offsets, size_t index) const KLEIDICV_STREAMING_COMPATIBLE { - auto src_0 = &src_rows.at(0, border_offsets.c0())[index]; - auto src_1 = &src_rows.at(0, border_offsets.c1())[index]; - auto src_2 = &src_rows.at(0, border_offsets.c2())[index]; - auto src_3 = &src_rows.at(0, border_offsets.c3())[index]; - auto src_4 = &src_rows.at(0, border_offsets.c4())[index]; - auto src_5 = &src_rows.at(0, border_offsets.c5())[index]; - auto src_6 = &src_rows.at(0, border_offsets.c6())[index]; + auto src_0 = &src_rows.at(0, border_offsets.c(0))[index]; + auto src_1 = &src_rows.at(0, border_offsets.c(1))[index]; + auto src_2 = &src_rows.at(0, border_offsets.c(2))[index]; + auto src_3 = &src_rows.at(0, border_offsets.c(3))[index]; + auto src_4 = &src_rows.at(0, border_offsets.c(4))[index]; + auto src_5 = &src_rows.at(0, border_offsets.c(5))[index]; + auto src_6 = &src_rows.at(0, border_offsets.c(6))[index]; BufferVectorType src_0_0 = svld1(pg, &src_0[0]); BufferVectorType src_1_0 = svld1_vnum(pg, &src_0[0], 1); @@ -151,19 +151,19 @@ class SeparableFilter { BorderOffsets border_offsets, size_t index) const KLEIDICV_STREAMING_COMPATIBLE { BufferVectorType src_0 = - svld1(pg, &src_rows.at(0, border_offsets.c0())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(0))[index]); BufferVectorType src_1 = - svld1(pg, &src_rows.at(0, border_offsets.c1())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(1))[index]); BufferVectorType src_2 = - svld1(pg, &src_rows.at(0, border_offsets.c2())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(2))[index]); BufferVectorType src_3 = - svld1(pg, &src_rows.at(0, border_offsets.c3())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(3))[index]); BufferVectorType src_4 = - svld1(pg, &src_rows.at(0, border_offsets.c4())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(4))[index]); BufferVectorType src_5 = - svld1(pg, &src_rows.at(0, border_offsets.c5())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(5))[index]); BufferVectorType src_6 = - svld1(pg, &src_rows.at(0, border_offsets.c6())[index]); + svld1(pg, &src_rows.at(0, border_offsets.c(6))[index]); filter_.horizontal_vector_path(pg, src_0, src_1, src_2, src_3, src_4, src_5, src_6, &dst_rows[index]); } @@ -173,13 +173,13 @@ class SeparableFilter { BorderOffsets border_offsets, size_t index) const KLEIDICV_STREAMING_COMPATIBLE { BufferType src[7]; - src[0] = src_rows.at(0, border_offsets.c0())[index]; - src[1] = src_rows.at(0, border_offsets.c1())[index]; - src[2] = src_rows.at(0, border_offsets.c2())[index]; - src[3] = src_rows.at(0, border_offsets.c3())[index]; - src[4] = src_rows.at(0, border_offsets.c4())[index]; - src[5] = src_rows.at(0, border_offsets.c5())[index]; - src[6] = src_rows.at(0, border_offsets.c6())[index]; + src[0] = src_rows.at(0, border_offsets.c(0))[index]; + src[1] = src_rows.at(0, border_offsets.c(1))[index]; + src[2] = src_rows.at(0, border_offsets.c(2))[index]; + src[3] = src_rows.at(0, border_offsets.c(3))[index]; + src[4] = src_rows.at(0, border_offsets.c(4))[index]; + src[5] = src_rows.at(0, border_offsets.c(5))[index]; + src[6] = src_rows.at(0, border_offsets.c(6))[index]; filter_.horizontal_scalar_path(src, &dst_rows[index]); } diff --git a/kleidicv/include/kleidicv/workspace/border.h b/kleidicv/include/kleidicv/workspace/border.h new file mode 100644 index 000000000..a1b4164bc --- /dev/null +++ b/kleidicv/include/kleidicv/workspace/border.h @@ -0,0 +1,265 @@ +// SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_WORKSPACE_BORDER_H +#define KLEIDICV_WORKSPACE_BORDER_H + +#include "border_types.h" +#include "kleidicv/kleidicv.h" + +namespace KLEIDICV_TARGET_NAMESPACE { + +// Border offsets for fixed-size filters. +template +class FixedBorderInfo; + +template +class FixedBorderInfo final { + public: + // Simple object holding read-only constant offsets. + // Note: We are not using the default constructor, but it is defined for the + // unreachable cases in the code below. NOLINTNEXTLINE + class Offsets final { + public: + Offsets() = default; + + template + explicit Offsets(Args... args) : offsets_{static_cast(args)...} { + static_assert(sizeof...(args) == KernelSize); + } + + size_t c(size_t i) const { return offsets_[i]; } + + private: + template + static inline Offsets from_seq(std::integer_sequence) { + return Offsets{SeqNum...}; + } + + friend class FixedBorderInfo; + size_t offsets_[KernelSize]; + }; + + FixedBorderInfo(size_t length, FixedBorderType border_type) + : length_(length), border_type_(border_type) {} + + // Returns offsets without the influence of any border. + Offsets offsets_without_border() const KLEIDICV_STREAMING_COMPATIBLE { + constexpr auto seq = std::make_integer_sequence> 1)>{}; + return get_no_border(seq); + } + + // Returns offsets for rows/columns affected by the top or the left border. + Offsets offsets_with_top_or_left_border(size_t index) const + KLEIDICV_STREAMING_COMPATIBLE { + constexpr auto seq = std::make_integer_sequence> 1)>{}; + switch (border_type_) { + case FixedBorderType::REPLICATE: + return get_offsets(index, seq); + break; + + case FixedBorderType::REFLECT: + return get_offsets(index, seq); + break; + + case FixedBorderType::WRAP: + return get_offsets(index, seq); + break; + + case FixedBorderType::REVERSE: + return get_offsets(index, seq); + break; + } + // Unreachable. Compiler should emit a warning-as-error if any cases are + // uncovered above. + return Offsets{}; // GCOVR_EXCL_LINE + } + + // Returns offsets for rows/columns affected by the bottom or the + // right border. + Offsets offsets_with_bottom_or_right_border(size_t index) const + KLEIDICV_STREAMING_COMPATIBLE { + constexpr auto seq = std::make_integer_sequence> 1)>{}; + index = length_ - index - 1; + switch (border_type_) { + case FixedBorderType::REPLICATE: + return get_offsets(index, seq); + break; + + case FixedBorderType::REFLECT: + return get_offsets(index, seq); + break; + + case FixedBorderType::WRAP: + return get_offsets(index, seq); + break; + + case FixedBorderType::REVERSE: + return get_offsets(index, seq); + break; + } + // Unreachable. Compiler should emit a warning-as-error if any cases are + // uncovered above. + return Offsets{}; // GCOVR_EXCL_LINE + } + + // Returns offsets for rows/columns affected by any border. + Offsets offsets_with_border(size_t row_or_column_index) const + KLEIDICV_STREAMING_COMPATIBLE { + if (row_or_column_index < (KernelSize >> 1)) { + // Rows and columns have the same offsets. + return offsets_with_top_or_left_border(row_or_column_index); + } + if (row_or_column_index >= (length_ - (KernelSize >> 1))) { + // Rows and columns have the same offsets. + return offsets_with_bottom_or_right_border(row_or_column_index); + } + return offsets_without_border(); + } + + private: + // Creates the Offsets object containing offsets in the interval + // [-(KernelSize / 2), KernelSize / 2]. + template + inline Offsets get_no_border(std::integer_sequence) const + KLEIDICV_STREAMING_COMPATIBLE { + // Example (15x15): Offsets{-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, + // 7}; + constexpr int k_2 = static_cast(KernelSize >> 1); + return Offsets{(SeqNum - k_2)..., 0, (SeqNum + 1)...}; + } + + // Creates the Offsets object containing offsets in various intervals + // depending on the row/column index, border type as well the border + // position used. + template + static constexpr auto generate_border_offsets( + std::integer_sequence) { + constexpr int k_2 = static_cast(KernelSize >> 1); + if constexpr (BorderType == FixedBorderType::REPLICATE && !IsRight) { + // Example (15x15, index 4, left): Offsets{-4, -4, -4, -4, -3, -2, -1, 0, + // 1, 2, 3, 4, 5, 6, 7}; + return std::integer_sequence < int, + (SeqNum - k_2 < -Index) ? -Index : (SeqNum - k_2)..., 0, + (SeqNum + 1)... > {}; + } + + if constexpr (BorderType == FixedBorderType::REPLICATE && IsRight) { + // Example (15x15, index 4, right): Offsets{-7, -6, -5, -4, -3, -2, -1, 0, + // 1, 2, 3, 4, 4, 4, 4}; + return std::integer_sequence < int, (SeqNum - k_2)..., 0, + (SeqNum >= Index) ? Index : (SeqNum + 1)... > {}; + } + + if constexpr (BorderType == FixedBorderType::REFLECT && !IsRight) { + // Example (15x15, index 4, left): Offsets{-2, -3, -4, -4, -3, -2, -1, 0, + // 1, 2, 3, 4, 5, 6, 7}; + return std::integer_sequence < int, + (SeqNum - k_2 < -Index) ? (k_2 - (Index << 1) - (SeqNum + 1)) + : (SeqNum - k_2)..., + 0, (SeqNum + 1)... > {}; + } + + if constexpr (BorderType == FixedBorderType::REFLECT && IsRight) { + // Example (15x15, index 4, right): Offsets{-7, -6, -5, -4, -3, -2, -1, 0, + // 1, 2, 3, 4, 4, 3, 2}; + return std::integer_sequence < int, (SeqNum - k_2)..., 0, + (SeqNum >= Index) ? ((Index << 1) - SeqNum) : (SeqNum + 1)... > {}; + } + + if constexpr (BorderType == FixedBorderType::REVERSE && !IsRight) { + // Example (15x15, index 4, left): Offsets{-1, -2, -3, -4, -3, -2, -1, 0, + // 1, 2, 3, 4, 5, 6, 7}; + return std::integer_sequence < int, + (SeqNum - k_2 < -Index) ? (k_2 - (Index << 1) - SeqNum) + : (SeqNum - k_2)..., + 0, (SeqNum + 1)... > {}; + } + + if constexpr (BorderType == FixedBorderType::REVERSE && IsRight) { + // Example (15x15, index 4, right): Offsets{-7, -6, -5, -4, -3, -2, -1, 0, + // 1, 2, 3, 4, 3, 2, 1}; + return std::integer_sequence < int, (SeqNum - k_2)..., 0, + (SeqNum >= Index) ? ((Index << 1) - (SeqNum + 1)) + : (SeqNum + 1)... > {}; + } + } + + template + inline Offsets generate_border_offsets_wrap( + std::integer_sequence) const + KLEIDICV_STREAMING_COMPATIBLE { + constexpr int k_2 = static_cast(KernelSize >> 1); + if constexpr (!IsRight) { + // Example (15x15, index 4, left): Offsets{length_ - 7, length_ - 6, + // length_ - 5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7}; + return Offsets{(SeqNum - k_2 < -Index) ? (SeqNum - k_2 + length_) + : (SeqNum - k_2)..., + 0, (SeqNum + 1)...}; + } + + if constexpr (IsRight) { + // Example (15x15, index 4, right): Offsets{-7, -6, -5, -4, -3, -2, -1, 0, + // 1, 2, 3, 4, 5 - length_, 6 - length_, 7 - length_}; + return Offsets{ + (SeqNum - k_2)..., 0, + (SeqNum >= Index) ? (SeqNum - length_ + 1) : (SeqNum + 1)...}; + } + } + + // NOLINTBEGIN(readability-function-cognitive-complexity, + // readability-avoid-nested-conditional-operator) + template + inline Offsets get_offsets(int index, + std::integer_sequence seq) const + KLEIDICV_STREAMING_COMPATIBLE { + if constexpr (BorderType == FixedBorderType::WRAP) { + return (index == 0) ? generate_border_offsets_wrap<0, IsRight>(seq) + : (index == 1) ? generate_border_offsets_wrap<1, IsRight>(seq) + : (index == 2) ? generate_border_offsets_wrap<2, IsRight>(seq) + : (index == 3) ? generate_border_offsets_wrap<3, IsRight>(seq) + : (index == 4) ? generate_border_offsets_wrap<4, IsRight>(seq) + : (index == 5) ? generate_border_offsets_wrap<5, IsRight>(seq) + : (index == 6) ? generate_border_offsets_wrap<6, IsRight>(seq) + : (index == 7) ? generate_border_offsets_wrap<7, IsRight>(seq) + : (index == 8) ? generate_border_offsets_wrap<8, IsRight>(seq) + : (index == 9) ? generate_border_offsets_wrap<9, IsRight>(seq) + : (index == 10) ? generate_border_offsets_wrap<10, IsRight>(seq) + : (index == 11) ? generate_border_offsets_wrap<11, IsRight>(seq) + : (index == 12) ? generate_border_offsets_wrap<12, IsRight>(seq) + : (index == 13) ? generate_border_offsets_wrap<13, IsRight>(seq) + : (index == 14) ? generate_border_offsets_wrap<14, IsRight>(seq) + : Offsets{}; + } else { + // clang-format off + return + (index == 0) ? Offsets::from_seq(generate_border_offsets<0, BorderType, IsRight>(seq)) + : (index == 1) ? Offsets::from_seq(generate_border_offsets<1, BorderType, IsRight>(seq)) + : (index == 2) ? Offsets::from_seq(generate_border_offsets<2, BorderType, IsRight>(seq)) + : (index == 3) ? Offsets::from_seq(generate_border_offsets<3, BorderType, IsRight>(seq)) + : (index == 4) ? Offsets::from_seq(generate_border_offsets<4, BorderType, IsRight>(seq)) + : (index == 5) ? Offsets::from_seq(generate_border_offsets<5, BorderType, IsRight>(seq)) + : (index == 6) ? Offsets::from_seq(generate_border_offsets<6, BorderType, IsRight>(seq)) + : (index == 7) ? Offsets::from_seq(generate_border_offsets<7, BorderType, IsRight>(seq)) + : (index == 8) ? Offsets::from_seq(generate_border_offsets<8, BorderType, IsRight>(seq)) + : (index == 9) ? Offsets::from_seq(generate_border_offsets<9, BorderType, IsRight>(seq)) + : (index == 10) ? Offsets::from_seq(generate_border_offsets<10, BorderType, IsRight>(seq)) + : (index == 11) ? Offsets::from_seq(generate_border_offsets<11, BorderType, IsRight>(seq)) + : (index == 12) ? Offsets::from_seq(generate_border_offsets<12, BorderType, IsRight>(seq)) + : (index == 13) ? Offsets::from_seq(generate_border_offsets<13, BorderType, IsRight>(seq)) + : (index == 14) ? Offsets::from_seq(generate_border_offsets<14, BorderType, IsRight>(seq)) + : Offsets{}; + // clang-format on + } + } + // NOLINTEND(readability-function-cognitive-complexity, + // readability-avoid-nested-conditional-operator) + + size_t length_; + FixedBorderType border_type_; +}; // end of class FixedBorderInfo + +} // namespace KLEIDICV_TARGET_NAMESPACE + +#endif // KLEIDICV_WORKSPACE_BORDER_H diff --git a/kleidicv/include/kleidicv/workspace/border_15x15.h b/kleidicv/include/kleidicv/workspace/border_15x15.h deleted file mode 100644 index eb3ae12ad..000000000 --- a/kleidicv/include/kleidicv/workspace/border_15x15.h +++ /dev/null @@ -1,276 +0,0 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates -// -// SPDX-License-Identifier: Apache-2.0 - -#ifndef KLEIDICV_WORKSPACE_BORDER_15X15_H -#define KLEIDICV_WORKSPACE_BORDER_15X15_H - -#include "border_types.h" -#include "kleidicv/kleidicv.h" - -namespace KLEIDICV_TARGET_NAMESPACE { - -// Border offsets for fixed-size filters. -template -class FixedBorderInfo; - -// Border offsets for 15x15 filters. -template -class FixedBorderInfo final { - public: - // Simple object holding read-only constant offsets. - class Offsets final { - public: - // NOLINTBEGIN(hicpp-member-init) - Offsets() = default; - // NOLINTEND(hicpp-member-init) - - Offsets(size_t o0, size_t o1, size_t o2, size_t o3, size_t o4, size_t o5, - size_t o6, size_t o7, size_t o8, size_t o9, size_t o10, size_t o11, - size_t o12, size_t o13, size_t o14) - : offsets_{o0, o1, o2, o3, o4, o5, o6, o7, - o8, o9, o10, o11, o12, o13, o14} {} - - size_t c0() const { return offsets_[0]; } - size_t c1() const { return offsets_[1]; } - size_t c2() const { return offsets_[2]; } - size_t c3() const { return offsets_[3]; } - size_t c4() const { return offsets_[4]; } - size_t c5() const { return offsets_[5]; } - size_t c6() const { return offsets_[6]; } - size_t c7() const { return offsets_[7]; } - size_t c8() const { return offsets_[8]; } - size_t c9() const { return offsets_[9]; } - size_t c10() const { return offsets_[10]; } - size_t c11() const { return offsets_[11]; } - size_t c12() const { return offsets_[12]; } - size_t c13() const { return offsets_[13]; } - size_t c14() const { return offsets_[14]; } - - private: - size_t offsets_[15]; - }; - - FixedBorderInfo(size_t height, FixedBorderType border_type) - : height_(height), border_type_(border_type) {} - - // Returns offsets without the influence of any border. - Offsets offsets_without_border() const KLEIDICV_STREAMING_COMPATIBLE { - return get(-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7); - } - - // NOLINTBEGIN(readability-function-cognitive-complexity) - // Returns offsets for columns affected by left border. - Offsets offsets_with_left_border(size_t column_index) const - KLEIDICV_STREAMING_COMPATIBLE { - switch (border_type_) { - case FixedBorderType::REPLICATE: - if (column_index == 0) { - return get(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7); - } else if (column_index == 1) { - return get(-1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7); - } else if (column_index == 2) { - return get(-2, -2, -2, -2, -2, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7); - } else if (column_index == 3) { - return get(-3, -3, -3, -3, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7); - } else if (column_index == 4) { - return get(-4, -4, -4, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7); - } else if (column_index == 5) { - return get(-5, -5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7); - } else { - return get(-6, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7); - } - break; - - case FixedBorderType::REFLECT: - if (column_index == 0) { - return get(6, 5, 4, 3, 2, 1, 0, 0, 1, 2, 3, 4, 5, 6, 7); - } else if (column_index == 1) { - return get(4, 3, 2, 1, 0, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7); - } else if (column_index == 2) { - return get(2, 1, 0, -1, -2, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7); - } else if (column_index == 3) { - return get(0, -1, -2, -3, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7); - } else if (column_index == 4) { - return get(-2, -3, -4, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7); - } else if (column_index == 5) { - return get(-4, -5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7); - } else { - return get(-6, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7); - } - break; - - case FixedBorderType::WRAP: - if (column_index == 0) { - return get(height_ - 7, height_ - 6, height_ - 5, height_ - 4, - height_ - 3, height_ - 2, height_ - 1, 0, 1, 2, 3, 4, 5, 6, - 7); - } else if (column_index == 1) { - return get(height_ - 7, height_ - 6, height_ - 5, height_ - 4, - height_ - 3, height_ - 2, -1, 0, 1, 2, 3, 4, 5, 6, 7); - } else if (column_index == 2) { - return get(height_ - 7, height_ - 6, height_ - 5, height_ - 4, - height_ - 3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7); - } else if (column_index == 3) { - return get(height_ - 7, height_ - 6, height_ - 5, height_ - 4, -3, -2, - -1, 0, 1, 2, 3, 4, 5, 6, 7); - } else if (column_index == 4) { - return get(height_ - 7, height_ - 6, height_ - 5, -4, -3, -2, -1, 0, - 1, 2, 3, 4, 5, 6, 7); - } else if (column_index == 5) { - return get(height_ - 7, height_ - 6, -5, -4, -3, -2, -1, 0, 1, 2, 3, - 4, 5, 6, 7); - } else { - return get(height_ - 7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, - 7); - } - break; - - case FixedBorderType::REVERSE: - if (column_index == 0) { - return get(7, 6, 5, 4, 3, 2, 1, 0, 1, 2, 3, 4, 5, 6, 7); - } else if (column_index == 1) { - return get(5, 4, 3, 2, 1, 0, -1, 0, 1, 2, 3, 4, 5, 6, 7); - } else if (column_index == 2) { - return get(3, 2, 1, 0, -1, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7); - } else if (column_index == 3) { - return get(1, 0, -1, -2, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7); - } else if (column_index == 4) { - return get(-1, -2, -3, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7); - } else if (column_index == 5) { - return get(-3, -4, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7); - } else { - return get(-5, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7); - } - break; - } - // Unreachable. Compiler should emit a warning-as-error if any cases are - // uncovered above. - return Offsets{}; // GCOVR_EXCL_LINE - } - - // Returns offsets for columns affected by right border. - Offsets offsets_with_right_border(size_t column_index) const - KLEIDICV_STREAMING_COMPATIBLE { - switch (border_type_) { - case FixedBorderType::REPLICATE: - if (column_index == (height_ - 7)) { - return get(-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 6); - } else if (column_index == (height_ - 6)) { - return get(-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 5, 5); - } else if (column_index == (height_ - 5)) { - return get(-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 4, 4, 4); - } else if (column_index == (height_ - 4)) { - return get(-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 3, 3, 3, 3); - } else if (column_index == (height_ - 3)) { - return get(-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 2, 2, 2, 2, 2); - } else if (column_index == (height_ - 2)) { - return get(-7, -6, -5, -4, -3, -2, -1, 0, 1, 1, 1, 1, 1, 1, 1); - } else { - return get(-7, -6, -5, -4, -3, -2, -1, 0, 0, 0, 0, 0, 0, 0, 0); - } - break; - - case FixedBorderType::REFLECT: - if (column_index == (height_ - 7)) { - return get(-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 6); - } else if (column_index == (height_ - 6)) { - return get(-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 5, 4); - } else if (column_index == (height_ - 5)) { - return get(-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 4, 3, 2); - } else if (column_index == (height_ - 4)) { - return get(-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 3, 2, 1, 0); - } else if (column_index == (height_ - 3)) { - return get(-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 2, 1, 0, -1, -2); - } else if (column_index == (height_ - 2)) { - return get(-7, -6, -5, -4, -3, -2, -1, 0, 1, 1, 0, -1, -2, -3, -4); - } else { - return get(-7, -6, -5, -4, -3, -2, -1, 0, 0, -1, -2, -3, -4, -5, -6); - } - break; - - case FixedBorderType::WRAP: - if (column_index == (height_ - 7)) { - return get(-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, - 7 - height_); - } else if (column_index == (height_ - 6)) { - return get(-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6 - height_, - 7 - height_); - } else if (column_index == (height_ - 5)) { - return get(-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5 - height_, - 6 - height_, 7 - height_); - } else if (column_index == (height_ - 4)) { - return get(-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4 - height_, - 5 - height_, 6 - height_, 7 - height_); - } else if (column_index == (height_ - 3)) { - return get(-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3 - height_, - 4 - height_, 5 - height_, 6 - height_, 7 - height_); - } else if (column_index == (height_ - 2)) { - return get(-7, -6, -5, -4, -3, -2, -1, 0, 1, 2 - height_, 3 - height_, - 4 - height_, 5 - height_, 6 - height_, 7 - height_); - } else { - return get(-7, -6, -5, -4, -3, -2, -1, 0, 1 - height_, 2 - height_, - 3 - height_, 4 - height_, 5 - height_, 6 - height_, - 7 - height_); - } - break; - - case FixedBorderType::REVERSE: - if (column_index == (height_ - 7)) { - return get(-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 5); - } else if (column_index == (height_ - 6)) { - return get(-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 4, 3); - } else if (column_index == (height_ - 5)) { - return get(-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 3, 2, 1); - } else if (column_index == (height_ - 4)) { - return get(-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 2, 1, 0, -1); - } else if (column_index == (height_ - 3)) { - return get(-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 1, 0, -1, -2, -3); - } else if (column_index == (height_ - 2)) { - return get(-7, -6, -5, -4, -3, -2, -1, 0, 1, 0, -1, -2, -3, -4, -5); - } else { - return get(-7, -6, -5, -4, -3, -2, -1, 0, -1, -2, -3, -4, -5, -6, -7); - } - break; - } - // Unreachable. Compiler should emit a warning-as-error if any cases are - // uncovered above. - return Offsets{}; // GCOVR_EXCL_LINE - } - // NOLINTEND(readability-function-cognitive-complexity) - - // Returns offsets for rows or columns affected by any border. - Offsets offsets_with_border(size_t row_or_column_index) const - KLEIDICV_STREAMING_COMPATIBLE { - if (row_or_column_index <= 6U) { - // Rows and columns have the same offsets. - return offsets_with_left_border(row_or_column_index); - } - if (row_or_column_index >= (height_ - 7U)) { - // Rows and columns have the same offsets. - return offsets_with_right_border(row_or_column_index); - } - return offsets_without_border(); - } - - private: - // Takes care of static signed to unsigned casts. - Offsets get(size_t o0, size_t o1, size_t o2, size_t o3, size_t o4, size_t o5, - size_t o6, size_t o7, size_t o8, size_t o9, size_t o10, - size_t o11, size_t o12, size_t o13, - size_t o14) const KLEIDICV_STREAMING_COMPATIBLE { - return Offsets{o0, o1, o2, o3, o4, o5, o6, o7, - o8, o9, o10, o11, o12, o13, o14}; - } - - size_t height_; - FixedBorderType border_type_; -}; // end of class FixedBorderInfo - -// Shorthand for 15x15 filter border type. -template -using FixedBorderInfo15x15 = FixedBorderInfo; - -} // namespace KLEIDICV_TARGET_NAMESPACE - -#endif // KLEIDICV_WORKSPACE_BORDER_15X15_H diff --git a/kleidicv/include/kleidicv/workspace/border_3x3.h b/kleidicv/include/kleidicv/workspace/border_3x3.h deleted file mode 100644 index ecd5627d3..000000000 --- a/kleidicv/include/kleidicv/workspace/border_3x3.h +++ /dev/null @@ -1,116 +0,0 @@ -// SPDX-FileCopyrightText: 2023 Arm Limited and/or its affiliates -// -// SPDX-License-Identifier: Apache-2.0 - -#ifndef KLEIDICV_WORKSPACE_BORDER_3X3_H -#define KLEIDICV_WORKSPACE_BORDER_3X3_H - -#include "border_types.h" -#include "kleidicv/kleidicv.h" - -namespace KLEIDICV_TARGET_NAMESPACE { - -// Border offsets for fixed-size filters. -template -class FixedBorderInfo; - -// Border offsets for 3x3 filters. -template -class FixedBorderInfo final { - public: - // Simple object holding read-only constant offsets. - class Offsets final { - public: - Offsets() = default; - - Offsets(size_t o0, size_t o1, size_t o2) : offsets_{o0, o1, o2} {} - - size_t c0() const { return offsets_[0]; } - size_t c1() const { return offsets_[1]; } - size_t c2() const { return offsets_[2]; } - - private: - size_t offsets_[3]; - }; - - FixedBorderInfo(size_t height, FixedBorderType border_type) - : height_(height), border_type_(border_type) {} - - // Returns offsets without the influence of any border. - Offsets offsets_without_border() const { return get(-1, 0, 1); } - - // Returns offsets for columns affected by left border. - Offsets offsets_with_left_border(size_t /* column_index */) const - KLEIDICV_STREAMING_COMPATIBLE { - switch (border_type_) { - case FixedBorderType::REPLICATE: - case FixedBorderType::REFLECT: - return get(0, 0, 1); - break; - - case FixedBorderType::WRAP: - return get(height_ - 1, 0, 1); - break; - - case FixedBorderType::REVERSE: - return get(1, 0, 1); - break; - } - // Unreachable. Compiler should emit a warning-as-error if any cases are - // uncovered above. - return Offsets{}; // GCOVR_EXCL_LINE - } - - // Returns offsets for columns affected by right border. - Offsets offsets_with_right_border(size_t /* column_index */) const - KLEIDICV_STREAMING_COMPATIBLE { - switch (border_type_) { - case FixedBorderType::REPLICATE: - case FixedBorderType::REFLECT: - return get(-1, 0, 0); - break; - - case FixedBorderType::WRAP: - return get(-1, 0, 1 - height_); - break; - - case FixedBorderType::REVERSE: - return get(-1, 0, -1); - break; - } - // Unreachable. Compiler should emit a warning-as-error if any cases are - // uncovered above. - return Offsets{}; // GCOVR_EXCL_LINE - } - - // Returns offsets for rows or columns affected by any border. - Offsets offsets_with_border(size_t row_or_column_index) const - KLEIDICV_STREAMING_COMPATIBLE { - if (row_or_column_index == 0U) { - // Rows and columns have the same offsets. - return offsets_with_left_border(row_or_column_index); - } - if (row_or_column_index == (height_ - 1U)) { - // Rows and columns have the same offsets. - return offsets_with_right_border(row_or_column_index); - } - return offsets_without_border(); - } - - private: - // Takes care of static signed to unsigned casts. - Offsets get(size_t o0, size_t o1, size_t o2) const { - return Offsets{o0, o1, o2}; - } - - size_t height_; - FixedBorderType border_type_; -}; // end of class FixedBorderInfo - -// Shorthand for 3x3 filter border type. -template -using FixedBorderInfo3x3 = FixedBorderInfo; - -} // namespace KLEIDICV_TARGET_NAMESPACE - -#endif // KLEIDICV_WORKSPACE_BORDER_3X3_H diff --git a/kleidicv/include/kleidicv/workspace/border_5x5.h b/kleidicv/include/kleidicv/workspace/border_5x5.h deleted file mode 100644 index 06c2683bd..000000000 --- a/kleidicv/include/kleidicv/workspace/border_5x5.h +++ /dev/null @@ -1,162 +0,0 @@ -// SPDX-FileCopyrightText: 2023 Arm Limited and/or its affiliates -// -// SPDX-License-Identifier: Apache-2.0 - -#ifndef KLEIDICV_WORKSPACE_BORDER_5X5_H -#define KLEIDICV_WORKSPACE_BORDER_5X5_H - -#include "border_types.h" -#include "kleidicv/kleidicv.h" - -namespace KLEIDICV_TARGET_NAMESPACE { - -// Border offsets for fixed-size filters. -template -class FixedBorderInfo; - -// Border offsets for 5x5 filters. -template -class FixedBorderInfo final { - public: - // Simple object holding read-only constant offsets. - class Offsets final { - public: - // NOLINTBEGIN(hicpp-member-init) - Offsets() = default; - // NOLINTEND(hicpp-member-init) - - Offsets(size_t o0, size_t o1, size_t o2, size_t o3, size_t o4) - : offsets_{o0, o1, o2, o3, o4} {} - - size_t c0() const { return offsets_[0]; } - size_t c1() const { return offsets_[1]; } - size_t c2() const { return offsets_[2]; } - size_t c3() const { return offsets_[3]; } - size_t c4() const { return offsets_[4]; } - - private: - size_t offsets_[5]; - }; - - FixedBorderInfo(size_t height, FixedBorderType border_type) - : height_(height), border_type_(border_type) {} - - // Returns offsets without the influence of any border. - Offsets offsets_without_border() const KLEIDICV_STREAMING_COMPATIBLE { - return get(-2, -1, 0, 1, 2); - } - - // Returns offsets for columns affected by left border. - Offsets offsets_with_left_border(size_t column_index) const - KLEIDICV_STREAMING_COMPATIBLE { - switch (border_type_) { - case FixedBorderType::REPLICATE: - if (column_index == 0) { - return get(0, 0, 0, 1, 2); - } else { - return get(-1, -1, 0, 1, 2); - } - break; - - case FixedBorderType::REFLECT: - if (column_index == 0) { - return get(1, 0, 0, 1, 2); - } else { - return get(-1, -1, 0, 1, 2); - } - break; - - case FixedBorderType::WRAP: - if (column_index == 0) { - return get(height_ - 2, height_ - 1, 0, 1, 2); - } else { - return get(height_ - 2, -1, 0, 1, 2); - } - break; - - case FixedBorderType::REVERSE: - if (column_index == 0) { - return get(2, 1, 0, 1, 2); - } else { - return get(0, -1, 0, 1, 2); - } - break; - } - // Unreachable. Compiler should emit a warning-as-error if any cases are - // uncovered above. - return Offsets{}; // GCOVR_EXCL_LINE - } - - // Returns offsets for columns affected by right border. - Offsets offsets_with_right_border(size_t column_index) const - KLEIDICV_STREAMING_COMPATIBLE { - switch (border_type_) { - case FixedBorderType::REPLICATE: - if (column_index == (height_ - 2)) { - return get(-2, -1, 0, 1, 1); - } else { - return get(-2, -1, 0, 0, 0); - } - break; - - case FixedBorderType::REFLECT: - if (column_index == (height_ - 2)) { - return get(-2, -1, 0, 1, 1); - } else { - return get(-2, -1, 0, 0, -1); - } - break; - - case FixedBorderType::WRAP: - if (column_index == (height_ - 2)) { - return get(-2, -1, 0, 1, 2 - height_); - } else { - return get(-2, -1, 0, 1 - height_, 2 - height_); - } - break; - - case FixedBorderType::REVERSE: - if (column_index == (height_ - 2)) { - return get(-2, -1, 0, 1, 0); - } else { - return get(-2, -1, 0, -1, -2); - } - break; - } - // Unreachable. Compiler should emit a warning-as-error if any cases are - // uncovered above. - return Offsets{}; // GCOVR_EXCL_LINE - } - - // Returns offsets for rows or columns affected by any border. - Offsets offsets_with_border(size_t row_or_column_index) const - KLEIDICV_STREAMING_COMPATIBLE { - if (row_or_column_index <= 1U) { - // Rows and columns have the same offsets. - return offsets_with_left_border(row_or_column_index); - } - if (row_or_column_index >= (height_ - 2U)) { - // Rows and columns have the same offsets. - return offsets_with_right_border(row_or_column_index); - } - return offsets_without_border(); - } - - private: - // Takes care of static signed to unsigned casts. - Offsets get(size_t o0, size_t o1, size_t o2, size_t o3, - size_t o4) const KLEIDICV_STREAMING_COMPATIBLE { - return Offsets{o0, o1, o2, o3, o4}; - } - - size_t height_; - FixedBorderType border_type_; -}; // end of class FixedBorderInfo - -// Shorthand for 5x5 filter border type. -template -using FixedBorderInfo5x5 = FixedBorderInfo; - -} // namespace KLEIDICV_TARGET_NAMESPACE - -#endif // KLEIDICV_WORKSPACE_BORDER_5X5_H diff --git a/kleidicv/include/kleidicv/workspace/border_7x7.h b/kleidicv/include/kleidicv/workspace/border_7x7.h deleted file mode 100644 index 75bb86117..000000000 --- a/kleidicv/include/kleidicv/workspace/border_7x7.h +++ /dev/null @@ -1,181 +0,0 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates -// -// SPDX-License-Identifier: Apache-2.0 - -#ifndef KLEIDICV_WORKSPACE_BORDER_7X7_H -#define KLEIDICV_WORKSPACE_BORDER_7X7_H - -#include "border_types.h" -#include "kleidicv/kleidicv.h" - -namespace KLEIDICV_TARGET_NAMESPACE { - -// Border offsets for fixed-size filters. -template -class FixedBorderInfo; - -// Border offsets for 7x7 filters. -template -class FixedBorderInfo final { - public: - // Simple object holding read-only constant offsets. - class Offsets final { - public: - // NOLINTBEGIN(hicpp-member-init) - Offsets() = default; - // NOLINTEND(hicpp-member-init) - - Offsets(size_t o0, size_t o1, size_t o2, size_t o3, size_t o4, size_t o5, - size_t o6) - : offsets_{o0, o1, o2, o3, o4, o5, o6} {} - - size_t c0() const { return offsets_[0]; } - size_t c1() const { return offsets_[1]; } - size_t c2() const { return offsets_[2]; } - size_t c3() const { return offsets_[3]; } - size_t c4() const { return offsets_[4]; } - size_t c5() const { return offsets_[5]; } - size_t c6() const { return offsets_[6]; } - - private: - size_t offsets_[7]; - }; - - FixedBorderInfo(size_t height, FixedBorderType border_type) - : height_(height), border_type_(border_type) {} - - // Returns offsets without the influence of any border. - Offsets offsets_without_border() const KLEIDICV_STREAMING_COMPATIBLE { - return get(-3, -2, -1, 0, 1, 2, 3); - } - - // Returns offsets for columns affected by left border. - Offsets offsets_with_left_border(size_t column_index) const - KLEIDICV_STREAMING_COMPATIBLE { - switch (border_type_) { - case FixedBorderType::REPLICATE: - if (column_index == 0) { - return get(0, 0, 0, 0, 1, 2, 3); - } else if (column_index == 1) { - return get(-1, -1, -1, 0, 1, 2, 3); - } else { - return get(-2, -2, -1, 0, 1, 2, 3); - } - break; - - case FixedBorderType::REFLECT: - if (column_index == 0) { - return get(2, 1, 0, 0, 1, 2, 3); - } else if (column_index == 1) { - return get(0, -1, -1, 0, 1, 2, 3); - } else { - return get(-2, -2, -1, 0, 1, 2, 3); - } - break; - - case FixedBorderType::WRAP: - if (column_index == 0) { - return get(height_ - 3, height_ - 2, height_ - 1, 0, 1, 2, 3); - } else if (column_index == 1) { - return get(height_ - 3, height_ - 2, -1, 0, 1, 2, 3); - } else { - return get(height_ - 3, -2, -1, 0, 1, 2, 3); - } - break; - - case FixedBorderType::REVERSE: - if (column_index == 0) { - return get(3, 2, 1, 0, 1, 2, 3); - } else if (column_index == 1) { - return get(1, 0, -1, 0, 1, 2, 3); - } else { - return get(-1, -2, -1, 0, 1, 2, 3); - } - break; - } - // Unreachable. Compiler should emit a warning-as-error if any cases are - // uncovered above. - return Offsets{}; // GCOVR_EXCL_LINE - } - - // Returns offsets for columns affected by right border. - Offsets offsets_with_right_border(size_t column_index) const - KLEIDICV_STREAMING_COMPATIBLE { - switch (border_type_) { - case FixedBorderType::REPLICATE: - if (column_index == (height_ - 3)) { - return get(-3, -2, -1, 0, 1, 2, 2); - } else if (column_index == (height_ - 2)) { - return get(-3, -2, -1, 0, 1, 1, 1); - } else { - return get(-3, -2, -1, 0, 0, 0, 0); - } - break; - - case FixedBorderType::REFLECT: - if (column_index == (height_ - 3)) { - return get(-3, -2, -1, 0, 1, 2, 2); - } else if (column_index == (height_ - 2)) { - return get(-3, -2, -1, 0, 1, 1, 0); - } else { - return get(-3, -2, -1, 0, 0, -1, -2); - } - break; - - case FixedBorderType::WRAP: - if (column_index == (height_ - 3)) { - return get(-3, -2, -1, 0, 1, 2, 3 - height_); - } else if (column_index == (height_ - 2)) { - return get(-3, -2, -1, 0, 1, 2 - height_, 3 - height_); - } else { - return get(-3, -2, -1, 0, 1 - height_, 2 - height_, 3 - height_); - } - break; - - case FixedBorderType::REVERSE: - if (column_index == (height_ - 3)) { - return get(-3, -2, -1, 0, 1, 2, 1); - } else if (column_index == (height_ - 2)) { - return get(-3, -2, -1, 0, 1, 0, -1); - } else { - return get(-3, -2, -1, 0, -1, -2, -3); - } - break; - } - // Unreachable. Compiler should emit a warning-as-error if any cases are - // uncovered above. - return Offsets{}; // GCOVR_EXCL_LINE - } - - // Returns offsets for rows or columns affected by any border. - Offsets offsets_with_border(size_t row_or_column_index) const - KLEIDICV_STREAMING_COMPATIBLE { - if (row_or_column_index <= 2U) { - // Rows and columns have the same offsets. - return offsets_with_left_border(row_or_column_index); - } - if (row_or_column_index >= (height_ - 3U)) { - // Rows and columns have the same offsets. - return offsets_with_right_border(row_or_column_index); - } - return offsets_without_border(); - } - - private: - // Takes care of static signed to unsigned casts. - Offsets get(size_t o0, size_t o1, size_t o2, size_t o3, size_t o4, size_t o5, - size_t o6) const KLEIDICV_STREAMING_COMPATIBLE { - return Offsets{o0, o1, o2, o3, o4, o5, o6}; - } - - size_t height_; - FixedBorderType border_type_; -}; // end of class FixedBorderInfo - -// Shorthand for 7x7 filter border type. -template -using FixedBorderInfo7x7 = FixedBorderInfo; - -} // namespace KLEIDICV_TARGET_NAMESPACE - -#endif // KLEIDICV_WORKSPACE_BORDER_7X7_H diff --git a/kleidicv/include/kleidicv/workspace/separable.h b/kleidicv/include/kleidicv/workspace/separable.h index 6a501686e..7a341d51e 100644 --- a/kleidicv/include/kleidicv/workspace/separable.h +++ b/kleidicv/include/kleidicv/workspace/separable.h @@ -168,7 +168,7 @@ class SeparableFilterWorkspace final { for (size_t horizontal_index = 0; horizontal_index < margin; ++horizontal_index) { auto offsets = - horizontal_border.offsets_with_left_border(horizontal_index); + horizontal_border.offsets_with_top_or_left_border(horizontal_index); filter.process_horizontal_borders(buffer_rows.at(0, horizontal_index), dst_rows.at(0, horizontal_index), offsets); @@ -188,7 +188,8 @@ class SeparableFilterWorkspace final { for (size_t horizontal_index = 0; horizontal_index < margin; ++horizontal_index) { size_t index = width - margin + horizontal_index; - auto offsets = horizontal_border.offsets_with_right_border(index); + auto offsets = + horizontal_border.offsets_with_bottom_or_right_border(index); filter.process_horizontal_borders(buffer_rows.at(0, index), dst_rows.at(0, index), offsets); } -- GitLab