diff --git a/kleidicv/include/kleidicv/filter_driver_neon.h b/kleidicv/include/kleidicv/filter_driver_neon.h index daacde7afb313e5f8ab106260d05ce0677e7981d..42c857dfd1c39c0e780554944ae886c5ce0dd565 100644 --- a/kleidicv/include/kleidicv/filter_driver_neon.h +++ b/kleidicv/include/kleidicv/filter_driver_neon.h @@ -37,6 +37,26 @@ class SeparableFilterDriver { SourceVecTraits::num_lanes()}; constexpr auto seq = std::make_index_sequence{}; + if constexpr (KernelSize == 3) { + loop.unroll_twice([&](size_t index) { + auto src_0_x2 = vld1q_x2(&src_rows.at(border_offsets.c(0))[index]); + auto src_1_x2 = vld1q_x2(&src_rows.at(border_offsets.c(1))[index]); + auto src_2_x2 = vld1q_x2(&src_rows.at(border_offsets.c(2))[index]); + + SourceVectorType src_a[3], src_b[3]; + src_a[0] = src_0_x2.val[0]; + src_b[0] = src_0_x2.val[1]; + src_a[1] = src_1_x2.val[0]; + src_b[1] = src_1_x2.val[1]; + src_a[2] = src_2_x2.val[0]; + src_b[2] = src_2_x2.val[1]; + + filter_.vertical_vector_path(src_a, &dst_rows[index]); + filter_.vertical_vector_path( + src_b, &dst_rows[index + SourceVecTraits::num_lanes()]); + }); + } + loop.unroll_once([&](size_t index) { vertical_vector_path(src_rows, dst_rows, border_offsets, index, seq); }); @@ -54,7 +74,7 @@ class SeparableFilterDriver { constexpr auto seq = std::make_index_sequence{}; loop.unroll_twice([&](size_t index) { - horizontal_vector_path_2x(src_rows, dst_rows, border_offsets, index, seq); + horizontal_vector_path_2x(src_rows, dst_rows, border_offsets, index); }); loop.unroll_once([&](size_t index) { @@ -97,19 +117,129 @@ class SeparableFilterDriver { filter_.vertical_scalar_path(src, &dst_rows[index]); } - template void horizontal_vector_path_2x(Rows src_rows, Rows dst_rows, - BorderOffsets border_offsets, size_t index, - std::index_sequence) const { - BufferVectorType src_a[KernelSize] = { - vld1q(&src_rows.at(0, border_offsets.c(SeqNum))[index])...}; - BufferVectorType src_b[KernelSize] = {vld1q(&src_rows.at( - 0, border_offsets.c(SeqNum))[index + BufferVecTraits::num_lanes()])...}; + BorderOffsets border_offsets, + size_t index) const { + if constexpr (KernelSize == 3) { + auto src_0_x2 = vld1q_x2(&src_rows.at(0, border_offsets.c(0))[index]); + auto src_1_x2 = vld1q_x2(&src_rows.at(0, border_offsets.c(1))[index]); + auto src_2_x2 = vld1q_x2(&src_rows.at(0, border_offsets.c(2))[index]); + + BufferVectorType src_a[3], src_b[3]; + src_a[0] = src_0_x2.val[0]; + src_b[0] = src_0_x2.val[1]; + src_a[1] = src_1_x2.val[0]; + src_b[1] = src_1_x2.val[1]; + src_a[2] = src_2_x2.val[0]; + src_b[2] = src_2_x2.val[1]; + + filter_.horizontal_vector_path(src_a, &dst_rows[index]); + filter_.horizontal_vector_path( + src_b, &dst_rows[index + BufferVecTraits::num_lanes()]); + } else if constexpr (KernelSize == 5) { + BufferVectorType src_a[5], src_b[5]; + src_a[0] = vld1q(&src_rows.at(0, border_offsets.c(0))[index]); + src_b[0] = vld1q(&src_rows.at( + 0, border_offsets.c(0))[index + BufferVecTraits::num_lanes()]); + src_a[1] = vld1q(&src_rows.at(0, border_offsets.c(1))[index]); + src_b[1] = vld1q(&src_rows.at( + 0, border_offsets.c(1))[index + BufferVecTraits::num_lanes()]); + src_a[2] = vld1q(&src_rows.at(0, border_offsets.c(2))[index]); + src_b[2] = vld1q(&src_rows.at( + 0, border_offsets.c(2))[index + BufferVecTraits::num_lanes()]); + src_a[3] = vld1q(&src_rows.at(0, border_offsets.c(3))[index]); + src_b[3] = vld1q(&src_rows.at( + 0, border_offsets.c(3))[index + BufferVecTraits::num_lanes()]); + src_a[4] = vld1q(&src_rows.at(0, border_offsets.c(4))[index]); + src_b[4] = vld1q(&src_rows.at( + 0, border_offsets.c(4))[index + BufferVecTraits::num_lanes()]); - filter_.horizontal_vector_path(src_a, &dst_rows[index]); - filter_.horizontal_vector_path( - src_b, &dst_rows[index + BufferVecTraits::num_lanes()]); + filter_.horizontal_vector_path(src_a, &dst_rows[index]); + filter_.horizontal_vector_path( + src_b, &dst_rows[index + BufferVecTraits::num_lanes()]); + } else if constexpr (KernelSize == 7) { + BufferVectorType src_a[7], src_b[7]; + src_a[0] = vld1q(&src_rows.at(0, border_offsets.c(0))[index]); + src_b[0] = vld1q(&src_rows.at( + 0, border_offsets.c(0))[index + BufferVecTraits::num_lanes()]); + src_a[1] = vld1q(&src_rows.at(0, border_offsets.c(1))[index]); + src_b[1] = vld1q(&src_rows.at( + 0, border_offsets.c(1))[index + BufferVecTraits::num_lanes()]); + src_a[2] = vld1q(&src_rows.at(0, border_offsets.c(2))[index]); + src_b[2] = vld1q(&src_rows.at( + 0, border_offsets.c(2))[index + BufferVecTraits::num_lanes()]); + src_a[3] = vld1q(&src_rows.at(0, border_offsets.c(3))[index]); + src_b[3] = vld1q(&src_rows.at( + 0, border_offsets.c(3))[index + BufferVecTraits::num_lanes()]); + src_a[4] = vld1q(&src_rows.at(0, border_offsets.c(4))[index]); + src_b[4] = vld1q(&src_rows.at( + 0, border_offsets.c(4))[index + BufferVecTraits::num_lanes()]); + src_a[5] = vld1q(&src_rows.at(0, border_offsets.c(5))[index]); + src_b[5] = vld1q(&src_rows.at( + 0, border_offsets.c(5))[index + BufferVecTraits::num_lanes()]); + src_a[6] = vld1q(&src_rows.at(0, border_offsets.c(6))[index]); + src_b[6] = vld1q(&src_rows.at( + 0, border_offsets.c(6))[index + BufferVecTraits::num_lanes()]); + + filter_.horizontal_vector_path(src_a, &dst_rows[index]); + filter_.horizontal_vector_path( + src_b, &dst_rows[index + BufferVecTraits::num_lanes()]); + } else if constexpr (KernelSize == 15) { + BufferVectorType src_a[15], src_b[15]; + src_a[0] = vld1q(&src_rows.at(0, border_offsets.c(0))[index]); + src_b[0] = vld1q(&src_rows.at( + 0, border_offsets.c(0))[index + BufferVecTraits::num_lanes()]); + src_a[1] = vld1q(&src_rows.at(0, border_offsets.c(1))[index]); + src_b[1] = vld1q(&src_rows.at( + 0, border_offsets.c(1))[index + BufferVecTraits::num_lanes()]); + src_a[2] = vld1q(&src_rows.at(0, border_offsets.c(2))[index]); + src_b[2] = vld1q(&src_rows.at( + 0, border_offsets.c(2))[index + BufferVecTraits::num_lanes()]); + src_a[3] = vld1q(&src_rows.at(0, border_offsets.c(3))[index]); + src_b[3] = vld1q(&src_rows.at( + 0, border_offsets.c(3))[index + BufferVecTraits::num_lanes()]); + src_a[4] = vld1q(&src_rows.at(0, border_offsets.c(4))[index]); + src_b[4] = vld1q(&src_rows.at( + 0, border_offsets.c(4))[index + BufferVecTraits::num_lanes()]); + src_a[5] = vld1q(&src_rows.at(0, border_offsets.c(5))[index]); + src_b[5] = vld1q(&src_rows.at( + 0, border_offsets.c(5))[index + BufferVecTraits::num_lanes()]); + src_a[6] = vld1q(&src_rows.at(0, border_offsets.c(6))[index]); + src_b[6] = vld1q(&src_rows.at( + 0, border_offsets.c(6))[index + BufferVecTraits::num_lanes()]); + src_a[7] = vld1q(&src_rows.at(0, border_offsets.c(7))[index]); + src_b[7] = vld1q(&src_rows.at( + 0, border_offsets.c(7))[index + BufferVecTraits::num_lanes()]); + src_a[8] = vld1q(&src_rows.at(0, border_offsets.c(8))[index]); + src_b[8] = vld1q(&src_rows.at( + 0, border_offsets.c(8))[index + BufferVecTraits::num_lanes()]); + src_a[9] = vld1q(&src_rows.at(0, border_offsets.c(9))[index]); + src_b[9] = vld1q(&src_rows.at( + 0, border_offsets.c(9))[index + BufferVecTraits::num_lanes()]); + src_a[10] = vld1q(&src_rows.at(0, border_offsets.c(10))[index]); + src_b[10] = vld1q(&src_rows.at( + 0, border_offsets.c(10))[index + BufferVecTraits::num_lanes()]); + src_a[11] = vld1q(&src_rows.at(0, border_offsets.c(11))[index]); + src_b[11] = vld1q(&src_rows.at( + 0, border_offsets.c(11))[index + BufferVecTraits::num_lanes()]); + src_a[12] = vld1q(&src_rows.at(0, border_offsets.c(12))[index]); + src_b[12] = vld1q(&src_rows.at( + 0, border_offsets.c(12))[index + BufferVecTraits::num_lanes()]); + src_a[13] = vld1q(&src_rows.at(0, border_offsets.c(13))[index]); + src_b[13] = vld1q(&src_rows.at( + 0, border_offsets.c(13))[index + BufferVecTraits::num_lanes()]); + src_a[14] = vld1q(&src_rows.at(0, border_offsets.c(14))[index]); + src_b[14] = vld1q(&src_rows.at( + 0, border_offsets.c(14))[index + BufferVecTraits::num_lanes()]); + + filter_.horizontal_vector_path(src_a, &dst_rows[index]); + filter_.horizontal_vector_path( + src_b, &dst_rows[index + BufferVecTraits::num_lanes()]); + } else { + static_assert(KernelSize != KernelSize, + "please define variants for other kernel sizes"); + } } template diff --git a/kleidicv/include/kleidicv/filter_driver_sc.h b/kleidicv/include/kleidicv/filter_driver_sc.h index 6bbb96eee513d7b58e2f97bf93037e6285512f3d..4333b0c5cc6e92b94100698d1a6c09182d576854 100644 --- a/kleidicv/include/kleidicv/filter_driver_sc.h +++ b/kleidicv/include/kleidicv/filter_driver_sc.h @@ -65,7 +65,7 @@ class SeparableFilterDriver { loop.unroll_twice([&](size_t index) KLEIDICV_STREAMING_COMPATIBLE { horizontal_vector_path_2x(pg_all, src_rows, dst_rows, border_offsets, - index, seq); + index); }); loop.unroll_once([&](size_t index) KLEIDICV_STREAMING_COMPATIBLE { @@ -103,20 +103,140 @@ class SeparableFilterDriver { &dst_rows[index]); } - template - void horizontal_vector_path_2x(svbool_t pg, Rows src_rows, - Rows dst_rows, - BorderOffsets border_offsets, size_t index, - std::index_sequence) const - KLEIDICV_STREAMING_COMPATIBLE { - filter_.horizontal_vector_path( - pg, svld1(pg, &src_rows.at(0, border_offsets.c(SeqNum))[index])..., - &dst_rows[index]); + void horizontal_vector_path_2x( + svbool_t pg, Rows src_rows, + Rows dst_rows, BorderOffsets border_offsets, + size_t index) const KLEIDICV_STREAMING_COMPATIBLE { + if constexpr (KernelSize == 3) { + auto src_0 = &src_rows.at(0, border_offsets.c(0))[index]; + auto src_1 = &src_rows.at(0, border_offsets.c(1))[index]; + auto src_2 = &src_rows.at(0, border_offsets.c(2))[index]; - filter_.horizontal_vector_path( - pg, - svld1_vnum(pg, &src_rows.at(0, border_offsets.c(SeqNum))[index], 1)..., - &dst_rows[index + BufferVecTraits::num_lanes()]); + BufferVectorType src_0_0 = svld1(pg, &src_0[0]); + BufferVectorType src_1_0 = svld1_vnum(pg, &src_0[0], 1); + BufferVectorType src_0_1 = svld1(pg, &src_1[0]); + BufferVectorType src_1_1 = svld1_vnum(pg, &src_1[0], 1); + BufferVectorType src_0_2 = svld1(pg, &src_2[0]); + BufferVectorType src_1_2 = svld1_vnum(pg, &src_2[0], 1); + + filter_.horizontal_vector_path(pg, src_0_0, src_0_1, src_0_2, + &dst_rows[index]); + filter_.horizontal_vector_path( + pg, src_1_0, src_1_1, src_1_2, + &dst_rows[index + BufferVecTraits::num_lanes()]); + } else if constexpr (KernelSize == 5) { + auto src_0 = &src_rows.at(0, border_offsets.c(0))[index]; + auto src_1 = &src_rows.at(0, border_offsets.c(1))[index]; + auto src_2 = &src_rows.at(0, border_offsets.c(2))[index]; + auto src_3 = &src_rows.at(0, border_offsets.c(3))[index]; + auto src_4 = &src_rows.at(0, border_offsets.c(4))[index]; + + BufferVectorType src_0_0 = svld1(pg, &src_0[0]); + BufferVectorType src_1_0 = svld1_vnum(pg, &src_0[0], 1); + BufferVectorType src_0_1 = svld1(pg, &src_1[0]); + BufferVectorType src_1_1 = svld1_vnum(pg, &src_1[0], 1); + BufferVectorType src_0_2 = svld1(pg, &src_2[0]); + BufferVectorType src_1_2 = svld1_vnum(pg, &src_2[0], 1); + BufferVectorType src_0_3 = svld1(pg, &src_3[0]); + BufferVectorType src_1_3 = svld1_vnum(pg, &src_3[0], 1); + BufferVectorType src_0_4 = svld1(pg, &src_4[0]); + BufferVectorType src_1_4 = svld1_vnum(pg, &src_4[0], 1); + + filter_.horizontal_vector_path(pg, src_0_0, src_0_1, src_0_2, src_0_3, + src_0_4, &dst_rows[index]); + filter_.horizontal_vector_path( + pg, src_1_0, src_1_1, src_1_2, src_1_3, src_1_4, + &dst_rows[index + BufferVecTraits::num_lanes()]); + } else if constexpr (KernelSize == 7) { + auto src_0 = &src_rows.at(0, border_offsets.c(0))[index]; + auto src_1 = &src_rows.at(0, border_offsets.c(1))[index]; + auto src_2 = &src_rows.at(0, border_offsets.c(2))[index]; + auto src_3 = &src_rows.at(0, border_offsets.c(3))[index]; + auto src_4 = &src_rows.at(0, border_offsets.c(4))[index]; + auto src_5 = &src_rows.at(0, border_offsets.c(5))[index]; + auto src_6 = &src_rows.at(0, border_offsets.c(6))[index]; + + BufferVectorType src_0_0 = svld1(pg, &src_0[0]); + BufferVectorType src_1_0 = svld1_vnum(pg, &src_0[0], 1); + BufferVectorType src_0_1 = svld1(pg, &src_1[0]); + BufferVectorType src_1_1 = svld1_vnum(pg, &src_1[0], 1); + BufferVectorType src_0_2 = svld1(pg, &src_2[0]); + BufferVectorType src_1_2 = svld1_vnum(pg, &src_2[0], 1); + BufferVectorType src_0_3 = svld1(pg, &src_3[0]); + BufferVectorType src_1_3 = svld1_vnum(pg, &src_3[0], 1); + BufferVectorType src_0_4 = svld1(pg, &src_4[0]); + BufferVectorType src_1_4 = svld1_vnum(pg, &src_4[0], 1); + BufferVectorType src_0_5 = svld1(pg, &src_5[0]); + BufferVectorType src_1_5 = svld1_vnum(pg, &src_5[0], 1); + BufferVectorType src_0_6 = svld1(pg, &src_6[0]); + BufferVectorType src_1_6 = svld1_vnum(pg, &src_6[0], 1); + + filter_.horizontal_vector_path(pg, src_0_0, src_0_1, src_0_2, src_0_3, + src_0_4, src_0_5, src_0_6, + &dst_rows[index]); + filter_.horizontal_vector_path( + pg, src_1_0, src_1_1, src_1_2, src_1_3, src_1_4, src_1_5, src_1_6, + &dst_rows[index + BufferVecTraits::num_lanes()]); + } else if constexpr (KernelSize == 15) { + auto src_0 = &src_rows.at(0, border_offsets.c(0))[index]; + auto src_1 = &src_rows.at(0, border_offsets.c(1))[index]; + auto src_2 = &src_rows.at(0, border_offsets.c(2))[index]; + auto src_3 = &src_rows.at(0, border_offsets.c(3))[index]; + auto src_4 = &src_rows.at(0, border_offsets.c(4))[index]; + auto src_5 = &src_rows.at(0, border_offsets.c(5))[index]; + auto src_6 = &src_rows.at(0, border_offsets.c(6))[index]; + auto src_7 = &src_rows.at(0, border_offsets.c(7))[index]; + auto src_8 = &src_rows.at(0, border_offsets.c(8))[index]; + auto src_9 = &src_rows.at(0, border_offsets.c(9))[index]; + auto src_10 = &src_rows.at(0, border_offsets.c(10))[index]; + auto src_11 = &src_rows.at(0, border_offsets.c(11))[index]; + auto src_12 = &src_rows.at(0, border_offsets.c(12))[index]; + auto src_13 = &src_rows.at(0, border_offsets.c(13))[index]; + auto src_14 = &src_rows.at(0, border_offsets.c(14))[index]; + + BufferVectorType src_0_0 = svld1(pg, &src_0[0]); + BufferVectorType src_1_0 = svld1_vnum(pg, &src_0[0], 1); + BufferVectorType src_0_1 = svld1(pg, &src_1[0]); + BufferVectorType src_1_1 = svld1_vnum(pg, &src_1[0], 1); + BufferVectorType src_0_2 = svld1(pg, &src_2[0]); + BufferVectorType src_1_2 = svld1_vnum(pg, &src_2[0], 1); + BufferVectorType src_0_3 = svld1(pg, &src_3[0]); + BufferVectorType src_1_3 = svld1_vnum(pg, &src_3[0], 1); + BufferVectorType src_0_4 = svld1(pg, &src_4[0]); + BufferVectorType src_1_4 = svld1_vnum(pg, &src_4[0], 1); + BufferVectorType src_0_5 = svld1(pg, &src_5[0]); + BufferVectorType src_1_5 = svld1_vnum(pg, &src_5[0], 1); + BufferVectorType src_0_6 = svld1(pg, &src_6[0]); + BufferVectorType src_1_6 = svld1_vnum(pg, &src_6[0], 1); + BufferVectorType src_0_7 = svld1(pg, &src_7[0]); + BufferVectorType src_1_7 = svld1_vnum(pg, &src_7[0], 1); + BufferVectorType src_0_8 = svld1(pg, &src_8[0]); + BufferVectorType src_1_8 = svld1_vnum(pg, &src_8[0], 1); + BufferVectorType src_0_9 = svld1(pg, &src_9[0]); + BufferVectorType src_1_9 = svld1_vnum(pg, &src_9[0], 1); + BufferVectorType src_0_10 = svld1(pg, &src_10[0]); + BufferVectorType src_1_10 = svld1_vnum(pg, &src_10[0], 1); + BufferVectorType src_0_11 = svld1(pg, &src_11[0]); + BufferVectorType src_1_11 = svld1_vnum(pg, &src_11[0], 1); + BufferVectorType src_0_12 = svld1(pg, &src_12[0]); + BufferVectorType src_1_12 = svld1_vnum(pg, &src_12[0], 1); + BufferVectorType src_0_13 = svld1(pg, &src_13[0]); + BufferVectorType src_1_13 = svld1_vnum(pg, &src_13[0], 1); + BufferVectorType src_0_14 = svld1(pg, &src_14[0]); + BufferVectorType src_1_14 = svld1_vnum(pg, &src_14[0], 1); + + filter_.horizontal_vector_path( + pg, src_0_0, src_0_1, src_0_2, src_0_3, src_0_4, src_0_5, src_0_6, + src_0_7, src_0_8, src_0_9, src_0_10, src_0_11, src_0_12, src_0_13, + src_0_14, &dst_rows[index]); + filter_.horizontal_vector_path( + pg, src_1_0, src_1_1, src_1_2, src_1_3, src_1_4, src_1_5, src_1_6, + src_1_7, src_1_8, src_1_9, src_1_10, src_1_11, src_1_12, src_1_13, + src_1_14, &dst_rows[index + BufferVecTraits::num_lanes()]); + } else { + static_assert(KernelSize != KernelSize, + "please define variants for other kernel sizes"); + } } template diff --git a/kleidicv/include/kleidicv/workspace/border.h b/kleidicv/include/kleidicv/workspace/border.h index 3e5a8d34c0839c203de2da3e84d4861bb9f617b8..a1b4164bc20931a86eedfe0358bde3f906fa50f0 100644 --- a/kleidicv/include/kleidicv/workspace/border.h +++ b/kleidicv/include/kleidicv/workspace/border.h @@ -29,9 +29,15 @@ class FixedBorderInfo final { static_assert(sizeof...(args) == KernelSize); } - size_t c(int i) const { return offsets_[i]; } + size_t c(size_t i) const { return offsets_[i]; } private: + template + static inline Offsets from_seq(std::integer_sequence) { + return Offsets{SeqNum...}; + } + + friend class FixedBorderInfo; size_t offsets_[KernelSize]; }; @@ -50,19 +56,19 @@ class FixedBorderInfo final { constexpr auto seq = std::make_integer_sequence> 1)>{}; switch (border_type_) { case FixedBorderType::REPLICATE: - return get_border(index, seq); + return get_offsets(index, seq); break; case FixedBorderType::REFLECT: - return get_border(index, seq); + return get_offsets(index, seq); break; case FixedBorderType::WRAP: - return get_border(index, seq); + return get_offsets(index, seq); break; case FixedBorderType::REVERSE: - return get_border(index, seq); + return get_offsets(index, seq); break; } // Unreachable. Compiler should emit a warning-as-error if any cases are @@ -78,19 +84,19 @@ class FixedBorderInfo final { index = length_ - index - 1; switch (border_type_) { case FixedBorderType::REPLICATE: - return get_border(index, seq); + return get_offsets(index, seq); break; case FixedBorderType::REFLECT: - return get_border(index, seq); + return get_offsets(index, seq); break; case FixedBorderType::WRAP: - return get_border(index, seq); + return get_offsets(index, seq); break; case FixedBorderType::REVERSE: - return get_border(index, seq); + return get_offsets(index, seq); break; } // Unreachable. Compiler should emit a warning-as-error if any cases are @@ -120,83 +126,135 @@ class FixedBorderInfo final { KLEIDICV_STREAMING_COMPATIBLE { // Example (15x15): Offsets{-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, // 7}; - return Offsets{(SeqNum - (KernelSize >> 1))..., 0, (SeqNum + 1)...}; + constexpr int k_2 = static_cast(KernelSize >> 1); + return Offsets{(SeqNum - k_2)..., 0, (SeqNum + 1)...}; } // Creates the Offsets object containing offsets in various intervals // depending on the row/column index, border type as well the border - // position used. NOLINTBEGIN(readability-function-cognitive-complexity) - template - inline Offsets get_border(int index, std::integer_sequence) - const KLEIDICV_STREAMING_COMPATIBLE { + // position used. + template + static constexpr auto generate_border_offsets( + std::integer_sequence) { + constexpr int k_2 = static_cast(KernelSize >> 1); if constexpr (BorderType == FixedBorderType::REPLICATE && !IsRight) { // Example (15x15, index 4, left): Offsets{-4, -4, -4, -4, -3, -2, -1, 0, // 1, 2, 3, 4, 5, 6, 7}; - return Offsets{(SeqNum - static_cast(KernelSize >> 1) < -index) - ? -index - : (SeqNum - (KernelSize >> 1))..., - 0, (SeqNum + 1)...}; + return std::integer_sequence < int, + (SeqNum - k_2 < -Index) ? -Index : (SeqNum - k_2)..., 0, + (SeqNum + 1)... > {}; } if constexpr (BorderType == FixedBorderType::REPLICATE && IsRight) { // Example (15x15, index 4, right): Offsets{-7, -6, -5, -4, -3, -2, -1, 0, // 1, 2, 3, 4, 4, 4, 4}; - return Offsets{(SeqNum - (KernelSize >> 1))..., 0, - (SeqNum >= index) ? index : (SeqNum + 1)...}; + return std::integer_sequence < int, (SeqNum - k_2)..., 0, + (SeqNum >= Index) ? Index : (SeqNum + 1)... > {}; } if constexpr (BorderType == FixedBorderType::REFLECT && !IsRight) { // Example (15x15, index 4, left): Offsets{-2, -3, -4, -4, -3, -2, -1, 0, // 1, 2, 3, 4, 5, 6, 7}; - return Offsets{(SeqNum - static_cast(KernelSize >> 1) < -index) - ? ((KernelSize >> 1) - (index << 1) - (SeqNum + 1)) - : (SeqNum - (KernelSize >> 1))..., - 0, (SeqNum + 1)...}; + return std::integer_sequence < int, + (SeqNum - k_2 < -Index) ? (k_2 - (Index << 1) - (SeqNum + 1)) + : (SeqNum - k_2)..., + 0, (SeqNum + 1)... > {}; } if constexpr (BorderType == FixedBorderType::REFLECT && IsRight) { // Example (15x15, index 4, right): Offsets{-7, -6, -5, -4, -3, -2, -1, 0, // 1, 2, 3, 4, 4, 3, 2}; - return Offsets{ - (SeqNum - (KernelSize >> 1))..., 0, - (SeqNum >= index) ? ((index << 1) - SeqNum) : (SeqNum + 1)...}; + return std::integer_sequence < int, (SeqNum - k_2)..., 0, + (SeqNum >= Index) ? ((Index << 1) - SeqNum) : (SeqNum + 1)... > {}; } - if constexpr (BorderType == FixedBorderType::WRAP && !IsRight) { - // Example (15x15, index 4, left): Offsets{length_ - 7, length_ - 6, - // length_ - 5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7}; - return Offsets{(SeqNum - static_cast(KernelSize >> 1) < -index) - ? (SeqNum - (KernelSize >> 1) + length_) - : (SeqNum - (KernelSize >> 1))..., - 0, (SeqNum + 1)...}; + if constexpr (BorderType == FixedBorderType::REVERSE && !IsRight) { + // Example (15x15, index 4, left): Offsets{-1, -2, -3, -4, -3, -2, -1, 0, + // 1, 2, 3, 4, 5, 6, 7}; + return std::integer_sequence < int, + (SeqNum - k_2 < -Index) ? (k_2 - (Index << 1) - SeqNum) + : (SeqNum - k_2)..., + 0, (SeqNum + 1)... > {}; } - if constexpr (BorderType == FixedBorderType::WRAP && IsRight) { + if constexpr (BorderType == FixedBorderType::REVERSE && IsRight) { // Example (15x15, index 4, right): Offsets{-7, -6, -5, -4, -3, -2, -1, 0, - // 1, 2, 3, 4, 5 - length_, 6 - length_, 7 - length_}; - return Offsets{ - (SeqNum - (KernelSize >> 1))..., 0, - (SeqNum >= index) ? (SeqNum - length_ + 1) : (SeqNum + 1)...}; + // 1, 2, 3, 4, 3, 2, 1}; + return std::integer_sequence < int, (SeqNum - k_2)..., 0, + (SeqNum >= Index) ? ((Index << 1) - (SeqNum + 1)) + : (SeqNum + 1)... > {}; } + } - if constexpr (BorderType == FixedBorderType::REVERSE && !IsRight) { - // Example (15x15, index 4, left): Offsets{-1, -2, -3, -4, -3, -2, -1, 0, - // 1, 2, 3, 4, 5, 6, 7}; - return Offsets{(SeqNum - static_cast(KernelSize >> 1) < -index) - ? ((KernelSize >> 1) - (index << 1) - SeqNum) - : (SeqNum - (KernelSize >> 1))..., + template + inline Offsets generate_border_offsets_wrap( + std::integer_sequence) const + KLEIDICV_STREAMING_COMPATIBLE { + constexpr int k_2 = static_cast(KernelSize >> 1); + if constexpr (!IsRight) { + // Example (15x15, index 4, left): Offsets{length_ - 7, length_ - 6, + // length_ - 5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7}; + return Offsets{(SeqNum - k_2 < -Index) ? (SeqNum - k_2 + length_) + : (SeqNum - k_2)..., 0, (SeqNum + 1)...}; } - if constexpr (BorderType == FixedBorderType::REVERSE && IsRight) { + if constexpr (IsRight) { // Example (15x15, index 4, right): Offsets{-7, -6, -5, -4, -3, -2, -1, 0, - // 1, 2, 3, 4, 3, 2, 1}; + // 1, 2, 3, 4, 5 - length_, 6 - length_, 7 - length_}; return Offsets{ - (SeqNum - (KernelSize >> 1))..., 0, - (SeqNum >= index) ? ((index << 1) - (SeqNum + 1)) : (SeqNum + 1)...}; + (SeqNum - k_2)..., 0, + (SeqNum >= Index) ? (SeqNum - length_ + 1) : (SeqNum + 1)...}; + } + } + + // NOLINTBEGIN(readability-function-cognitive-complexity, + // readability-avoid-nested-conditional-operator) + template + inline Offsets get_offsets(int index, + std::integer_sequence seq) const + KLEIDICV_STREAMING_COMPATIBLE { + if constexpr (BorderType == FixedBorderType::WRAP) { + return (index == 0) ? generate_border_offsets_wrap<0, IsRight>(seq) + : (index == 1) ? generate_border_offsets_wrap<1, IsRight>(seq) + : (index == 2) ? generate_border_offsets_wrap<2, IsRight>(seq) + : (index == 3) ? generate_border_offsets_wrap<3, IsRight>(seq) + : (index == 4) ? generate_border_offsets_wrap<4, IsRight>(seq) + : (index == 5) ? generate_border_offsets_wrap<5, IsRight>(seq) + : (index == 6) ? generate_border_offsets_wrap<6, IsRight>(seq) + : (index == 7) ? generate_border_offsets_wrap<7, IsRight>(seq) + : (index == 8) ? generate_border_offsets_wrap<8, IsRight>(seq) + : (index == 9) ? generate_border_offsets_wrap<9, IsRight>(seq) + : (index == 10) ? generate_border_offsets_wrap<10, IsRight>(seq) + : (index == 11) ? generate_border_offsets_wrap<11, IsRight>(seq) + : (index == 12) ? generate_border_offsets_wrap<12, IsRight>(seq) + : (index == 13) ? generate_border_offsets_wrap<13, IsRight>(seq) + : (index == 14) ? generate_border_offsets_wrap<14, IsRight>(seq) + : Offsets{}; + } else { + // clang-format off + return + (index == 0) ? Offsets::from_seq(generate_border_offsets<0, BorderType, IsRight>(seq)) + : (index == 1) ? Offsets::from_seq(generate_border_offsets<1, BorderType, IsRight>(seq)) + : (index == 2) ? Offsets::from_seq(generate_border_offsets<2, BorderType, IsRight>(seq)) + : (index == 3) ? Offsets::from_seq(generate_border_offsets<3, BorderType, IsRight>(seq)) + : (index == 4) ? Offsets::from_seq(generate_border_offsets<4, BorderType, IsRight>(seq)) + : (index == 5) ? Offsets::from_seq(generate_border_offsets<5, BorderType, IsRight>(seq)) + : (index == 6) ? Offsets::from_seq(generate_border_offsets<6, BorderType, IsRight>(seq)) + : (index == 7) ? Offsets::from_seq(generate_border_offsets<7, BorderType, IsRight>(seq)) + : (index == 8) ? Offsets::from_seq(generate_border_offsets<8, BorderType, IsRight>(seq)) + : (index == 9) ? Offsets::from_seq(generate_border_offsets<9, BorderType, IsRight>(seq)) + : (index == 10) ? Offsets::from_seq(generate_border_offsets<10, BorderType, IsRight>(seq)) + : (index == 11) ? Offsets::from_seq(generate_border_offsets<11, BorderType, IsRight>(seq)) + : (index == 12) ? Offsets::from_seq(generate_border_offsets<12, BorderType, IsRight>(seq)) + : (index == 13) ? Offsets::from_seq(generate_border_offsets<13, BorderType, IsRight>(seq)) + : (index == 14) ? Offsets::from_seq(generate_border_offsets<14, BorderType, IsRight>(seq)) + : Offsets{}; + // clang-format on } } - // NOLINTEND(readability-function-cognitive-complexity) + // NOLINTEND(readability-function-cognitive-complexity, + // readability-avoid-nested-conditional-operator) size_t length_; FixedBorderType border_type_;