From f1c89bb78f0ff0f2fbdbb2de8564e308db9779ea Mon Sep 17 00:00:00 2001 From: Denes Tarjan Date: Wed, 27 Mar 2024 15:19:02 +0100 Subject: [PATCH] [NFC] Get rid of unused try_to_avoid_tail_loops at compile time --- intrinsiccv/include/intrinsiccv/neon.h | 20 ++++++------- intrinsiccv/include/intrinsiccv/utils.h | 28 ++++++++----------- .../src/arithmetics/transpose_neon.cpp | 6 ++-- .../src/morphology/morphology_neon.cpp | 12 ++++---- 4 files changed, 28 insertions(+), 38 deletions(-) diff --git a/intrinsiccv/include/intrinsiccv/neon.h b/intrinsiccv/include/intrinsiccv/neon.h index 3cf168659..1742ed841 100644 --- a/intrinsiccv/include/intrinsiccv/neon.h +++ b/intrinsiccv/include/intrinsiccv/neon.h @@ -342,9 +342,8 @@ class SeparableFilter { void process_vertical(size_t width, Rows src_rows, Rows dst_rows, BorderOffsets border_offsets) const { - LoopUnroll2 loop{width * src_rows.channels(), SourceVecTraits::num_lanes()}; - - loop.try_avoid_tail_loop(); + LoopUnroll2 loop{width * src_rows.channels(), + SourceVecTraits::num_lanes()}; loop.unroll_twice([&](size_t index) { auto src_0 = &src_rows.at(border_offsets.c0())[index]; @@ -388,9 +387,8 @@ class SeparableFilter { void process_horizontal(size_t width, Rows src_rows, Rows dst_rows, BorderOffsets border_offsets) const { - LoopUnroll2 loop{width * src_rows.channels(), BufferVecTraits::num_lanes()}; - - loop.try_avoid_tail_loop(); + LoopUnroll2 loop{width * src_rows.channels(), + BufferVecTraits::num_lanes()}; loop.unroll_twice([&](size_t index) { auto src_0 = &src_rows.at(0, border_offsets.c0())[index]; @@ -474,9 +472,8 @@ class SeparableFilter { void process_vertical(size_t width, Rows src_rows, Rows dst_rows, BorderOffsets border_offsets) const { - LoopUnroll2 loop{width * src_rows.channels(), SourceVecTraits::num_lanes()}; - - loop.try_avoid_tail_loop(); + LoopUnroll2 loop{width * src_rows.channels(), + SourceVecTraits::num_lanes()}; loop.unroll_once([&](size_t index) { SourceVectorType src[5]; @@ -502,9 +499,8 @@ class SeparableFilter { void process_horizontal(size_t width, Rows src_rows, Rows dst_rows, BorderOffsets border_offsets) const { - LoopUnroll2 loop{width * src_rows.channels(), BufferVecTraits::num_lanes()}; - - loop.try_avoid_tail_loop(); + LoopUnroll2 loop{width * src_rows.channels(), + BufferVecTraits::num_lanes()}; loop.unroll_twice([&](size_t index) { auto src_0 = &src_rows.at(0, border_offsets.c0())[index]; diff --git a/intrinsiccv/include/intrinsiccv/utils.h b/intrinsiccv/include/intrinsiccv/utils.h index e58fd4f82..8470173ed 100644 --- a/intrinsiccv/include/intrinsiccv/utils.h +++ b/intrinsiccv/include/intrinsiccv/utils.h @@ -12,6 +12,7 @@ #include "intrinsiccv/config.h" #include "intrinsiccv/ctypes.h" +#include "intrinsiccv/traits.h" namespace INTRINSICCV_TARGET_NAMESPACE { @@ -207,21 +208,20 @@ class LoopUnroll final { }; // end of class LoopUnroll // This is the same as LoopUnroll, except that it passes indices to callbacks. +template class LoopUnroll2 final { public: explicit LoopUnroll2(size_t length, size_t step) INTRINSICCV_STREAMING_COMPATIBLE : length_(length), step_(step), - index_(0), - try_avoid_tail_loop_(false) {} + index_(0) {} explicit LoopUnroll2(size_t start_index, size_t length, size_t step) INTRINSICCV_STREAMING_COMPATIBLE : length_(length), step_(step), - index_(std::min(start_index, length)), - try_avoid_tail_loop_(false) {} + index_(std::min(start_index, length)) {} // Loop unrolled four times. template @@ -289,12 +289,6 @@ class LoopUnroll2 final { return *this; } - // Instructs the loop logic to try to avoid the tail loop. - LoopUnroll2 &try_avoid_tail_loop() INTRINSICCV_STREAMING_COMPATIBLE { - try_avoid_tail_loop_ = true; - return *this; - } - // Returns true if there is nothing left to process. bool empty() const INTRINSICCV_STREAMING_COMPATIBLE { return length_ == index_; @@ -321,11 +315,14 @@ class LoopUnroll2 final { index_ += n_step; } - // Try to avoid the tail loop. - if ((UnrollFactor == 1) && remaining_length() && try_avoid_tail_loop_ && - (length_ >= n_step)) { - index_ = length_ - n_step; - max_index = length_; + // Try to avoid the tail loop if Tail is TryToAvoidTailLoop + if constexpr (try_to_avoid_tail_loop && (UnrollFactor == 1)) { + if (remaining_length() && (length_ >= n_step)) { + index_ = length_ - n_step; + max_index = length_; + } else { + break; + } } else { break; } @@ -337,7 +334,6 @@ class LoopUnroll2 final { size_t length_; size_t step_; size_t index_; - bool try_avoid_tail_loop_; }; // end of class LoopUnroll2 // Check whether any of the arguments are null pointers. diff --git a/intrinsiccv/src/arithmetics/transpose_neon.cpp b/intrinsiccv/src/arithmetics/transpose_neon.cpp index 736300d18..90b903ef9 100644 --- a/intrinsiccv/src/arithmetics/transpose_neon.cpp +++ b/intrinsiccv/src/arithmetics/transpose_neon.cpp @@ -104,8 +104,7 @@ static intrinsiccv_error_t transpose(Rectangle rect, constexpr size_t num_of_lanes = VecTraits::num_lanes(); auto handle_lane_number_of_rows = [&](size_t vindex) { - LoopUnroll2 horizontal_loop(rect.width(), num_of_lanes); - horizontal_loop.try_avoid_tail_loop(); + LoopUnroll2 horizontal_loop(rect.width(), num_of_lanes); horizontal_loop.unroll_once([&](size_t hindex) { // if the input is big enough handle it tile by tile @@ -119,8 +118,7 @@ static intrinsiccv_error_t transpose(Rectangle rect, }); }; - LoopUnroll2 vertical_loop(rect.height(), num_of_lanes); - vertical_loop.try_avoid_tail_loop(); + LoopUnroll2 vertical_loop(rect.height(), num_of_lanes); vertical_loop.unroll_once(handle_lane_number_of_rows); diff --git a/intrinsiccv/src/morphology/morphology_neon.cpp b/intrinsiccv/src/morphology/morphology_neon.cpp index 9bc91c368..326e06589 100644 --- a/intrinsiccv/src/morphology/morphology_neon.cpp +++ b/intrinsiccv/src/morphology/morphology_neon.cpp @@ -30,10 +30,10 @@ class VerticalOp final { // handle two rows at once. for (size_t height = 0; height < rect_.height(); height += 2) { // Iterate across the columns from left to right. - LoopUnroll2 loop{rect_.width() * src_rows.channels(), - VecTraits::num_lanes()}; + LoopUnroll2 loop{rect_.width() * src_rows.channels(), + VecTraits::num_lanes()}; // clang-format off - loop.try_avoid_tail_loop() + loop .unroll_four_times([&](size_t index) { vector_path_4x(src_rows, dst_rows, index, height); }) @@ -322,10 +322,10 @@ class HorizontalOp final { // Iterate across the rows from top to bottom. for (size_t height = 0; height < rect_.height(); ++height) { // Iterate across the columns from left to right. - LoopUnroll2 loop{rect_.width() * src_rows.channels(), - VecTraits::num_lanes()}; + LoopUnroll2 loop{rect_.width() * src_rows.channels(), + VecTraits::num_lanes()}; // clang-format off - loop.try_avoid_tail_loop() + loop .unroll_four_times([&](size_t index) { vector_path_4x(src_rows, dst_rows, index); }) -- GitLab