diff --git a/intrinsiccv/include/intrinsiccv.h b/intrinsiccv/include/intrinsiccv.h index 91471de6dd6d939baa219ebb2022882ac6052934..eb504d0ceaee0a36568ee792d1b25973882d33c3 100644 --- a/intrinsiccv/include/intrinsiccv.h +++ b/intrinsiccv/include/intrinsiccv.h @@ -184,8 +184,37 @@ INTRINSICCV_BINARY_OP_SCALE(intrinsiccv_saturating_multiply_s16, int16_t, INTRINSICCV_BINARY_OP_SCALE(intrinsiccv_saturating_multiply_s32, int32_t, double); -INTRINSICCV_BINARY_OP_SCALE(intrinsiccv_add_abs_with_threshold, int16_t, - int16_t); +/// Adds the absolute values of the corresponding elements in `src_a` and +/// `src_b`. Then, performs a comparison of each element's value in the result +/// with respect to a caller defined threshold. The strictly larger elements +/// remain unchanged and the rest are set to 0. +/// +/// The addition is saturated, i.e. the result is the largest number of the +/// type of the element if the addition result would overflow. Source data +/// length (in bytes) is `stride` * `height`. Width and height are the same +/// for the two sources. +/// +/// @param src_a Pointer to the first source data. Must be non-null. +/// @param src_b Pointer to the second source data. Must be non-null. +/// @param src_a_stride Distance in bytes from the start of one row to the +/// start of the next row for the first source data. +/// Must not be less than width * sizeof(type). +/// @param src_b_stride Distance in bytes from the start of one row to the +/// start of the next row for the second source data. +/// Must not be less than width * sizeof(type). +/// @param dst Pointer to the destination data. Must be non-null. +/// @param dst_stride Distance in bytes from the start of one row to the +/// start of the next row for the destination data. +/// Must not be less than width * sizeof(type). +/// @param width Number of elements in a row. +/// @param height Number of rows in the data. +/// @param threshold The value that the elements of the addition result +/// are compared to. 
+/// +intrinsiccv_error_t intrinsiccv_saturating_add_abs_with_threshold( + const int16_t *src_a, size_t src_a_stride, const int16_t *src_b, + size_t src_b_stride, int16_t *dst, size_t dst_stride, size_t width, + size_t height, int16_t threshold); /// Converts a grayscale image to RGB. All channels are 8-bit wide. /// diff --git a/intrinsiccv/src/analysis/canny_neon.cpp b/intrinsiccv/src/analysis/canny_neon.cpp index 733e7e08c9df3aab506e299891f3ea360ab082be..6b7352c867617b0419f7ab917348c983c7c72dab 100644 --- a/intrinsiccv/src/analysis/canny_neon.cpp +++ b/intrinsiccv/src/analysis/canny_neon.cpp @@ -505,7 +505,7 @@ extern "C" INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t intrinsiccv_canny_u8( // Calculate magnitude from the horizontal and vertical derivatives, and apply // lower threshold. - if (auto err = intrinsiccv_add_abs_with_threshold( + if (auto err = intrinsiccv_saturating_add_abs_with_threshold( &horizontal_gradient.rows()[0], horizontal_gradient.rows().stride(), &vertical_gradient.rows()[0], vertical_gradient.rows().stride(), &magnitudes.rows_without_margin()[0], diff --git a/intrinsiccv/src/arithmetics/add_abs_with_threshold_api.cpp b/intrinsiccv/src/arithmetics/add_abs_with_threshold_api.cpp index 2d994c0fac249b63a6cf2156f2b104e2cab48c8a..81c461374f172cdbad88f01007cb5a95c55404bd 100644 --- a/intrinsiccv/src/arithmetics/add_abs_with_threshold_api.cpp +++ b/intrinsiccv/src/arithmetics/add_abs_with_threshold_api.cpp @@ -11,41 +11,37 @@ namespace intrinsiccv { namespace neon { template -intrinsiccv_error_t add_abs_with_threshold(const T *src_a, size_t src_a_stride, - const T *src_b, size_t src_b_stride, - T *dst, size_t dst_stride, - size_t width, size_t height, - T threshold); +intrinsiccv_error_t saturating_add_abs_with_threshold( + const T *src_a, size_t src_a_stride, const T *src_b, size_t src_b_stride, + T *dst, size_t dst_stride, size_t width, size_t height, T threshold); } // namespace neon namespace sve2 { template -intrinsiccv_error_t 
add_abs_with_threshold(const T *src_a, size_t src_a_stride, - const T *src_b, size_t src_b_stride, - T *dst, size_t dst_stride, - size_t width, size_t height, - T threshold); +intrinsiccv_error_t saturating_add_abs_with_threshold( + const T *src_a, size_t src_a_stride, const T *src_b, size_t src_b_stride, + T *dst, size_t dst_stride, size_t width, size_t height, T threshold); } // namespace sve2 namespace sme2 { template -intrinsiccv_error_t add_abs_with_threshold(const T *src_a, size_t src_a_stride, - const T *src_b, size_t src_b_stride, - T *dst, size_t dst_stride, - size_t width, size_t height, - T threshold); +intrinsiccv_error_t saturating_add_abs_with_threshold( + const T *src_a, size_t src_a_stride, const T *src_b, size_t src_b_stride, + T *dst, size_t dst_stride, size_t width, size_t height, T threshold); } // namespace sme2 -#define INTRINSICCV_DEFINE_C_API(name, type) \ - INTRINSICCV_MULTIVERSION_C_API( \ - name, intrinsiccv::neon::add_abs_with_threshold, \ - INTRINSICCV_SVE2_IMPL_IF( \ - intrinsiccv::sve2::add_abs_with_threshold), \ - intrinsiccv::sme2::add_abs_with_threshold, const type *, size_t, \ - const type *, size_t, type *, size_t, size_t, size_t, type) - -INTRINSICCV_DEFINE_C_API(intrinsiccv_add_abs_with_threshold, int16_t); +#define INTRINSICCV_DEFINE_C_API(name, type) \ + INTRINSICCV_MULTIVERSION_C_API( \ + name, intrinsiccv::neon::saturating_add_abs_with_threshold, \ + INTRINSICCV_SVE2_IMPL_IF( \ + intrinsiccv::sve2::saturating_add_abs_with_threshold), \ + intrinsiccv::sme2::saturating_add_abs_with_threshold, \ + const type *, size_t, const type *, size_t, type *, size_t, size_t, \ + size_t, type) + +INTRINSICCV_DEFINE_C_API(intrinsiccv_saturating_add_abs_with_threshold, + int16_t); } // namespace intrinsiccv diff --git a/intrinsiccv/src/arithmetics/add_abs_with_threshold_neon.cpp b/intrinsiccv/src/arithmetics/add_abs_with_threshold_neon.cpp index 6effc7dff30d5c2e161127f76f609688b28ce23b..cdc6d732f511c2704a34ec427dd4d16b2b05bfbd 100644 --- 
a/intrinsiccv/src/arithmetics/add_abs_with_threshold_neon.cpp +++ b/intrinsiccv/src/arithmetics/add_abs_with_threshold_neon.cpp @@ -10,38 +10,53 @@ namespace intrinsiccv::neon { template -class AddAbsWithThreshold final : public UnrollOnce, public UnrollTwice { +class SaturatingAddAbsWithThreshold final : public UnrollOnce, + public UnrollTwice { public: using VecTraits = neon::VecTraits; using VectorType = typename VecTraits::VectorType; - explicit AddAbsWithThreshold(ScalarType threshold) + explicit SaturatingAddAbsWithThreshold(ScalarType threshold) : threshold_{threshold}, threshold_vec_{vdupq_n_s16(threshold)} {} VectorType vector_path(VectorType src_a, VectorType src_b) { - VectorType add_abs = vaddq_s16(vabsq_s16(src_a), vabsq_s16(src_b)); + VectorType add_abs = vqaddq_s16(vqabsq_s16(src_a), vqabsq_s16(src_b)); return vandq_s16(add_abs, vcgtq_s16(add_abs, threshold_vec_)); } ScalarType scalar_path(ScalarType src_a, ScalarType src_b) { - ScalarType add_abs = std::abs(src_a) + std::abs(src_b); + ScalarType add_abs = 0; + + if (std::numeric_limits::is_signed && + src_a == std::numeric_limits::min()) { + src_a = std::numeric_limits::max(); + } else { + src_a = std::abs(src_a); + } + + if (std::numeric_limits::is_signed && + src_b == std::numeric_limits::min()) { + src_b = std::numeric_limits::max(); + } else { + src_b = std::abs(src_b); + } + + if (__builtin_add_overflow(src_a, src_b, &add_abs)) { + add_abs = std::numeric_limits::max(); + } return add_abs > threshold_ ? 
add_abs : 0; } private: ScalarType threshold_; VectorType threshold_vec_; -}; // end of class AddAbsWithThreshold +}; // end of class SaturatingAddAbsWithThreshold template -intrinsiccv_error_t add_abs_with_threshold(const T *src_a, size_t src_a_stride, - const T *src_b, size_t src_b_stride, - T *dst, size_t dst_stride, - size_t width, size_t height, - T threshold) { - CHECK_POINTERS(src_a, src_b, dst); - - AddAbsWithThreshold operation{threshold}; +intrinsiccv_error_t saturating_add_abs_with_threshold( + const T *src_a, size_t src_a_stride, const T *src_b, size_t src_b_stride, + T *dst, size_t dst_stride, size_t width, size_t height, T threshold) { + SaturatingAddAbsWithThreshold operation{threshold}; Rectangle rect{width, height}; Rows src_a_rows{src_a, src_a_stride}; Rows src_b_rows{src_b, src_b_stride}; @@ -50,12 +65,12 @@ intrinsiccv_error_t add_abs_with_threshold(const T *src_a, size_t src_a_stride, return INTRINSICCV_OK; } -#define INTRINSICCV_INSTANTIATE_TEMPLATE(type) \ - template INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t \ - add_abs_with_threshold(const type *src_a, size_t src_a_stride, \ - const type *src_b, size_t src_b_stride, \ - type *dst, size_t dst_stride, size_t width, \ - size_t height, type threshold) +#define INTRINSICCV_INSTANTIATE_TEMPLATE(type) \ + template INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t \ + saturating_add_abs_with_threshold( \ + const type *src_a, size_t src_a_stride, const type *src_b, \ + size_t src_b_stride, type *dst, size_t dst_stride, size_t width, \ + size_t height, type threshold) INTRINSICCV_INSTANTIATE_TEMPLATE(int16_t); diff --git a/intrinsiccv/src/arithmetics/add_abs_with_threshold_sc.h b/intrinsiccv/src/arithmetics/add_abs_with_threshold_sc.h index c2fa1643d623c5e23e86058d5ad43a753ad47284..b4f07ff2da4333f363536e6901a25fda1bf47471 100644 --- a/intrinsiccv/src/arithmetics/add_abs_with_threshold_sc.h +++ b/intrinsiccv/src/arithmetics/add_abs_with_threshold_sc.h @@ -2,8 +2,8 @@ // // SPDX-License-Identifier: 
Apache-2.0 -#ifndef INTRINSICCV_ADD_ABS_WITH_THRESHOLD_SC_H -#define INTRINSICCV_ADD_ABS_WITH_THRESHOLD_SC_H +#ifndef INTRINSICCV_SATURATING_ADD_ABS_WITH_THRESHOLD_SC_H +#define INTRINSICCV_SATURATING_ADD_ABS_WITH_THRESHOLD_SC_H #include @@ -13,13 +13,13 @@ namespace intrinsiccv::sve2 { template -class AddAbsWithThreshold final : public UnrollTwice { +class SaturatingAddAbsWithThreshold final : public UnrollTwice { public: using ContextType = sve2::Context; using VecTraits = sve2::VecTraits; using VectorType = typename VecTraits::VectorType; - explicit AddAbsWithThreshold(ScalarType threshold) + explicit SaturatingAddAbsWithThreshold(ScalarType threshold) INTRINSICCV_STREAMING_COMPATIBLE : threshold_(threshold) {} VectorType vector_path(ContextType ctx, VectorType src_a, @@ -32,16 +32,14 @@ class AddAbsWithThreshold final : public UnrollTwice { private: ScalarType threshold_; -}; // end of class AddAbsWithThreshold +}; // end of class SaturatingAddAbsWithThreshold template -intrinsiccv_error_t add_abs_with_threshold_sc( +intrinsiccv_error_t saturating_add_abs_with_threshold_sc( const T *src_a, size_t src_a_stride, const T *src_b, size_t src_b_stride, T *dst, size_t dst_stride, size_t width, size_t height, T threshold) INTRINSICCV_STREAMING_COMPATIBLE { - CHECK_POINTERS(src_a, src_b, dst); - - AddAbsWithThreshold operation{threshold}; + SaturatingAddAbsWithThreshold operation{threshold}; Rectangle rect{width, height}; Rows src_a_rows{src_a, src_a_stride}; Rows src_b_rows{src_b, src_b_stride}; @@ -53,4 +51,4 @@ intrinsiccv_error_t add_abs_with_threshold_sc( } // namespace intrinsiccv::sve2 -#endif // INTRINSICCV_ADD_ABS_WITH_THRESHOLD_SC_H +#endif // INTRINSICCV_SATURATING_ADD_ABS_WITH_THRESHOLD_SC_H diff --git a/intrinsiccv/src/arithmetics/add_abs_with_threshold_sme2.cpp b/intrinsiccv/src/arithmetics/add_abs_with_threshold_sme2.cpp index ffb4925b137be6a1838996724f22b3bee4263ccf..445d1119f28bed1171fb9d5ecfcc5eb21f4e4e26 100644 --- 
a/intrinsiccv/src/arithmetics/add_abs_with_threshold_sme2.cpp +++ b/intrinsiccv/src/arithmetics/add_abs_with_threshold_sme2.cpp @@ -8,20 +8,21 @@ namespace intrinsiccv::sme2 { template INTRINSICCV_LOCALLY_STREAMING INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t -add_abs_with_threshold(const T *src_a, size_t src_a_stride, const T *src_b, - size_t src_b_stride, T *dst, size_t dst_stride, - size_t width, size_t height, T threshold) { - return sve2::add_abs_with_threshold_sc(src_a, src_a_stride, src_b, - src_b_stride, dst, dst_stride, width, - height, threshold); +saturating_add_abs_with_threshold(const T *src_a, size_t src_a_stride, + const T *src_b, size_t src_b_stride, T *dst, + size_t dst_stride, size_t width, + size_t height, T threshold) { + return sve2::saturating_add_abs_with_threshold_sc( + src_a, src_a_stride, src_b, src_b_stride, dst, dst_stride, width, height, + threshold); } -#define INTRINSICCV_INSTANTIATE_TEMPLATE(type) \ - template INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t \ - add_abs_with_threshold(const type *src_a, size_t src_a_stride, \ - const type *src_b, size_t src_b_stride, \ - type *dst, size_t dst_stride, size_t width, \ - size_t height, type threshold) +#define INTRINSICCV_INSTANTIATE_TEMPLATE(type) \ + template INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t \ + saturating_add_abs_with_threshold( \ + const type *src_a, size_t src_a_stride, const type *src_b, \ + size_t src_b_stride, type *dst, size_t dst_stride, size_t width, \ + size_t height, type threshold) INTRINSICCV_INSTANTIATE_TEMPLATE(int16_t); diff --git a/intrinsiccv/src/arithmetics/add_abs_with_threshold_sve2.cpp b/intrinsiccv/src/arithmetics/add_abs_with_threshold_sve2.cpp index 091747b2b6a3e37b5dfaa9da85855d9ece77e200..7779c8541f8297b56d8b93144be35b49c231e122 100644 --- a/intrinsiccv/src/arithmetics/add_abs_with_threshold_sve2.cpp +++ b/intrinsiccv/src/arithmetics/add_abs_with_threshold_sve2.cpp @@ -7,19 +7,22 @@ namespace intrinsiccv::sve2 { template 
-INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t add_abs_with_threshold( - const T *src_a, size_t src_a_stride, const T *src_b, size_t src_b_stride, - T *dst, size_t dst_stride, size_t width, size_t height, T threshold) { - return add_abs_with_threshold_sc(src_a, src_a_stride, src_b, src_b_stride, - dst, dst_stride, width, height, threshold); +INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t +saturating_add_abs_with_threshold(const T *src_a, size_t src_a_stride, + const T *src_b, size_t src_b_stride, T *dst, + size_t dst_stride, size_t width, + size_t height, T threshold) { + return saturating_add_abs_with_threshold_sc(src_a, src_a_stride, src_b, + src_b_stride, dst, dst_stride, + width, height, threshold); } -#define INTRINSICCV_INSTANTIATE_TEMPLATE(type) \ - template INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t \ - add_abs_with_threshold(const type *src_a, size_t src_a_stride, \ - const type *src_b, size_t src_b_stride, \ - type *dst, size_t dst_stride, size_t width, \ - size_t height, type threshold) +#define INTRINSICCV_INSTANTIATE_TEMPLATE(type) \ + template INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t \ + saturating_add_abs_with_threshold( \ + const type *src_a, size_t src_a_stride, const type *src_b, \ + size_t src_b_stride, type *dst, size_t dst_stride, size_t width, \ + size_t height, type threshold) INTRINSICCV_INSTANTIATE_TEMPLATE(int16_t); diff --git a/intrinsiccv/src/supported-types.md b/intrinsiccv/src/supported-types.md index acb3ae2dde161f6d0aaa4d40c71ccb5113c6fcc4..7fc270ffb46f72c6f2adb6fe91138c26446153ff 100644 --- a/intrinsiccv/src/supported-types.md +++ b/intrinsiccv/src/supported-types.md @@ -8,15 +8,15 @@ SPDX-License-Identifier: Apache-2.0 Note: functions listed here are not necessarily exposed to adapter API layer. 
## Basic arithmetic operations -| | s8 | u8 | s16 | u16 | s32 | u32 | s64 | u64 | -|--------------------|-----|-----|-----|-----|-----|-----|-----|-----| -| Saturating Add | x | x | x | x | x | x | x | x | -| Saturating Sub | x | x | x | x | x | x | x | x | -| Saturating Absdiff | x | x | x | x | x | | | | -| Saturating Multiply| x | x | x | x | x | | | | -| Threshold binary | | x | | | | | | | -| AddAbsWithThreshold| | | x | | | | | | -| Scale | | x | | | | | | | +| | s8 | u8 | s16 | u16 | s32 | u32 | s64 | u64 | +|------------------------------|-----|-----|-----|-----|-----|-----|-----|-----| +| Saturating Add | x | x | x | x | x | x | x | x | +| Saturating Sub | x | x | x | x | x | x | x | x | +| Saturating Absdiff | x | x | x | x | x | | | | +| Saturating Multiply | x | x | x | x | x | | | | +| Threshold binary | | x | | | | | | | +| SaturatingAddAbsWithThreshold| | | x | | | | | | +| Scale | | x | | | | | | | ## Colour conversions | | gray-RGB | gray-RGBA | RGB-RGB | RGBA-RGBA | RGB-BGR | RGBA-BGRA | RGB-BGRA | RGB-RGBA | RGBA-BGR | RGBA-RGB | diff --git a/test/api/test_add_abs_with_threshold.cpp b/test/api/test_add_abs_with_threshold.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ce7cfa1d62631737c57143aa0fe292cd9af453dc --- /dev/null +++ b/test/api/test_add_abs_with_threshold.cpp @@ -0,0 +1,151 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include "framework/operation.h" + +template +class SaturatingAddAbsWithThresholdTestBase + : public BinaryOperationTest { + protected: + // Calls the API-under-test in the appropriate way. 
+ intrinsiccv_error_t call_api() override { + return intrinsiccv_saturating_add_abs_with_threshold( + this->inputs_[0].data(), this->inputs_[0].stride(), + this->inputs_[1].data(), this->inputs_[1].stride(), + this->actual_[0].data(), this->actual_[0].stride(), this->width(), + this->height(), this->threshold()); + } + + virtual ElementType threshold() = 0; +}; // end of class SaturatingAddAbsWithThresholdTestBase + +template +class SaturatingAddAbsWithThresholdTestPositive final + : public SaturatingAddAbsWithThresholdTestBase { + using Elements = typename BinaryOperationTest::Elements; + + ElementType threshold() override { return 50; } + + const std::vector& test_elements() override { + static const std::vector kTestElements = { + // clang-format off + {10, 39, 0}, + {10, 40, 0}, + {10, 41, 51}, + // clang-format on + }; + return kTestElements; + } +}; + +template +class SaturatingAddAbsWithThresholdTestNegative final + : public SaturatingAddAbsWithThresholdTestBase { + using Elements = typename BinaryOperationTest::Elements; + + ElementType threshold() override { return 50; } + + const std::vector& test_elements() override { + static const std::vector kTestElements = { + // clang-format off + {-10, -39, 0}, + {-10, -40, 0}, + {-10, -41, 51}, + // clang-format on + }; + return kTestElements; + } +}; + +template +class SaturatingAddAbsWithThresholdTestMin final + : public SaturatingAddAbsWithThresholdTestBase { + using Elements = typename BinaryOperationTest::Elements; + using BinaryOperationTest::min; + using BinaryOperationTest::max; + + ElementType threshold() override { return min(); } + + const std::vector& test_elements() override { + static const std::vector kTestElements = { + // clang-format off + { min(), min(), max()}, + { min(), 0, max()}, + { min(), 1, max()}, + { min(), max(), max()}, + // clang-format on + }; + return kTestElements; + } +}; + +template +class SaturatingAddAbsWithThresholdTestZero final + : public 
SaturatingAddAbsWithThresholdTestBase { + using Elements = typename BinaryOperationTest::Elements; + + ElementType threshold() override { return 1; } + + const std::vector& test_elements() override { + static const std::vector kTestElements = { + // clang-format off + { 0, 0, 0}, + { 0, 1, 0}, + { 0, 2, 2}, + // clang-format on + }; + return kTestElements; + } +}; + +template +class SaturatingAddAbsWithThresholdTestMax final + : public SaturatingAddAbsWithThresholdTestBase { + using Elements = typename BinaryOperationTest::Elements; + using BinaryOperationTest::max; + + ElementType threshold() override { return max() - 1; } + + const std::vector& test_elements() override { + static const std::vector kTestElements = { + // clang-format off + {0, max() - 2, 0}, + {0, max() - 1, 0}, + {0, max(), max()}, + // clang-format on + }; + return kTestElements; + } +}; + +template +class SaturatingAddAbsWithThreshold : public testing::Test {}; + +using ElementTypes = ::testing::Types; + +// Tests \ref saturating_add_abs_with_threshold API. +TYPED_TEST_SUITE(SaturatingAddAbsWithThreshold, ElementTypes); + +TYPED_TEST(SaturatingAddAbsWithThreshold, TestPositive) { + SaturatingAddAbsWithThresholdTestPositive{}.test(); +} + +TYPED_TEST(SaturatingAddAbsWithThreshold, TestNegative) { + SaturatingAddAbsWithThresholdTestNegative{}.test(); +} + +TYPED_TEST(SaturatingAddAbsWithThreshold, TestMin) { + SaturatingAddAbsWithThresholdTestMin{}.test(); +} + +TYPED_TEST(SaturatingAddAbsWithThreshold, TestZero) { + SaturatingAddAbsWithThresholdTestZero{}.test(); +} + +TYPED_TEST(SaturatingAddAbsWithThreshold, TestMax) { + SaturatingAddAbsWithThresholdTestMax{}.test(); +}