diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 9f8a56b169d1af2daa2663263887d7e85109414a..1adf8c3d038b7fa202bc8ceb3ce3eeb71767abd3 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -144,6 +144,16 @@ static void min_max_loc_u8(benchmark::State& state) { } BENCHMARK(min_max_loc_u8); +static void sum_f32(benchmark::State& state) { + bench_functor(state, []() { + float total; + (void)kleidicv_sum_f32(get_source_buffer_a(), + image_width * sizeof(float), image_width, + image_height, &total); + }); +} +BENCHMARK(sum_f32); + template static void scale(Function f, float factor, float shift, benchmark::State& state) { diff --git a/doc/functionality.md b/doc/functionality.md index 4937516891dc900d17ac02d58ce884ff9cd51f37..70dec5b55896c0c50ca02b6f1f81481c032cb3dd 100644 --- a/doc/functionality.md +++ b/doc/functionality.md @@ -58,6 +58,7 @@ See `doc/opencv.md` for details of the functionality available in OpenCV. ## Aggregate operations | | s8 | u8 | s16 | u16 | s32 | u32 | s64 | f32 | |-----------------|-----|-----|-----|-----|-----|-----|-----|-----| +| Sum | | | | | | | | x | | Minmax | x | x | x | x | x | | | x | | Minmax loc | | x | | | | | | | | Count non-zeros | | x | | | | | | | diff --git a/kleidicv/include/kleidicv/kleidicv.h b/kleidicv/include/kleidicv/kleidicv.h index a31577a0502e2bb4abce74b8768b77a676867501..590583a649485ec259918dca959d743ef36c6a31 100644 --- a/kleidicv/include/kleidicv/kleidicv.h +++ b/kleidicv/include/kleidicv/kleidicv.h @@ -1560,6 +1560,20 @@ KLEIDICV_API_DECLARATION(kleidicv_min_max_loc_u8, const uint8_t *src, size_t src_stride, size_t width, size_t height, size_t *min_offset, size_t *max_offset); +/// Sums all elements of the source data and stores the result in `sum`. +/// +/// @param src Pointer to the source data. Must be non-null. +/// @param src_stride Distance in bytes from the start of one row to the +/// start of the next row in the source data. 
Must be a +/// multiple of `sizeof(type)` and no less than `width * +/// sizeof(type)`, except for single-row images. +/// @param width Number of elements in a row. Must be greater than 0. +/// @param height Number of rows in the data. Must be greater than 0. +/// @param sum Pointer to where the sum is stored. Must be non-null. +/// +KLEIDICV_API_DECLARATION(kleidicv_sum_f32, const float *src, size_t src_stride, + size_t width, size_t height, float *sum); + /// Multiplies the elements in `src` by `scale`, then adds `shift` to the /// result and stores it in `dst`. /// diff --git a/kleidicv/include/kleidicv/neon_intrinsics.h b/kleidicv/include/kleidicv/neon_intrinsics.h index 96c1abba3b4ca8c62cdd3ef4e2a4dda5eae5fb37..df042a7dfa8fa90ab772bcb06036c3423dbe339e 100644 --- a/kleidicv/include/kleidicv/neon_intrinsics.h +++ b/kleidicv/include/kleidicv/neon_intrinsics.h @@ -67,8 +67,27 @@ NEON_BINARY_OP_Q_B8_B16_B32_B64(vzip2q); #undef NEON_BINARY_OP_Q_B8_B16_B32_B64 +static inline float32x4_t vaddq(float32x4_t lhs, float32x4_t rhs) { + return vaddq_f32(lhs, rhs); +} + // clang-format off +// ----------------------------------------------------------------------------- +// vaddv* +// ----------------------------------------------------------------------------- + +static inline int8_t vaddvq(int8x16_t vec) { return vaddvq_s8(vec); } +static inline uint8_t vaddvq(uint8x16_t vec) { return vaddvq_u8(vec); } +static inline int16_t vaddvq(int16x8_t vec) { return vaddvq_s16(vec); } +static inline uint16_t vaddvq(uint16x8_t vec) { return vaddvq_u16(vec); } +static inline int32_t vaddvq(int32x4_t vec) { return vaddvq_s32(vec); } +static inline uint32_t vaddvq(uint32x4_t vec) { return vaddvq_u32(vec); } +static inline int64_t vaddvq(int64x2_t vec) { return vaddvq_s64(vec); } +static inline uint64_t vaddvq(uint64x2_t vec) { return vaddvq_u64(vec); } +static inline float32_t vaddvq(float32x4_t vec) { return vaddvq_f32(vec); } +static inline float64_t vaddvq(float64x2_t vec) { return 
vaddvq_f64(vec); } + // ----------------------------------------------------------------------------- // vabd* // ----------------------------------------------------------------------------- diff --git a/kleidicv/src/arithmetics/sum_api.cpp b/kleidicv/src/arithmetics/sum_api.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2846cbc83419dbd09df8c357c1cae3e331ea05cc --- /dev/null +++ b/kleidicv/src/arithmetics/sum_api.cpp @@ -0,0 +1,21 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "kleidicv/dispatch.h" +#include "kleidicv/kleidicv.h" + +namespace kleidicv { + +namespace neon { + +template +kleidicv_error_t sum(const T *src, size_t src_stride, size_t width, + size_t height, T *sum); + +} // namespace neon + +} // namespace kleidicv + +KLEIDICV_MULTIVERSION_C_API(kleidicv_sum_f32, &kleidicv::neon::sum, + nullptr, nullptr); diff --git a/kleidicv/src/arithmetics/sum_neon.cpp b/kleidicv/src/arithmetics/sum_neon.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f9cbd27574f171e6720ae2eb07c9eca0b671cd91 --- /dev/null +++ b/kleidicv/src/arithmetics/sum_neon.cpp @@ -0,0 +1,54 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "kleidicv/kleidicv.h" +#include "kleidicv/neon.h" +#include "kleidicv/utils.h" + +namespace kleidicv::neon { + +template +class Sum final : public UnrollTwice { + public: + using VecTraits = neon::VecTraits; + using VectorType = typename VecTraits::VectorType; + VectorType vector_sum; + ScalarType scalar_sum; + + Sum() : vector_sum(VectorType{0}), scalar_sum(0) {} + + void vector_path(VectorType src) { vector_sum = vaddq(src, vector_sum); } + + void scalar_path(ScalarType src) { scalar_sum += src; } + + ScalarType get_sum() { return vaddvq(vector_sum) + scalar_sum; } +}; + +template +kleidicv_error_t sum(const ScalarType *src, size_t src_stride, size_t 
width, + size_t height, ScalarType *sum) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_IMAGE_SIZE(width, height); + if (sum == nullptr) { + return KLEIDICV_ERROR_NULL_POINTER; + } + + Rectangle rect{width, height}; + Rows src_rows{src, src_stride}; + Sum operation; + apply_operation_by_rows(operation, rect, src_rows); + + *sum = operation.get_sum(); + + return KLEIDICV_OK; +} + +#define KLEIDICV_INSTANTIATE_TEMPLATE(type) \ + template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t sum( \ + const type *src, size_t src_stride, size_t width, size_t height, \ + type *sum) + +KLEIDICV_INSTANTIATE_TEMPLATE(float); + +} // namespace kleidicv::neon diff --git a/test/api/test_sum.cpp b/test/api/test_sum.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e2002f4c6409b0289c3eb33766ca256c2550fd9d --- /dev/null +++ b/test/api/test_sum.cpp @@ -0,0 +1,246 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include + +#include "kleidicv/kleidicv.h" + +TEST(Sum, Ones) { + // clang-format off + float src[] = { + 1, 1, 1, 1, + 1, 1, 1, 1, + 1, 1, 1, 1, + 1, 1, 1, 1, + }; + // clang-format on + + float sum = std::numeric_limits::max(); + + EXPECT_EQ(KLEIDICV_OK, kleidicv_sum_f32(src, 4 * sizeof(float), 4, 4, &sum)); + EXPECT_FLOAT_EQ(16, sum); +} + +TEST(Sum, NegativeOnes) { + // clang-format off + float src[] = { + -1, -1, -1, -1, + -1, -1, -1, -1, + -1, -1, -1, -1, + -1, -1, -1, -1, + }; + // clang-format on + + float sum = std::numeric_limits::max(); + + EXPECT_EQ(KLEIDICV_OK, kleidicv_sum_f32(src, 4 * sizeof(float), 4, 4, &sum)); + EXPECT_FLOAT_EQ(-16, sum); +} + +TEST(Sum, Zeroes) { + // clang-format off + float src[] = { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + }; + // clang-format on + + float sum = std::numeric_limits::max(); + + EXPECT_EQ(KLEIDICV_OK, kleidicv_sum_f32(src, 4 * sizeof(float), 4, 4, &sum)); + EXPECT_FLOAT_EQ(0, sum); +} + +TEST(Sum, 
Single) { + // clang-format off + float src[] = { + 1 + }; + // clang-format on + + float sum = std::numeric_limits::max(); + + EXPECT_EQ(KLEIDICV_OK, kleidicv_sum_f32(src, 1 * sizeof(float), 1, 1, &sum)); + EXPECT_FLOAT_EQ(1, sum); +} + +TEST(Sum, Empty) { + // clang-format off + float src[] = {}; + // clang-format on + + float sum = std::numeric_limits::max(); + + EXPECT_EQ(KLEIDICV_OK, kleidicv_sum_f32(src, 0, 0, 0, &sum)); + EXPECT_FLOAT_EQ(0, sum); +} + +TEST(Sum, WidthGreaterThanLanes) { + // clang-format off + float src[] = { + 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1 + }; + // clang-format on + + float sum = std::numeric_limits::max(); + + EXPECT_EQ(KLEIDICV_OK, kleidicv_sum_f32(src, 5 * sizeof(float), 5, 5, &sum)); + EXPECT_FLOAT_EQ(25, sum); +} + +TEST(Sum, WidthGreaterThanLanesPadded) { + // clang-format off + float src[] = { + 1, 1, 1, 1, 1, 9, 9, + 1, 1, 1, 1, 1, 9, 9, + 1, 1, 1, 1, 1, 9, 9, + 1, 1, 1, 1, 1, 9, 9, + 1, 1, 1, 1, 1, 9, 9 + }; + // clang-format on + + float sum = std::numeric_limits::max(); + + EXPECT_EQ(KLEIDICV_OK, kleidicv_sum_f32(src, 7 * sizeof(float), 5, 5, &sum)); + EXPECT_FLOAT_EQ(25, sum); +} + +TEST(Sum, WidthLessThanLanes) { + // clang-format off + float src[] = { + 1, 1, 1, + 1, 1, 1, + 1, 1, 1, + 1, 1, 1, + 1, 1, 1 + }; + // clang-format on + + float sum = std::numeric_limits::max(); + + EXPECT_EQ(KLEIDICV_OK, kleidicv_sum_f32(src, 3 * sizeof(float), 3, 5, &sum)); + EXPECT_FLOAT_EQ(15, sum); +} + +TEST(Sum, WidthLessThanLanesPadded) { + // clang-format off + float src[] = { + 1, 1, 1, 9, 9, + 1, 1, 1, 9, 9, + 1, 1, 1, 9, 9, + 1, 1, 1, 9, 9, + 1, 1, 1, 9, 9 + }; + // clang-format on + + float sum = std::numeric_limits::max(); + + EXPECT_EQ(KLEIDICV_OK, kleidicv_sum_f32(src, 5 * sizeof(float), 3, 5, &sum)); + EXPECT_FLOAT_EQ(15, sum); +} + +TEST(Sum, FloatMax) { + // clang-format off + float src[] = { + std::numeric_limits::max(), 1 + }; + // clang-format on + + float sum = -1; + + 
EXPECT_EQ(KLEIDICV_OK, kleidicv_sum_f32(src, 2 * sizeof(float), 2, 1, &sum)); + EXPECT_EQ(sum, std::numeric_limits::max()); +} + +TEST(Sum, FloatLowest) { + // clang-format off + float src[] = { + std::numeric_limits::lowest() , -1 + }; + // clang-format on + + float sum = -1; + + EXPECT_EQ(KLEIDICV_OK, kleidicv_sum_f32(src, 2 * sizeof(float), 2, 1, &sum)); + EXPECT_EQ(sum, std::numeric_limits::lowest()); +} + +TEST(Sum, FloatInfinity) { + // clang-format off + float src[] = { + std::numeric_limits::max(), std::numeric_limits::max() + }; + // clang-format on + + float sum = -1; + + EXPECT_EQ(KLEIDICV_OK, kleidicv_sum_f32(src, 2 * sizeof(float), 2, 1, &sum)); + EXPECT_EQ(sum, std::numeric_limits::infinity()); +} + +TEST(Sum, FloatNegativeInfinity) { + // clang-format off + float src[] = { + std::numeric_limits::lowest(), std::numeric_limits::lowest() + }; + // clang-format on + + float sum = -1; + + EXPECT_EQ(KLEIDICV_OK, kleidicv_sum_f32(src, 2 * sizeof(float), 2, 1, &sum)); + EXPECT_EQ(sum, -std::numeric_limits::infinity()); +} + +TEST(Sum, OversizeImage) { + // clang-format off + float src[] = {}; + // clang-format on + + float sum = std::numeric_limits::max(); + + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + kleidicv_sum_f32(src, 1 * sizeof(float), + KLEIDICV_MAX_IMAGE_PIXELS + 1, 1, &sum)); + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + kleidicv_sum_f32(src, 1 * sizeof(float), 1, + KLEIDICV_MAX_IMAGE_PIXELS + 1, &sum)); + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + kleidicv_sum_f32(src, 1 * sizeof(float), KLEIDICV_MAX_IMAGE_PIXELS, + KLEIDICV_MAX_IMAGE_PIXELS, &sum)); +} + +TEST(Sum, NullPointers) { + // clang-format off + float src[] = {}; + // clang-format on + + float sum = std::numeric_limits::max(); + + EXPECT_EQ(KLEIDICV_ERROR_NULL_POINTER, + kleidicv_sum_f32(nullptr, 0, 0, 0, &sum)); + EXPECT_EQ(KLEIDICV_ERROR_NULL_POINTER, + kleidicv_sum_f32(src, 0, 0, 0, nullptr)); +} + +TEST(Sum, Misalignment) { + // clang-format off + float src[] = { + 1, 1, + 1, 1 + }; + // clang-format on + + 
float sum = std::numeric_limits::max(); + + EXPECT_EQ(KLEIDICV_ERROR_ALIGNMENT, + kleidicv_sum_f32(src, sizeof(float) + 1, 2, 2, &sum)); +}