diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 59825c41dc82726e5bc8d1a61b371da2068155d4..29e0ab8e216020c0510fc9f9ecc41344be1ca004 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -290,6 +290,25 @@ BENCH_GAUSSIAN_BLUR(7, 3); BENCH_GAUSSIAN_BLUR(15, 1); BENCH_GAUSSIAN_BLUR(15, 3); +static void float_sum(benchmark::State& state) { + std::vector src; + std::vector dst; + + src.resize(image_width * image_height); + dst.resize(image_width * image_height); + + std::mt19937 generator; + std::generate(src.begin(), src.end(), generator); + + for (auto _ : state) { + auto unused = kleidicv_float_sum(src.data(), image_width, dst.data(), + image_width, image_height); + (void)unused; + } +} + +BENCHMARK(float_sum); + template static void sobel_filter(Function f, benchmark::State& state) { bench_functor(state, [f]() { diff --git a/kleidicv/include/kleidicv/kleidicv.h b/kleidicv/include/kleidicv/kleidicv.h index 10a8e945ed936dae19d2b6d56cd5e8e0c0252893..7d69ae51a7cb42a83ea95db2a52ac38760bd1fd1 100644 --- a/kleidicv/include/kleidicv/kleidicv.h +++ b/kleidicv/include/kleidicv/kleidicv.h @@ -1525,6 +1525,17 @@ KLEIDICV_API_DECLARATION(kleidicv_float_conversion_u8_f32, const uint8_t *src, size_t src_stride, float *dst, size_t dst_stride, size_t width, size_t height); +KLEIDICV_API_DECLARATION(kleidicv_float_sum, const float *src, + size_t src_stride, float *dst, size_t width, + size_t height); + +// // Implementation of Sum of floats of all the elements of an array up to 4 +// // channels +// KLEIDICV_API_DECLARATION(kleidicv_float_sum_mult_channels, const float *src, +// size_t src_stride, float *dst, size_t width, +// size_t height, size_t channels, size_t +// channel_index); + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/kleidicv/src/arithmetics/sumf_api.cpp b/kleidicv/src/arithmetics/sumf_api.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cdf5507dde320374ed899ec112becf4a2189799f --- /dev/null +++ b/kleidicv/src/arithmetics/sumf_api.cpp @@ -0,0 +1,29 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include + +#include "kleidicv/ctypes.h" +#include "kleidicv/dispatch.h" +#include "kleidicv/kleidicv.h" +#include "kleidicv/types.h" + +namespace kleidicv { +namespace neon { +template + +kleidicv_error_t float_sum(const T *src, size_t src_stride, T *dst, + size_t width, size_t height); + +} // namespace neon + +} // namespace kleidicv + +#define KLEIDICV_DEFINE_C_API(name, type) \ + KLEIDICV_MULTIVERSION_C_API(name, &kleidicv::neon::float_sum, nullptr, \ + nullptr) + +KLEIDICV_DEFINE_C_API(kleidicv_float_sum, float); diff --git a/kleidicv/src/arithmetics/sumf_mult_channels_neon.cpp b/kleidicv/src/arithmetics/sumf_mult_channels_neon.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6c111df987215c3b42097128f145d63c58849ec9 --- /dev/null +++ b/kleidicv/src/arithmetics/sumf_mult_channels_neon.cpp @@ -0,0 +1,63 @@ +// // SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// // +// // SPDX-License-Identifier: Apache-2.0 + +// #include +// #include + +// #include "kleidicv/ctypes.h" +// #include "kleidicv/dispatch.h" +// #include "kleidicv/kleidicv.h" +// #include "kleidicv/neon.h" +// #include "kleidicv/types.h" + +// namespace kleidicv::neon { + +// template +// kleidicv_error_t float_sum_mult_channels(const T *src, size_t src_stride, +// T *dst, size_t width, size_t height, +// size_t channels, +// size_t channel_index = 0) { +// if (channel_index >= channels) { +// float total_sum = 0.0F; +// for (size_t i = 0; i < channels; ++i) { +// total_sum += dst[i]; +// } +// *dst = total_sum; +// return KLEIDICV_OK; +// } + +// const T *channel_ptr = src + channel_index * width * height; + +// float sum = 0.0F; + +// for (size_t j = 0; j < height; ++j) { +// const T *row_ptr = channel_ptr + j * width; + +// size_t i = 0; +// for (; i + 3 < width; i += 4) { +// float32x4_t vsrc = vld1q_f32(row_ptr + i); +// float32x2_t vpair_sum = +// vpadd_f32(vget_low_f32(vsrc), vget_high_f32(vsrc)); +// sum += vget_lane_f32(vpair_sum, 0) + vget_lane_f32(vpair_sum, 1); +// } + +// for (; i < width; ++i) { +// sum += row_ptr[i]; +// } +// } +// dst[channel_index] = sum; +// return float_sum_mult_channels(src, src_stride, dst, width, height, +// channels, +// channel_index + 1); +// } + +// #define KLEIDICV_INSTANTIATE_TEMPLATE(type) +// template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t +// float_sum_mult_channels(const type *src, size_t src_stride, type +// *dst, +// size_t width, size_t height, size_t channels, +// size_t channel_index = 0) + +// KLEIDICV_INSTANTIATE_TEMPLATE(float); +// } // namespace kleidicv::neon diff --git a/kleidicv/src/arithmetics/sumf_multi_channels_neon_api.cpp b/kleidicv/src/arithmetics/sumf_multi_channels_neon_api.cpp new file mode 100644 index 0000000000000000000000000000000000000000..47e04d20690a179f61e5fb8ceece5b3f16b52ab1 --- /dev/null +++ b/kleidicv/src/arithmetics/sumf_multi_channels_neon_api.cpp @@ -0,0 +1,30 @@ +// // SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// // +// // SPDX-License-Identifier: Apache-2.0 + +// #include + +// #include + +// #include "kleidicv/ctypes.h" +// #include "kleidicv/dispatch.h" +// #include "kleidicv/kleidicv.h" +// #include "kleidicv/types.h" + +// namespace kleidicv { +// namespace neon { +// template +// kleidicv_error_t float_sum_mult_channels(const T *src, size_t src_stride, +// T *dst, size_t width, size_t height, +// size_t channels, +// size_t channel_index = 0); + +// } // namespace neon + +// } // namespace kleidicv + +// #define KLEIDICV_DEFINE_C_API(name, type) +// KLEIDICV_MULTIVERSION_C_API( +// name, &kleidicv::neon::float_sum_mult_channels, nullptr, nullptr) + +// KLEIDICV_DEFINE_C_API(kleidicv_float_sum_mult_channels, float); diff --git a/kleidicv/src/arithmetics/sumf_neon.cpp b/kleidicv/src/arithmetics/sumf_neon.cpp new file mode 100644 index 0000000000000000000000000000000000000000..86c53c9113aeb051746480750306cbc91a41fe49 --- /dev/null +++ b/kleidicv/src/arithmetics/sumf_neon.cpp @@ -0,0 +1,50 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include "kleidicv/ctypes.h" +#include "kleidicv/dispatch.h" +#include "kleidicv/kleidicv.h" +#include "kleidicv/neon.h" +#include "kleidicv/types.h" + +namespace kleidicv::neon { + +template +kleidicv_error_t float_sum(const T *src, size_t src_stride, T *dst, + size_t width, size_t height) { + (*dst) = 0.0F; + + for (size_t j = 0; j < height; ++j) { + const T *row_ptr = src + j * src_stride / sizeof(T); + + size_t i = 0; + for (; i + 3 < width; i += 4) { + float32x4_t vsrc = vld1q_f32(row_ptr + i); + + float32x2_t vpair_sum = + vpadd_f32(vget_low_f32(vsrc), vget_high_f32(vsrc)); + float temp_sum = + vget_lane_f32(vpair_sum, 0) + vget_lane_f32(vpair_sum, 1); + + (*dst) += temp_sum; + } + + for (; i < width; ++i) { + (*dst) += row_ptr[i]; + } + } + + return KLEIDICV_OK; +} + +#define KLEIDICV_INSTANTIATE_TEMPLATE(type) \ + template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t float_sum( \ + const type *src, size_t src_stride, type *dst, size_t width, \ + size_t height) + +KLEIDICV_INSTANTIATE_TEMPLATE(float); +} // namespace kleidicv::neon diff --git a/test/api/test_sumf.cpp b/test/api/test_sumf.cpp new file mode 100644 index 0000000000000000000000000000000000000000..67609de204e0e4a06518fae552b498ea6a43feba --- /dev/null +++ b/test/api/test_sumf.cpp @@ -0,0 +1,100 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include +#include + +#include "framework/operation.h" +#include "kleidicv/ctypes.h" +#include "kleidicv/dispatch.h" +#include "kleidicv/kleidicv.h" +#include "kleidicv/types.h" +#include "test_config.h" + +template +class FloatSum : public testing::Test {}; +using ElementTypes = ::testing::Types; +TYPED_TEST_SUITE(FloatSum, ElementTypes); + +TYPED_TEST(FloatSum, Sum3x5Array) { + float src[5][3] = {{1.0F, 2.0F, 3.0F}, + {4.0F, 5.0F, 6.0F}, + {7.0F, 8.0F, 9.0F}, + {10.0F, 11.0F, 12.0F}, + {13.0F, 14.0F, 15.0F}}; + float dst[1]; + + EXPECT_EQ(KLEIDICV_OK, + kleidicv_float_sum(reinterpret_cast(src), + sizeof(src[0]), dst, 3, 5)); + EXPECT_EQ(120.0F, *dst); +} + +TYPED_TEST(FloatSum, Sum3x5ArrayInf) { + float src[5][3] = {{std::numeric_limits::infinity(), 2.0F, 3.0F}, + {4.0F, 5.0F, 6.0F}, + {7.0F, 8.0F, 9.0F}, + {10.0F, 11.0F, 12.0F}, + {13.0F, 14.0F, 15.0F}}; + float dst[1]; + + EXPECT_EQ(KLEIDICV_OK, + kleidicv_float_sum(reinterpret_cast(src), + sizeof(src[0]), dst, 3, 5)); + EXPECT_EQ(std::numeric_limits::infinity(), *dst); +} + +TYPED_TEST(FloatSum, AddingInfinity) { + TypeParam src[4] = {1.5, 1.7, 6.1, + -std::numeric_limits::infinity()}, + dst[1]; + EXPECT_EQ(KLEIDICV_OK, + kleidicv_float_sum(src, 4 * sizeof(TypeParam), dst, 4, 1.)); + EXPECT_EQ(-std::numeric_limits::infinity(), *dst); +} + +TYPED_TEST(FloatSum, newtest) { + TypeParam src[4] = {1.5, 1.7}, dst[1]; + EXPECT_EQ(KLEIDICV_OK, + kleidicv_float_sum(src, sizeof(TypeParam), dst, 1, 2.)); + EXPECT_EQ(static_cast(3.2), *dst); +} + +TYPED_TEST(FloatSum, Array) { + TypeParam src[5] = {1.5, 1.7, 6.1, 7.1, 1.0}, dst[1]; + EXPECT_EQ(KLEIDICV_OK, kleidicv_float_sum(src, 1, dst, 5, 1)); + EXPECT_EQ(static_cast(17.4), *dst); +} + +TYPED_TEST(FloatSum, Array4) { + TypeParam src[8] = {1.5, 1.7, 6.1, 7.1, 1.5, 1.7, 6.1, 7.1}, dst[1]; + EXPECT_EQ(KLEIDICV_OK, + kleidicv_float_sum(src, sizeof(TypeParam) * 2, dst, 2, 4)); + EXPECT_EQ(static_cast(32.8), *dst); +} + +TYPED_TEST(FloatSum, Infinity) { + TypeParam src[1] = {std::numeric_limits::infinity()}, dst[1]; + EXPECT_EQ(KLEIDICV_OK, + kleidicv_float_sum(src, sizeof(TypeParam), dst, 1, 1.)); + EXPECT_EQ(std::numeric_limits::infinity(), *dst); +} + +TYPED_TEST(FloatSum, 2Darray) { + TypeParam src[6] = {1.5, 1.7, 6.1, 7.1, 1.0, 1.0}, dst[1]; + EXPECT_EQ(KLEIDICV_OK, + kleidicv_float_sum(src, sizeof(TypeParam) * 3, dst, 3, 2.)); + EXPECT_EQ(static_cast(18.4), *dst); +} + +TYPED_TEST(FloatSum, array) { + TypeParam src[10] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}, + dst[1]; + EXPECT_EQ(KLEIDICV_OK, + kleidicv_float_sum(src, sizeof(TypeParam) * 1, dst, 1, 10.)); + EXPECT_EQ(static_cast(10), *dst); +} diff --git a/test/api/test_sumf_mult_channels.cpp b/test/api/test_sumf_mult_channels.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f86983d1e40212a2b9290c6c779950edb685d957 --- /dev/null +++ b/test/api/test_sumf_mult_channels.cpp @@ -0,0 +1,137 @@ +// // SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// // +// // SPDX-License-Identifier: Apache-2.0 + +// #include +// #include + +// #include +// #include + +// #include "framework/operation.h" +// #include "kleidicv/ctypes.h" +// #include "kleidicv/dispatch.h" +// #include "kleidicv/kleidicv.h" +// #include "kleidicv/types.h" +// #include "test_config.h" + +// template +// class FloatSumMultChan : public testing::Test {}; +// using ElementTypes = ::testing::Types; +// TYPED_TEST_SUITE(FloatSumMultChan, ElementTypes); + +// TYPED_TEST(FloatSumMultChan, Sum2x2ArrayWith4Channels) { +// float src[2][2][4] = {{{1.0, 1.0, 1.0, 1.0}, {1.0, 1.0, 1.0, 1.0}}, +// {{1.0, 1.0, 1.0, 1.0}, {1.0, 1.0, 1.0, 1.0}}}; +// float dst[4] = {0}; +// EXPECT_EQ(KLEIDICV_OK, kleidicv_float_sum_mult_channels( +// reinterpret_cast(src), +// sizeof(src[0]), dst, 2, 2, 4, 0)); + +// EXPECT_EQ(16.0F, *dst); +// } +// TYPED_TEST(FloatSumMultChan, Sum2x2ArrayWith2Channels) { +// float src[2][2][2] = {{{100.0, 200.0}, {10000.0, 4000.0}}, +// {{108.0, 100.0}, {6000.0, 5000.0}}}; +// float dst[4] = {0}; + +// EXPECT_EQ(KLEIDICV_OK, kleidicv_float_sum_mult_channels( +// reinterpret_cast(src), +// sizeof(src[0]), dst, 2, 2, 2, 0)); + +// float expected_sum1 = 100.0 + 200.0 + 10000.0 + 4000.0; +// float expected_sum2 = 108.0 + 100.0 + 6000.0 + 5000.0; +// float combined_sum = expected_sum1 + expected_sum2; + +// EXPECT_EQ(combined_sum, *dst); +// } + +// TYPED_TEST(FloatSumMultChan, Sum3x5Array) { +// float src[5][3] = {{1.0F, 2.0F, 3.0F}, +// {4.0F, 5.0F, 6.0F}, +// {7.0F, 8.0F, 9.0F}, +// {10.0F, 11.0F, 12.0F}, +// {13.0F, 14.0F, 15.0F}}; +// float dst[1]; + +// EXPECT_EQ(KLEIDICV_OK, kleidicv_float_sum_mult_channels( +// reinterpret_cast(src), +// sizeof(src[0]), dst, 3, 5, 1, 0)); +// EXPECT_EQ(120.0F, *dst); +// } + +// TYPED_TEST(FloatSumMultChan, Sum3x5ArrayInf) { +// float src[5][3] = +// {{std::numeric_limits::infinity(), 2.0F, 3.0F}, +// {4.0F, 5.0F, 6.0F}, +// {7.0F, 8.0F, 9.0F}, +// {10.0F, 11.0F, 12.0F}, +// {13.0F, 14.0F, 15.0F}}; +// float dst[1]; + +// EXPECT_EQ(KLEIDICV_OK, kleidicv_float_sum_mult_channels( +// reinterpret_cast(src), +// sizeof(src[0]), dst, 3, 5, 1, 0)); +// EXPECT_EQ(std::numeric_limits::infinity(), *dst); +// } + +// TYPED_TEST(FloatSumMultChan, AddingInfinity) { +// TypeParam src[4] = {1.5, 1.7, 6.1, +// -std::numeric_limits::infinity()}, +// dst[1]; +// EXPECT_EQ(KLEIDICV_OK, kleidicv_float_sum_mult_channels( +// src, 4 * sizeof(TypeParam), dst, 4, 1, 1, 0.)); +// EXPECT_EQ(-std::numeric_limits::infinity(), *dst); +// } + +// TYPED_TEST(FloatSumMultChan, newtest) { +// TypeParam src[4] = {1.5, 1.7}, dst[1]; +// EXPECT_EQ(KLEIDICV_OK, kleidicv_float_sum_mult_channels( +// src, sizeof(TypeParam), dst, 1, 2, 1, 0)); +// EXPECT_EQ(static_cast(3.2), *dst); +// } + +// TYPED_TEST(FloatSumMultChan, Array) { +// TypeParam src[5] = {1.5, 1.7, 6.1, 7.1, 1.0}, dst[1]; +// EXPECT_EQ(KLEIDICV_OK, +// kleidicv_float_sum_mult_channels(src, 1, dst, 5, 1, 1, 0)); +// EXPECT_EQ(static_cast(17.4), *dst); +// } + +// TYPED_TEST(FloatSumMultChan, Array4) { +// TypeParam src[8] = {1.5, 1.7, 6.1, 7.1, 1.5, 1.7, 6.1, 7.1}, dst[1]; +// EXPECT_EQ(KLEIDICV_OK, kleidicv_float_sum_mult_channels( +// src, sizeof(TypeParam) * 2, dst, 2, 4, 1, 0)); +// EXPECT_EQ(static_cast(32.8), *dst); +// } + +// TYPED_TEST(FloatSumMultChan, Infinity) { +// TypeParam src[1] = {std::numeric_limits::infinity()}, dst[1]; +// EXPECT_EQ(KLEIDICV_OK, kleidicv_float_sum_mult_channels( +// src, sizeof(TypeParam), dst, 1, 1, 1, 0)); +// EXPECT_EQ(std::numeric_limits::infinity(), *dst); +// } + +// TYPED_TEST(FloatSumMultChan, 2Darray) { +// TypeParam src[6] = {1.5, 1.7, 6.1, 7.1, 1.0, 1.0}, dst[1]; +// EXPECT_EQ(KLEIDICV_OK, kleidicv_float_sum_mult_channels( +// src, sizeof(TypeParam) * 3, dst, 3, 2, 1, 0)); +// EXPECT_EQ(static_cast(18.4), *dst); +// } + +// TYPED_TEST(FloatSumMultChan, array) { +// TypeParam src[10] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}, +// dst[1]; +// EXPECT_EQ(KLEIDICV_OK, kleidicv_float_sum_mult_channels( +// src, sizeof(TypeParam) * 1, dst, 1, 10, 1, 0.)); +// EXPECT_EQ(static_cast(10), *dst); +// } + +// TYPED_TEST(FloatSumMultChan, testno2) { +// TypeParam src[10] = {std::numeric_limits::infinity(), 5.0, 4, +// 1}, +// dst[1]; +// EXPECT_EQ(KLEIDICV_OK, kleidicv_float_sum_mult_channels( +// src, sizeof(TypeParam) * 6, dst, 6, 10, 1, 0.)); +// EXPECT_EQ(std::numeric_limits::infinity(), *dst); +// }