diff --git a/CHANGELOG.md b/CHANGELOG.md index 096a534e628ac7f4b4cf5916d4ac6016de526359..07f0f59a7db5fa4244aab38e747434395f1bfa60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ This changelog aims to follow the guiding principles of - Exponential function for float. - Bitwise and. - Gaussian Blur for 7x7 kernels. +- Scale function for float. ### Fixed diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp index 627fb545d1af976c558545610688ab18559641bb..1ae98c562cb7aff84cc590b4cab97cd364f9fbd0 100644 --- a/adapters/opencv/kleidicv_hal.cpp +++ b/adapters/opencv/kleidicv_hal.cpp @@ -699,11 +699,19 @@ int convertTo(const uchar *src_data, size_t src_step, int src_depth, return CV_HAL_ERROR_NOT_IMPLEMENTED; } - if (src_depth == CV_8U) { - return convert_error(kleidicv_scale_u8( - reinterpret_cast(src_data), src_step, - reinterpret_cast(dst_data), dst_step, width, height, - static_cast(scale), static_cast(shift))); + switch (src_depth) { + case CV_8U: + return convert_error(kleidicv_scale_u8( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, width, height, + static_cast(scale), static_cast(shift))); + case CV_32F: + return convert_error(kleidicv_scale_f32( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, width, height, + static_cast(scale), static_cast(shift))); + default: + break; } } diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 780400ec9e64080a03fcf3cad5c1a2a1928c2041..4ea11cca07bb96cfd28de67d40a0d62a1571467a 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -125,6 +125,37 @@ static void min_max_loc_u8(benchmark::State& state) { } BENCHMARK(min_max_loc_u8); +template +static void scale(Function f, float factor, float shift, + benchmark::State& state) { + // Setup + std::vector src, dst; + src.resize(image_width * image_height); + dst.resize(image_width * image_height); + + std::mt19937 generator; + std::generate(src.begin(), src.end(), generator); + + for (auto _ : state) { + // This code gets benchmarked + auto unused = + f(src.data(), image_width * sizeof(T), dst.data(), + image_width * sizeof(T), image_width, image_height, factor, shift); + (void)unused; + } +} + +#define BENCH_SCALE(benchname, name, factor, shift, type) \ + static void benchname(benchmark::State& state) { \ + scale(kleidicv_##name, factor, shift, state); \ + } \ + BENCHMARK(benchname) + +BENCH_SCALE(scale_u8_1, scale_u8, 1.0, 4.567, uint8_t); +BENCH_SCALE(scale_u8_generic, scale_u8, 1.234, 4.567, uint8_t); +BENCH_SCALE(scale_f32_1, scale_f32, 1.0, 4.567, float); +BENCH_SCALE(scale_f32_generic, scale_f32, 1.234, 4.567, float); + template static void resize_linear(F f, size_t scale_x, size_t scale_y, benchmark::State& state) { diff --git a/conformity/opencv/CMakeLists.txt b/conformity/opencv/CMakeLists.txt index 7bb122fd65e206a05b9cea6b5662a44370490fc1..360a57639ffe1eb991934d8f618352ec2330f83a 100644 --- a/conformity/opencv/CMakeLists.txt +++ b/conformity/opencv/CMakeLists.txt @@ -37,6 +37,7 @@ add_executable( test_sobel.cpp test_exp.cpp test_float_conv.cpp + test_scale.cpp ) target_link_libraries( @@ -78,7 +79,7 @@ add_executable( test_sobel.cpp test_exp.cpp test_float_conv.cpp - + test_scale.cpp ) target_link_libraries( diff --git a/conformity/opencv/test_scale.cpp b/conformity/opencv/test_scale.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f669f1fa35b1e36a09c537aec8ae72b68f3650e1 --- /dev/null +++ b/conformity/opencv/test_scale.cpp @@ -0,0 +1,66 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 +#include "test_scale.h" + +#include +#include +#include + +#include "opencv2/core/hal/interface.h" + +template +cv::Mat exec_scale(cv::Mat& input_mat) { + cv::Mat result; + input_mat.convertTo(result, -1, Scale / 1000.0, Shift / 1000.0); + return result; +} + +#if MANAGER +template +bool test_scale(int index, RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue) { + cv::RNG rng(0); + + for (size_t x = 5; x <= 16; ++x) { + for (size_t y = 5; y <= 16; ++y) { + cv::Mat input_mat(x, y, Format); + rng.fill(input_mat, cv::RNG::NORMAL, 0.0, 1.0e10); + cv::Mat actual_mat = exec_scale(input_mat); + cv::Mat expected_mat = get_expected_from_subordinate( + index, request_queue, reply_queue, input_mat); + + bool success = + (CV_MAT_DEPTH(Format) == CV_32F && + !are_float_matrices_different(1e-5, actual_mat, + expected_mat)) || + (CV_MAT_DEPTH(Format) == CV_8U && + !are_matrices_different(0, actual_mat, expected_mat)); + if (!success) { + fail_print_matrices(x, y, input_mat, actual_mat, expected_mat); + } + } + } + + return false; +} +#endif + +std::vector& scale_tests_get() { + // clang-format off + static std::vector tests = { + TEST("Scale float32, scale=1.0, shift=2.0", (test_scale<1000, 2000, CV_32FC4>), (exec_scale<1000, 2000>)), + TEST("Scale float32, scale=-10.0, shift=0.0", (test_scale<(-10000), 0, CV_32FC4>), (exec_scale<(-10000), 0>)), + TEST("Scale float32, scale=3.14, shift=2.72", (test_scale<3140, 2720, CV_32FC4>), (exec_scale<3140, 2720>)), + TEST("Scale float32, scale=7e5, shift=8e-3", (test_scale<700000000, 8, CV_32FC4>), (exec_scale<700000000, 8>)), + TEST("Scale float32, scale=1e-3, shift=8e-3", (test_scale<1, 8, CV_32FC4>), (exec_scale<1, 8>)), + + TEST("Scale uint8, scale=1.0, shift=2.0", (test_scale<1000, 2000, CV_8UC4>), (exec_scale<1000, 2000>)), + TEST("Scale uint8, scale=-10.0, shift=0.0", (test_scale<(-10000), 0, CV_8UC4>), (exec_scale<(-10000), 0>)), + TEST("Scale uint8, scale=3.14, shift=-2.72", (test_scale<3140, -2720, CV_8UC4>), (exec_scale<3140, -2720>)), + TEST("Scale uint8, scale=17.17, shift=3.9", (test_scale<17170, 3900, CV_8UC4>), (exec_scale<17170, 3900>)), + TEST("Scale uint8, scale=0.13, shift=230", (test_scale<130, 230000, CV_8UC4>), (exec_scale<130, 230000>)), + }; + // clang-format on + return tests; +} diff --git a/conformity/opencv/test_scale.h b/conformity/opencv/test_scale.h new file mode 100644 index 0000000000000000000000000000000000000000..86916230aac41a51bb6c6bec78e26ca71b070401 --- /dev/null +++ b/conformity/opencv/test_scale.h @@ -0,0 +1,14 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_OPENCV_CONFORMITY_TEST_SCALE_H_ +#define KLEIDICV_OPENCV_CONFORMITY_TEST_SCALE_H_ + +#include + +#include "tests.h" + +std::vector& scale_tests_get(); + +#endif // KLEIDICV_OPENCV_CONFORMITY_TEST_SCALE_H_ diff --git a/conformity/opencv/tests.cpp b/conformity/opencv/tests.cpp index 8064a6565ec987aa949cc235ed83f686fceede89..b7f3e38aaef207c3f40199bb127dae6e7d0fef66 100644 --- a/conformity/opencv/tests.cpp +++ b/conformity/opencv/tests.cpp @@ -16,6 +16,7 @@ #include "test_gaussian_blur.h" #include "test_min_max.h" #include "test_rgb2yuv.h" +#include "test_scale.h" #include "test_sobel.h" static std::vector merge_tests( @@ -28,15 +29,10 @@ static std::vector merge_tests( return all_tests; } -std::vector all_tests = merge_tests({ - binary_op_tests_get, - gaussian_blur_tests_get, - min_max_tests_get, - rgb2yuv_tests_get, - sobel_tests_get, - exp_tests_get, - float_conversion_tests_get, -}); +std::vector all_tests = + merge_tests({binary_op_tests_get, gaussian_blur_tests_get, + min_max_tests_get, rgb2yuv_tests_get, sobel_tests_get, + exp_tests_get, float_conversion_tests_get, scale_tests_get}); #if MANAGER void fail_print_matrices(size_t height, size_t width, cv::Mat& input, diff --git a/doc/functionality.md b/doc/functionality.md index d09a7a721aeb2067c11b4b35e1b151af97584c46..da7e88899710a16e5f070d2e142fdff1ac78eac9 100644 --- a/doc/functionality.md +++ b/doc/functionality.md @@ -18,7 +18,7 @@ See `doc/opencv.md` for details of the functionality available in OpenCV. | Saturating Multiply | x | x | x | x | x | | | | | | | Threshold binary | | x | | | | | | | | | | SaturatingAddAbsWithThreshold| | | x | | | | | | | | -| Scale | | x | | | | | | | | | +| Scale | | x | | | | | | | x | | | CompareEqual | | x | | | | | | | | | | CompareGreater | | x | | | | | | | | | diff --git a/doc/opencv.md b/doc/opencv.md index 57f7d7e32a228d4d9df5a178b1564d5b6cde50c6..a2047eea07f435ce7dbb3a008ba776aae1f84656 100644 --- a/doc/opencv.md +++ b/doc/opencv.md @@ -167,7 +167,9 @@ Notes on parameters: + `CV_32S` ### `convertTo` -This function will scale given input using `scale` and `shift` if they are significant enough, and if `src_depth` and `dst_depth` are equal to `CV_8U`. +This function will scale given input using `scale` and `shift` if they are significant enough, and if `src_depth` equals `dst_depth`. Supported depths: + + `CV_8U` + + `CV_32F` Additionally, it is able to convert between data types as follows: @@ -190,4 +192,4 @@ Notes on parameters: * `operation` - flag specifying correspondence between the arrays. Supported [OpenCV cmp types](https://docs.opencv.org/5.x/d2/de8/group__core__array.html#ga0cc47ff833d40b58ecbe1d609a53d784) are: + `cv::CMP_EQ ` - + `cv::CMP_GT` \ No newline at end of file + + `cv::CMP_GT` diff --git a/kleidicv/include/kleidicv/kleidicv.h b/kleidicv/include/kleidicv/kleidicv.h index b27f585826f59b5ce5d1aa7cb6cf0e7ccd0c4dcf..134ca5ed81987fd7551b6cfc8593d9227eb6a35f 100644 --- a/kleidicv/include/kleidicv/kleidicv.h +++ b/kleidicv/include/kleidicv/kleidicv.h @@ -1460,6 +1460,10 @@ KLEIDICV_API_DECLARATION(kleidicv_min_max_loc_u8, const uint8_t *src, KLEIDICV_API_DECLARATION(kleidicv_scale_u8, const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, size_t width, size_t height, float scale, float shift); +/// @copydoc kleidicv_scale_u8 +KLEIDICV_API_DECLARATION(kleidicv_scale_f32, const float *src, + size_t src_stride, float *dst, size_t dst_stride, + size_t width, size_t height, float scale, float shift); /// Exponential function, input is the elements in `src`, output is the elements /// in `dst`. diff --git a/kleidicv/src/arithmetics/scale_api.cpp b/kleidicv/src/arithmetics/scale_api.cpp index d7d22964b8f3e32097479e79f01b1816b4d8b0ad..ef6ed8584dab2c57561d0396223fcba80cef5889 100644 --- a/kleidicv/src/arithmetics/scale_api.cpp +++ b/kleidicv/src/arithmetics/scale_api.cpp @@ -16,18 +16,27 @@ kleidicv_error_t scale(const T *src, size_t src_stride, T *dst, float scale, float shift); } // namespace neon -namespace sve2 {} // namespace sve2 +namespace sve2 { -namespace sme2 {} // namespace sme2 +template +kleidicv_error_t scale(const T *src, size_t src_stride, T *dst, + size_t dst_stride, size_t width, size_t height, + float scale, float shift); -} // namespace kleidicv +} // namespace sve2 + +namespace sme2 { +template +kleidicv_error_t scale(const T *src, size_t src_stride, T *dst, + size_t dst_stride, size_t width, size_t height, + float scale, float shift); + +} // namespace sme2 -#define KLEIDICV_DEFINE_SCALE_API(name, type) \ - KLEIDICV_MULTIVERSION_C_API(name, &kleidicv::neon::scale, nullptr, \ - nullptr) +} // namespace kleidicv -KLEIDICV_DEFINE_SCALE_API(kleidicv_scale_u8, uint8_t); -// KLEIDICV_DEFINE_SCALE_API(kleidicv_scale_s8, int8_t); -// KLEIDICV_DEFINE_SCALE_API(kleidicv_scale_u16, uint16_t); -// KLEIDICV_DEFINE_SCALE_API(kleidicv_scale_s16, int16_t); -// KLEIDICV_DEFINE_SCALE_API(kleidicv_scale_s32, int32_t); +KLEIDICV_MULTIVERSION_C_API(kleidicv_scale_u8, &kleidicv::neon::scale, + nullptr, nullptr); +KLEIDICV_MULTIVERSION_C_API(kleidicv_scale_f32, &kleidicv::neon::scale, + &kleidicv::sve2::scale, + &kleidicv::sme2::scale); diff --git a/kleidicv/src/arithmetics/scale_neon.cpp b/kleidicv/src/arithmetics/scale_neon.cpp index 9c29957314a860cad99ff4a03a148db56fce133b..7c46ec0454964b852f9486aa83e064d7edbab87f 100644 --- a/kleidicv/src/arithmetics/scale_neon.cpp +++ b/kleidicv/src/arithmetics/scale_neon.cpp @@ -8,12 +8,15 @@ #include "kleidicv/kleidicv.h" #include "kleidicv/neon.h" +#include "kleidicv/traits.h" namespace kleidicv::neon { // Scale algorithm: for each value in the source, // dst[i] = src[i] * scale + shift (floating point operation) // +// Unsigned 8-bit implementation +// // Since converting from uint8 to float32 and back takes more steps, // 'ScaleTbx' saves time by pre-calculating all 256 values and uses TBLs // and TBXs to map the values directly from uint8 to uint8: @@ -51,9 +54,9 @@ namespace kleidicv::neon { // size and speed. template -class ScaleBase : public UnrollTwice { +class ScaleIntBase : public UnrollTwice { public: - ScaleBase(float scale, float shift) : scale_{scale}, shift_{shift} {} + ScaleIntBase(float scale, float shift) : scale_{scale}, shift_{shift} {} protected: static constexpr ScalarType ScalarMax = @@ -70,15 +73,16 @@ class ScaleBase : public UnrollTwice { float scale_, shift_; }; -template -class ScaleTbx final : public ScaleBase { +class ScaleUint8Tbx final : public ScaleIntBase { public: + using ScalarType = uint8_t; using VecTraits = neon::VecTraits; using VectorType = typename VecTraits::VectorType; using Vector2Type = typename VecTraits::Vector2Type; using Vector3Type = typename VecTraits::Vector3Type; - ScaleTbx(float scale, float shift) : ScaleBase(scale, shift) { + ScaleUint8Tbx(float scale, float shift) + : ScaleIntBase(scale, shift) { constexpr size_t TableLength = 1 << (CHAR_BIT * sizeof(ScalarType)); ScalarType values[TableLength]; for (size_t i = 0; i < TableLength; ++i) { @@ -112,21 +116,21 @@ class ScaleTbx final : public ScaleBase { ScalarType scalar_path(ScalarType src) { return this->scale_value(src); } private: - Vector3Type t0_3_, t1_3_, t3_3_, t5_3_; - Vector2Type t2_2_, t4_2_; + Vector3Type t0_3_{}, t1_3_{}, t3_3_{}, t5_3_{}; + Vector2Type t2_2_{}, t4_2_{}; VectorType v_step3_, v_step2_; -}; // end of class ScaleTbx +}; // end of class ScaleUint8Tbx -// Opposite to ScaleTbx, ScaleFloat is the direct approach: +// Opposite to ScaleUint8Tbx, ScaleUint8Calc is the direct approach: // - calculate dst[i] = src[i] * scale + shift using vector instructions -template -class ScaleFloat final : public ScaleBase { +class ScaleUint8Calc final : public ScaleIntBase { public: + using ScalarType = uint8_t; using VecTraits = neon::VecTraits; using VectorType = typename VecTraits::VectorType; - ScaleFloat(float scale, float shift) - : ScaleBase(scale, shift), + ScaleUint8Calc(float scale, float shift) + : ScaleIntBase(scale, shift), vscale_{vdupq_n_f32(scale)}, vshift_{vdupq_n_f32(shift)} {} @@ -166,10 +170,68 @@ class ScaleFloat final : public ScaleBase { } float32x4_t vscale_, vshift_; -}; // end of class ScaleFloat +}; // end of class ScaleUint8Calc + +// ----------------------------------------------------------------------- +// Float implementation +// ----------------------------------------------------------------------- + +class AddFloat final : public UnrollTwice, + public UnrollOnce, + public TryToAvoidTailLoop { + public: + using ScalarType = float; + using VecTraits = neon::VecTraits; + using VectorType = typename VecTraits::VectorType; + + explicit AddFloat(float shift) : shift_{shift}, vshift_{vdupq_n_f32(shift)} {} + + VectorType vector_path(VectorType src) { return vaddq_f32(vshift_, src); } + + // NOLINTBEGIN(readability-make-member-function-const) + ScalarType scalar_path(ScalarType src) { return src + shift_; } + // NOLINTEND(readability-make-member-function-const) + + private: + float shift_; + float32x4_t vshift_; +}; // end of class AddFloat + +class ScaleFloat final : public UnrollTwice, + public UnrollOnce, + public TryToAvoidTailLoop { + public: + using ScalarType = float; + using VecTraits = neon::VecTraits; + using VectorType = typename VecTraits::VectorType; + + ScaleFloat(float scale, float shift) + : scale_{scale}, + shift_{shift}, + vscale_{vdupq_n_f32(scale)}, + vshift_{vdupq_n_f32(shift)} {} + + VectorType vector_path(VectorType src) { + return vmlaq_f32(vshift_, src, vscale_); + } + + // NOLINTBEGIN(readability-make-member-function-const) + ScalarType scalar_path(ScalarType src) { return src * scale_ + shift_; } + // NOLINTEND(readability-make-member-function-const) + + private: + float scale_, shift_; + float32x4_t vscale_, vshift_; +}; // end of class ScaleFloat template kleidicv_error_t scale(const T *src, size_t src_stride, T *dst, + size_t dst_stride, size_t width, size_t height, + float scale, float shift); + +// Specialization for uint8_t +template <> +kleidicv_error_t scale(const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, size_t width, size_t height, float scale, float shift) { CHECK_POINTER_AND_STRIDE(src, src_stride, height); @@ -177,30 +239,41 @@ kleidicv_error_t scale(const T *src, size_t src_stride, T *dst, CHECK_IMAGE_SIZE(width, height); Rectangle rect{width, height}; - Rows src_rows{src, src_stride}; - Rows dst_rows{dst, dst_stride}; + Rows src_rows{src, src_stride}; + Rows dst_rows{dst, dst_stride}; // For smaller inputs, the full calculation is the faster if (width * height < 2500) { // empirical value - ScaleFloat operation(scale, shift); + ScaleUint8Calc operation(scale, shift); apply_operation_by_rows(operation, rect, src_rows, dst_rows); } else { // For bigger inputs, it's faster to pre-calculate the table // and map those values during the run - ScaleTbx operation(scale, shift); + ScaleUint8Tbx operation(scale, shift); apply_operation_by_rows(operation, rect, src_rows, dst_rows); } return KLEIDICV_OK; } -#define KLEIDICV_INSTANTIATE_TEMPLATE(type) \ - template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t scale( \ - const type *src, size_t src_stride, type *dst, size_t dst_stride, \ - size_t width, size_t height, float scale, float shift) +// Specialization for float +template <> +kleidicv_error_t scale(const float *src, size_t src_stride, float *dst, + size_t dst_stride, size_t width, size_t height, + float scale, float shift) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); + CHECK_IMAGE_SIZE(width, height); -KLEIDICV_INSTANTIATE_TEMPLATE(uint8_t); -// KLEIDICV_INSTANTIATE_TEMPLATE(int8_t); -// KLEIDICV_INSTANTIATE_TEMPLATE(int16_t); -// KLEIDICV_INSTANTIATE_TEMPLATE(uint16_t); -// KLEIDICV_INSTANTIATE_TEMPLATE(int32_t); + Rectangle rect{width, height}; + Rows src_rows{src, src_stride}; + Rows dst_rows{dst, dst_stride}; + if (scale == 1.0) { + AddFloat operation(shift); + apply_operation_by_rows(operation, rect, src_rows, dst_rows); + } else { + ScaleFloat operation(scale, shift); + apply_operation_by_rows(operation, rect, src_rows, dst_rows); + } + return KLEIDICV_OK; +} } // namespace kleidicv::neon diff --git a/kleidicv/src/arithmetics/scale_sc.h b/kleidicv/src/arithmetics/scale_sc.h new file mode 100644 index 0000000000000000000000000000000000000000..269c15b4c6a18f35628a42fa753845f156fa16ef --- /dev/null +++ b/kleidicv/src/arithmetics/scale_sc.h @@ -0,0 +1,88 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_SCALE_SC_H +#define KLEIDICV_SCALE_SC_H + +#include "kleidicv/kleidicv.h" +#include "kleidicv/sve2.h" + +namespace KLEIDICV_TARGET_NAMESPACE { + +class AddFloat final : public UnrollTwice { + public: + using ContextType = Context; + using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits; + using VectorType = typename VecTraits::VectorType; + + explicit AddFloat(const svfloat32_t &svshift) KLEIDICV_STREAMING_COMPATIBLE + : svshift_{svshift} {} + + // NOLINTBEGIN(readability-make-member-function-const) + VectorType vector_path(ContextType ctx, + VectorType src) KLEIDICV_STREAMING_COMPATIBLE { + return svadd_x(ctx.predicate(), src, svshift_); + } + // NOLINTEND(readability-make-member-function-const) + + private: + const svfloat32_t &svshift_; +}; // end of class AddFloat + +class ScaleFloat final : public UnrollTwice { + public: + using ContextType = Context; + using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits; + using VectorType = typename VecTraits::VectorType; + + ScaleFloat(const svfloat32_t &svscale, + const svfloat32_t &svshift) KLEIDICV_STREAMING_COMPATIBLE + : svscale_{svscale}, + svshift_{svshift} {} + + // NOLINTBEGIN(readability-make-member-function-const) + VectorType vector_path(ContextType ctx, + VectorType src) KLEIDICV_STREAMING_COMPATIBLE { + return svmla_x(ctx.predicate(), svshift_, src, svscale_); + } + // NOLINTEND(readability-make-member-function-const) + + private: + const svfloat32_t &svscale_, &svshift_; +}; // end of class ScaleFloat + +template +kleidicv_error_t scale_sc(const T *src, size_t src_stride, T *dst, + size_t dst_stride, size_t width, size_t height, + float scale, + float shift) KLEIDICV_STREAMING_COMPATIBLE; + +// Specialization for float +template <> +kleidicv_error_t scale_sc(const float *src, size_t src_stride, float *dst, + size_t dst_stride, size_t width, size_t height, + float scale, + float shift) KLEIDICV_STREAMING_COMPATIBLE { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); + CHECK_IMAGE_SIZE(width, height); + + Rectangle rect{width, height}; + Rows src_rows{src, src_stride}; + Rows dst_rows{dst, dst_stride}; + svfloat32_t svscale = svdup_f32(scale); + svfloat32_t svshift = svdup_f32(shift); + if (scale == 1.0) { + AddFloat operation(svshift); + apply_operation_by_rows(operation, rect, src_rows, dst_rows); + } else { + ScaleFloat operation(svscale, svshift); + apply_operation_by_rows(operation, rect, src_rows, dst_rows); + } + return KLEIDICV_OK; +} + +} // namespace KLEIDICV_TARGET_NAMESPACE + +#endif // KLEIDICV_SCALE_SC_H diff --git a/kleidicv/src/arithmetics/scale_sme2.cpp b/kleidicv/src/arithmetics/scale_sme2.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8c1b6822884f94368b9e3bdc049850cf6356a72a --- /dev/null +++ b/kleidicv/src/arithmetics/scale_sme2.cpp @@ -0,0 +1,24 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "scale_sc.h" + +namespace kleidicv::sme2 { + +template +KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t +scale(const T* src, size_t src_stride, T* dst, size_t dst_stride, size_t width, + size_t height, float scale, float shift) { + return scale_sc(src, src_stride, dst, dst_stride, width, height, scale, + shift); +} + +#define KLEIDICV_INSTANTIATE_TEMPLATE(type) \ + template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t scale( \ + const type* src, size_t src_stride, type* dst, size_t dst_stride, \ + size_t width, size_t height, float scale, float shift) + +KLEIDICV_INSTANTIATE_TEMPLATE(float); + +} // namespace kleidicv::sme2 diff --git a/kleidicv/src/arithmetics/scale_sve2.cpp b/kleidicv/src/arithmetics/scale_sve2.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ba7b1c32e2162fdc1078c1ebd76673419b666b5d --- /dev/null +++ b/kleidicv/src/arithmetics/scale_sve2.cpp @@ -0,0 +1,25 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "scale_sc.h" + +namespace kleidicv::sve2 { + +template +KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t scale(const T* src, size_t src_stride, + T* dst, size_t dst_stride, + size_t width, size_t height, + float scale, float shift) { + return scale_sc(src, src_stride, dst, dst_stride, width, height, scale, + shift); +} + +#define KLEIDICV_INSTANTIATE_TEMPLATE(type) \ + template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t scale( \ + const type* src, size_t src_stride, type* dst, size_t dst_stride, \ + size_t width, size_t height, float scale, float shift) + +KLEIDICV_INSTANTIATE_TEMPLATE(float); + +} // namespace kleidicv::sve2 diff --git a/scripts/benchmark/run_benchmarks_4K.sh b/scripts/benchmark/run_benchmarks_4K.sh index d51b6a72ff82a8ab13d36f6dfe766879cdae9f0e..dc20bddcaa125a413237be32f74fa4f75fc5a359 100755 --- a/scripts/benchmark/run_benchmarks_4K.sh +++ b/scripts/benchmark/run_benchmarks_4K.sh @@ -52,5 +52,7 @@ RES=$(printf "${RES}\n$(${DEV_DIR}/perf_test_op.sh $CPU $THERMAL Resize4x4_8b op RES=$(printf "${RES}\n$(${DEV_DIR}/perf_test_op.sh $CPU $THERMAL Resize4x4_float opencv_perf_imgproc '*resizeUpLinearNonExact*' '(32FC1, (960x540, 3840x2160))')") RES=$(printf "${RES}\n$(${DEV_DIR}/perf_test_op.sh $CPU $THERMAL Scale opencv_perf_core '*convertTo*' '(3840x2160, 8UC1, 8UC1, 1, 1.234, 4.567)')") +RES=$(printf "${RES}\n$(${DEV_DIR}/perf_test_op.sh $CPU $THERMAL Scale_float_1.0 opencv_perf_core '*convertTo*' '(1920x1080, 32FC1, 32FC1, 1, 1, 4.567)')") +RES=$(printf "${RES}\n$(${DEV_DIR}/perf_test_op.sh $CPU $THERMAL Scale_float opencv_perf_core '*convertTo*' '(1920x1080, 32FC1, 32FC1, 1, 1.234, 4.567)')") echo "$RES" diff --git a/scripts/benchmark/run_benchmarks_FHD.sh b/scripts/benchmark/run_benchmarks_FHD.sh index 2f21a29bd18e4ea46f4b8f59d2676ee4edddecaa..53f426b960c8b7865f633f52d5e6979e69aa6dc5 100755 --- a/scripts/benchmark/run_benchmarks_FHD.sh +++ b/scripts/benchmark/run_benchmarks_FHD.sh @@ -52,5 +52,7 @@ RES=$(printf "${RES}\n$(${DEV_DIR}/perf_test_op.sh $CPU $THERMAL Resize4x4_8b op RES=$(printf "${RES}\n$(${DEV_DIR}/perf_test_op.sh $CPU $THERMAL Resize4x4_float opencv_perf_imgproc '*resizeUpLinearNonExact*' '(32FC1, (480x270, 1920x1080))')") RES=$(printf "${RES}\n$(${DEV_DIR}/perf_test_op.sh $CPU $THERMAL Scale opencv_perf_core '*convertTo*' '(1920x1080, 8UC1, 8UC1, 1, 1.234, 4.567)')") +RES=$(printf "${RES}\n$(${DEV_DIR}/perf_test_op.sh $CPU $THERMAL Scale_float_1.0 opencv_perf_core '*convertTo*' '(1920x1080, 32FC1, 32FC1, 1, 1, 4.567)')") +RES=$(printf "${RES}\n$(${DEV_DIR}/perf_test_op.sh $CPU $THERMAL Scale_float opencv_perf_core '*convertTo*' '(1920x1080, 32FC1, 32FC1, 1, 1.234, 4.567)')") echo "$RES" diff --git a/test/api/test_scale.cpp b/test/api/test_scale.cpp index 3c7232e3a4733fd18877d2acd1812b688ee5ac5e..39663a2571881ab681e4fe5255fc8ecfddfd53e3 100644 --- a/test/api/test_scale.cpp +++ b/test/api/test_scale.cpp @@ -4,53 +4,90 @@ #include +#include +#include + #include "framework/array.h" #include "framework/generator.h" #include "framework/operation.h" #include "kleidicv/kleidicv.h" #include "test_config.h" -#define KLEIDICV_SCALE(type, suffix) \ - KLEIDICV_API(scale, kleidicv_scale_##suffix, type) +template +static DestinationType saturating_cast(SourceType value) { + if (value > + static_cast(std::numeric_limits::max())) { + return std::numeric_limits::max(); + } + if (value < std::numeric_limits::lowest()) { + return std::numeric_limits::lowest(); + } + return static_cast(value); +} -KLEIDICV_SCALE(uint8_t, u8); +uint8_t scalar_scale_u8(uint8_t x, float scale, float shift) { + float result = static_cast(x) * scale + shift; + if (result < std::numeric_limits::min()) { + return std::numeric_limits::min(); + } + if (result > std::numeric_limits::max()) { + return std::numeric_limits::max(); + } + return static_cast(lrintf(result)); +} + +float scalar_scale_f32(float x, float scale, float shift) { + return x * scale + shift; +} + +#define KLEIDICV_SCALE_API(type, suffix) \ + KLEIDICV_API(scale_api, kleidicv_scale_##suffix, type) + +#define KLEIDICV_SCALE_OPERATION(type, suffix) \ + KLEIDICV_API(scale_operation, &scalar_scale_##suffix, type) + +KLEIDICV_SCALE_API(uint8_t, u8); +KLEIDICV_SCALE_OPERATION(uint8_t, u8); +KLEIDICV_SCALE_API(float, f32); +KLEIDICV_SCALE_OPERATION(float, f32); template class ScaleTestBase : public UnaryOperationTest { protected: using UnaryOperationTest::min; using UnaryOperationTest::max; + using typename UnaryOperationTest::Elements; // Calls the API-under-test in the appropriate way. kleidicv_error_t call_api() override { - return kleidicv_scale_u8(this->inputs_[0].data(), this->inputs_[0].stride(), - this->actual_[0].data(), this->actual_[0].stride(), - this->width(), this->height(), this->scale(), - this->shift()); + return scale_api()( + this->inputs_[0].data(), this->inputs_[0].stride(), + this->actual_[0].data(), this->actual_[0].stride(), this->width(), + this->height(), this->scale(), this->shift()); } virtual float scale() = 0; virtual float shift() = 0; // Prepares expected outputs for the operation. void setup() override { - ElementType expected = 0; - if (shift() < min()) { - expected = min(); - } else if (shift() > max()) { - expected = max(); - } else { - expected = lrintf(shift()); - } - this->expected_[0].fill(expected); + this->expected_[0].fill( + scale_operation()(0, scale(), shift())); UnaryOperationTest::setup(); } + + void fill_expected(std::vector& elements) { + for (size_t i = 0; i < elements.size(); ++i) { + elements[i].values[1] = scale_operation()( + elements[i].values[0], scale(), shift()); + } + } }; // end of class ScaleTestBase template class ScaleTestLinearBase { public: // Sets the number of padding bytes at the end of rows. - ScaleTestLinearBase& with_padding(size_t padding) { + ScaleTestLinearBase& with_padding(size_t padding) { padding_ = padding; return *this; } @@ -73,42 +110,51 @@ class ScaleTestLinearBase { static constexpr ElementType max() { return std::numeric_limits::max(); } + static constexpr ElementType lowest() { + return std::numeric_limits::lowest(); + } virtual float scale() = 0; virtual float shift() = 0; private: class GenerateLinearSeries : public test::Generator { public: - explicit GenerateLinearSeries(ElementType start_from) - : counter_{start_from} {} + explicit GenerateLinearSeries(ElementType start_from, ElementType step) + : counter_{start_from}, step_{step} {} - std::optional next() override { return counter_++; } + std::optional next() override { return counter_ + step_; } private: - ElementType counter_; + ElementType counter_, step_; }; // end of class GenerateLinearSeries // Number of padding bytes at the end of rows. size_t padding_{0}; void test_linear(size_t width, size_t minimum_size) { - size_t image_size = - std::max(minimum_size, static_cast(max() - min())); + size_t image_size = std::max( + minimum_size, + std::min(saturating_cast(max() - lowest()), + 10000UL)); + size_t step = + std::max(static_cast(image_size / (max() - lowest())), + static_cast(1)); size_t height = image_size / width + 1; test::Array2D source(width, height, padding_, 1); test::Array2D expected(width, height, padding_, 1); test::Array2D actual = test::Array2D(width, height, padding_, 1); - GenerateLinearSeries generator(min()); + GenerateLinearSeries generator(min(), step); source.fill(generator); calculate_expected(source, expected); - ASSERT_EQ(KLEIDICV_OK, kleidicv_scale_u8(source.data(), source.stride(), - actual.data(), actual.stride(), - width, height, scale(), shift())); + ASSERT_EQ(KLEIDICV_OK, + scale_api()(source.data(), source.stride(), + actual.data(), actual.stride(), width, + height, scale(), shift())); EXPECT_EQ_ARRAY2D(expected, actual); } @@ -118,19 +164,9 @@ class ScaleTestLinearBase { test::Array2D& expected) { for (size_t hindex = 0; hindex < source.height(); ++hindex) { for (size_t vindex = 0; vindex < source.width(); ++vindex) { - ElementType calculated = 0; - // NOLINTBEGIN(clang-analyzer-core.UndefinedBinaryOperatorResult) - float result = *source.at(hindex, vindex) * scale() + shift(); - // NOLINTEND(clang-analyzer-core.UndefinedBinaryOperatorResult) // NOLINTBEGIN(clang-analyzer-core.uninitialized.Assign) - if (result > max()) { - calculated = max(); - } else if (result < min()) { - calculated = min(); - } else { - calculated = lrintf(result); - } - *expected.at(hindex, vindex) = calculated; + *expected.at(hindex, vindex) = scale_operation()( + *source.at(hindex, vindex), scale(), shift()); // NOLINTEND(clang-analyzer-core.uninitialized.Assign) } } @@ -159,16 +195,17 @@ template class ScaleTestAdd final : public ScaleTestBase { using Elements = typename UnaryOperationTest::Elements; - float scale() override { return 6; } - float shift() override { return 2; } + float scale() override { return 1; } + float shift() override { return 6; } const std::vector& test_elements() override { - static const std::vector kTestElements = { + static std::vector kTestElements = { // clang-format off - { 8, 50}, - {12, 74}, + { 8, 14}, + {12, 18}, // clang-format on }; + ScaleTestBase::fill_expected(kTestElements); return kTestElements; } }; @@ -181,12 +218,13 @@ class ScaleTestSubtract final : public ScaleTestBase { float shift() override { return -3; } const std::vector& test_elements() override { - static const std::vector kTestElements = { + static std::vector kTestElements = { // clang-format off { 6, 45}, { 20, 157}, // clang-format on }; + ScaleTestBase::fill_expected(kTestElements); return kTestElements; } }; @@ -199,12 +237,13 @@ class ScaleTestDivide final : public ScaleTestBase { float shift() override { return 3; } const std::vector& test_elements() override { - static const std::vector kTestElements = { + static std::vector kTestElements = { // clang-format off - { 252, 66}, - { 255, 67}, + { 252, 0}, + { 255, 0}, // clang-format on }; + ScaleTestBase::fill_expected(kTestElements); return kTestElements; } }; @@ -217,12 +256,13 @@ class ScaleTestMultiply final : public ScaleTestBase { float shift() override { return 2.72; } const std::vector& test_elements() override { - static const std::vector kTestElements = { + static std::vector kTestElements = { // clang-format off - { 60, 191}, - { 75, 238}, + { 60, 0}, + { 45, 0}, // clang-format on }; + ScaleTestBase::fill_expected(kTestElements); return kTestElements; } }; @@ -232,18 +272,21 @@ class ScaleTestZero final : public ScaleTestBase { using Elements = typename UnaryOperationTest::Elements; using UnaryOperationTest::min; using UnaryOperationTest::max; + using UnaryOperationTest::lowest; float scale() override { return 0; } float shift() override { return 0; } const std::vector& test_elements() override { - static const std::vector kTestElements = { + static std::vector kTestElements = { // clang-format off + { lowest(), 0}, { min(), 0}, { 0, 0}, { max(), 0}, // clang-format on }; + ScaleTestBase::fill_expected(kTestElements); return kTestElements; } }; @@ -252,18 +295,24 @@ template class ScaleTestUnderflowByShift final : public ScaleTestBase { using Elements = typename UnaryOperationTest::Elements; using UnaryOperationTest::min; - using UnaryOperationTest::max; + using UnaryOperationTest::lowest; float scale() override { return 1; } float shift() override { return -1; } const std::vector& test_elements() override { - static const std::vector kTestElements = { + static std::vector kTestElements = { // clang-format off - {min() + 1, min()}, - { min(), min()}, + {lowest() + 1, 0}, + { lowest(), 0}, + { min() + 1, 0}, + { min(), 0}, + { 0, 0}, + { 1, 0}, + { 2, 0}, // clang-format on }; + ScaleTestBase::fill_expected(kTestElements); return kTestElements; } }; @@ -277,12 +326,13 @@ class ScaleTestOverflowByShift final : public ScaleTestBase { float shift() override { return 1; } const std::vector& test_elements() override { - static const std::vector kTestElements = { + static std::vector kTestElements = { // clang-format off - {max() - 1, max()}, - { max(), max()}, + {max() - 1, 0}, + { max(), 0}, // clang-format on }; + ScaleTestBase::fill_expected(kTestElements); return kTestElements; } }; @@ -297,11 +347,12 @@ class ScaleTestUnderflowByScale final : public ScaleTestBase { float shift() override { return 0; } const std::vector& test_elements() override { - static const std::vector kTestElements = { + static std::vector kTestElements = { // clang-format off - { max(), min()} + { max(), 0}, // clang-format on }; + ScaleTestBase::fill_expected(kTestElements); return kTestElements; } }; @@ -315,11 +366,12 @@ class ScaleTestOverflowByScale final : public ScaleTestBase { float shift() override { return 0; } const std::vector& test_elements() override { - static const std::vector kTestElements = { + static std::vector kTestElements = { // clang-format off - { max(), max()}, + { max(), 0}, // clang-format on }; + ScaleTestBase::fill_expected(kTestElements); return kTestElements; } }; @@ -327,9 +379,8 @@ class ScaleTestOverflowByScale final : public ScaleTestBase { template class ScaleTest : public testing::Test {}; -using ElementTypes = ::testing::Types; +using ElementTypes = ::testing::Types; -// Tests kleidicv_scale_u8 API. TYPED_TEST_SUITE(ScaleTest, ElementTypes); TYPED_TEST(ScaleTest, TestScalar1) { @@ -378,13 +429,37 @@ TYPED_TEST(ScaleTest, TestVector3Tbx) { ScaleTestLinear3{}.test_vector(2500); } -TYPED_TEST(ScaleTest, TestAdd) { ScaleTestAdd{}.test(); } +TYPED_TEST(ScaleTest, TestAdd) { + ScaleTestAdd{}.test(); + ScaleTestAdd{} + .with_padding(1) + .with_width(test::Options::vector_lanes() - 1) + .test(); +} -TYPED_TEST(ScaleTest, TestSubtract) { ScaleTestSubtract{}.test(); } +TYPED_TEST(ScaleTest, TestSubtract) { + ScaleTestSubtract{}.test(); + ScaleTestSubtract{} + .with_padding(1) + .with_width(test::Options::vector_lanes() - 1) + .test(); +} -TYPED_TEST(ScaleTest, TestDivide) { ScaleTestDivide{}.test(); } +TYPED_TEST(ScaleTest, TestDivide) { + ScaleTestDivide{}.test(); + ScaleTestDivide{} + .with_padding(1) + .with_width(test::Options::vector_lanes() - 1) + .test(); +} -TYPED_TEST(ScaleTest, TestMultiply) { ScaleTestMultiply{}.test(); } +TYPED_TEST(ScaleTest, TestMultiply) { + ScaleTestMultiply{}.test(); + ScaleTestMultiply{} + .with_padding(1) + .with_width(test::Options::vector_lanes() - 1) + .test(); +} TYPED_TEST(ScaleTest, TestZero) { ScaleTestZero{}.test(); } @@ -406,7 +481,7 @@ TYPED_TEST(ScaleTest, TestOverflowByScale) { TYPED_TEST(ScaleTest, NullPointer) { TypeParam src[1] = {}, dst[1]; - test::test_null_args(scale(), src, sizeof(TypeParam), dst, + test::test_null_args(scale_api(), src, sizeof(TypeParam), dst, sizeof(TypeParam), 1, 1, 2, 0); } @@ -417,28 +492,29 @@ TYPED_TEST(ScaleTest, Misalignment) { } TypeParam src[2] = {}, dst[2]; EXPECT_EQ(KLEIDICV_ERROR_ALIGNMENT, - scale()(src, sizeof(TypeParam) + 1, dst, - sizeof(TypeParam), 1, 2, 2, 0)); + scale_api()(src, sizeof(TypeParam) + 1, dst, + sizeof(TypeParam), 1, 2, 2, 0)); EXPECT_EQ(KLEIDICV_ERROR_ALIGNMENT, - scale()(src, sizeof(TypeParam), dst, - sizeof(TypeParam) + 1, 1, 2, 2, 0)); + scale_api()(src, sizeof(TypeParam), dst, + sizeof(TypeParam) + 1, 1, 2, 2, 0)); } TYPED_TEST(ScaleTest, ZeroImageSize) { TypeParam src[1] = {}, dst[1]; - EXPECT_EQ(KLEIDICV_OK, scale()(src, sizeof(TypeParam), dst, - sizeof(TypeParam), 0, 1, 2, 0)); - EXPECT_EQ(KLEIDICV_OK, scale()(src, sizeof(TypeParam), dst, - sizeof(TypeParam), 1, 0, 2, 0)); + EXPECT_EQ(KLEIDICV_OK, scale_api()(src, sizeof(TypeParam), dst, + sizeof(TypeParam), 0, 1, 2, 0)); + EXPECT_EQ(KLEIDICV_OK, scale_api()(src, sizeof(TypeParam), dst, + sizeof(TypeParam), 1, 0, 2, 0)); } TYPED_TEST(ScaleTest, OversizeImage) { TypeParam src[1] = {}, dst[1]; + EXPECT_EQ( + KLEIDICV_ERROR_RANGE, + scale_api()(src, sizeof(TypeParam), dst, sizeof(TypeParam), + KLEIDICV_MAX_IMAGE_PIXELS + 1, 1, 2, 0)); EXPECT_EQ(KLEIDICV_ERROR_RANGE, - scale()(src, sizeof(TypeParam), dst, sizeof(TypeParam), - KLEIDICV_MAX_IMAGE_PIXELS + 1, 1, 2, 0)); - EXPECT_EQ(KLEIDICV_ERROR_RANGE, - scale()(src, sizeof(TypeParam), dst, sizeof(TypeParam), - KLEIDICV_MAX_IMAGE_PIXELS, - KLEIDICV_MAX_IMAGE_PIXELS, 2, 0)); + scale_api()(src, sizeof(TypeParam), dst, + sizeof(TypeParam), KLEIDICV_MAX_IMAGE_PIXELS, + KLEIDICV_MAX_IMAGE_PIXELS, 2, 0)); } diff --git a/test/framework/operation.h b/test/framework/operation.h index f8e78f90f1cb4765f4ab11deb37257cf21b63e57..2cc59e8f8bd0d4e790c7773b670992c3d734ff4b 100644 --- a/test/framework/operation.h +++ b/test/framework/operation.h @@ -142,6 +142,11 @@ class OperationTest { return std::numeric_limits::min(); } + // Returns the lowest value for ElementType. + static constexpr ElementType lowest() { + return std::numeric_limits::lowest(); + } + // Returns the maximum value for ElementType. static constexpr ElementType max() { return std::numeric_limits::max();