From 481310508d3ba57bb8a2e51b305ea070a9e58f35 Mon Sep 17 00:00:00 2001 From: Denes Tarjan Date: Thu, 29 May 2025 13:12:38 +0000 Subject: [PATCH] Fix ScaleFloat in case of in-place operation NEON variant only. Fixes #9. Root cause: Due to TryToAvoidTailLoop, when width is bigger than 4 and is not divisible with 4, it scaled some elements twice. --- conformity/opencv/test_scale.cpp | 48 ++++++++++-------- kleidicv/src/arithmetics/scale_neon.cpp | 8 +-- test/api/test_scale.cpp | 65 +++++++++++++++++-------- 3 files changed, 77 insertions(+), 44 deletions(-) diff --git a/conformity/opencv/test_scale.cpp b/conformity/opencv/test_scale.cpp index bc1614a17..a6df11b6a 100644 --- a/conformity/opencv/test_scale.cpp +++ b/conformity/opencv/test_scale.cpp @@ -1,23 +1,23 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 -#include -#include -#include - #include "opencv2/core/hal/interface.h" -#include "tests.h" +#include "utils.h" -template +template cv::Mat exec_scale(cv::Mat& input_mat) { + if constexpr (InPlace) { + input_mat.convertTo(input_mat, -1, Scale / 1000.0, Shift / 1000.0); + return input_mat; + } cv::Mat result; input_mat.convertTo(result, -1, Scale / 1000.0, Shift / 1000.0); return result; } #if MANAGER -template +template bool test_scale(int index, RecreatedMessageQueue& request_queue, RecreatedMessageQueue& reply_queue) { cv::RNG rng(0); @@ -26,9 +26,9 @@ bool test_scale(int index, RecreatedMessageQueue& request_queue, for (size_t y = 5; y <= 16; ++y) { cv::Mat input_mat(x, y, Format); rng.fill(input_mat, cv::RNG::NORMAL, 0.0, 1.0e10); - cv::Mat actual_mat = exec_scale(input_mat); cv::Mat expected_mat = get_expected_from_subordinate( index, request_queue, reply_queue, input_mat); + cv::Mat actual_mat = exec_scale(input_mat); bool success = (CV_MAT_DEPTH(Format) == CV_32F && @@ -50,17 +50,27 @@ bool test_scale(int index, RecreatedMessageQueue& request_queue, std::vector& scale_tests_get() { // clang-format off static std::vector tests = { - TEST("Scale float32, scale=1.0, shift=2.0", (test_scale<1000, 2000, CV_32FC4>), (exec_scale<1000, 2000>)), - TEST("Scale float32, scale=-10.0, shift=0.0", (test_scale<(-10000), 0, CV_32FC1>), (exec_scale<(-10000), 0>)), - TEST("Scale float32, scale=3.14, shift=2.72", (test_scale<3140, 2720, CV_32FC2>), (exec_scale<3140, 2720>)), - TEST("Scale float32, scale=7e5, shift=8e-3", (test_scale<700000000, 8, CV_32FC3>), (exec_scale<700000000, 8>)), - TEST("Scale float32, scale=1e-3, shift=8e-3", (test_scale<1, 8, CV_32FC4>), (exec_scale<1, 8>)), + TEST("Scale float32, scale=1.0, shift=2.0", (test_scale<1000, 2000, CV_32FC4, false>), (exec_scale<1000, 2000, false>)), + TEST("Scale float32, scale=-10.0, shift=0.0", (test_scale<(-10000), 0, CV_32FC1, false>), (exec_scale<(-10000), 0, false>)), + TEST("Scale float32, scale=3.14, shift=2.72", (test_scale<3140, 2720, CV_32FC2, false>), (exec_scale<3140, 2720, false>)), + TEST("Scale float32, scale=7e5, shift=8e-3", (test_scale<700000000, 8, CV_32FC3, false>), (exec_scale<700000000, 8, false>)), + TEST("Scale float32, scale=1e-3, shift=8e-3", (test_scale<1, 8, CV_32FC4, false>), (exec_scale<1, 8, false>)), + TEST("Scale float32, scale=1.0, shift=2.0, in-place", (test_scale<1000, 2000, CV_32FC4, true>), (exec_scale<1000, 2000, true>)), + TEST("Scale float32, scale=-10.0, shift=0.0, in-place", (test_scale<(-10000), 0, CV_32FC1, true>), (exec_scale<(-10000), 0, true>)), + TEST("Scale float32, scale=3.14, shift=2.72, in-place", (test_scale<3140, 2720, CV_32FC2, true>), (exec_scale<3140, 2720, true>)), + TEST("Scale float32, scale=7e5, shift=8e-3, in-place", (test_scale<700000000, 8, CV_32FC3, true>), (exec_scale<700000000, 8, true>)), + TEST("Scale float32, scale=1e-3, shift=8e-3, in-place", (test_scale<1, 8, CV_32FC4, true>), (exec_scale<1, 8, true>)), - TEST("Scale uint8, scale=1.0, shift=2.0", (test_scale<1000, 2000, CV_8UC4>), (exec_scale<1000, 2000>)), - TEST("Scale uint8, scale=-10.0, shift=0.0", (test_scale<(-10000), 0, CV_8UC1>), (exec_scale<(-10000), 0>)), - TEST("Scale uint8, scale=3.14, shift=-2.72", (test_scale<3140, -2720, CV_8UC2>), (exec_scale<3140, -2720>)), - TEST("Scale uint8, scale=17.17, shift=3.9", (test_scale<17170, 3900, CV_8UC3>), (exec_scale<17170, 3900>)), - TEST("Scale uint8, scale=0.13, shift=230", (test_scale<130, 230000, CV_8UC4>), (exec_scale<130, 230000>)), + TEST("Scale uint8, scale=1.0, shift=2.0", (test_scale<1000, 2000, CV_8UC4, false>), (exec_scale<1000, 2000, false>)), + TEST("Scale uint8, scale=-10.0, shift=0.0", (test_scale<(-10000), 0, CV_8UC1, false>), (exec_scale<(-10000), 0, false>)), + TEST("Scale uint8, scale=3.14, shift=-2.72", (test_scale<3140, -2720, CV_8UC2, false>), (exec_scale<3140, -2720, false>)), + TEST("Scale uint8, scale=17.17, shift=3.9", (test_scale<17170, 3900, CV_8UC3, false>), (exec_scale<17170, 3900, false>)), + TEST("Scale uint8, scale=0.13, shift=230", (test_scale<130, 230000, CV_8UC4, false>), (exec_scale<130, 230000, false>)), + TEST("Scale uint8, scale=1.0, shift=2.0, in-place", (test_scale<1000, 2000, CV_8UC4, true>), (exec_scale<1000, 2000, true>)), + TEST("Scale uint8, scale=-10.0, shift=0.0, in-place", (test_scale<(-10000), 0, CV_8UC1, true>), (exec_scale<(-10000), 0, true>)), + TEST("Scale uint8, scale=3.14, shift=-2.72, in-place", (test_scale<3140, -2720, CV_8UC2, true>), (exec_scale<3140, -2720, true>)), + TEST("Scale uint8, scale=17.17, shift=3.9, in-place", (test_scale<17170, 3900, CV_8UC3, true>), (exec_scale<17170, 3900, true>)), + TEST("Scale uint8, scale=0.13, shift=230, in-place", (test_scale<130, 230000, CV_8UC4, true>), (exec_scale<130, 230000, true>)), }; // clang-format on return tests; diff --git a/kleidicv/src/arithmetics/scale_neon.cpp b/kleidicv/src/arithmetics/scale_neon.cpp index 3680259ce..a2808a117 100644 --- a/kleidicv/src/arithmetics/scale_neon.cpp +++ b/kleidicv/src/arithmetics/scale_neon.cpp @@ -263,9 +263,7 @@ std::array precalculate_scale_table_u8(float scale, float shift) { // Float implementation // ----------------------------------------------------------------------- -class AddFloat final : public UnrollTwice, - public UnrollOnce, - public TryToAvoidTailLoop { +class AddFloat final : public UnrollTwice, public UnrollOnce { public: using ScalarType = float; using VecTraits = neon::VecTraits; @@ -284,9 +282,7 @@ class AddFloat final : public UnrollTwice, float32x4_t vshift_; }; // end of class AddFloat -class ScaleFloat final : public UnrollTwice, - public UnrollOnce, - public TryToAvoidTailLoop { +class ScaleFloat final : public UnrollTwice, public UnrollOnce { public: using ScalarType = float; using VecTraits = neon::VecTraits; diff --git a/test/api/test_scale.cpp b/test/api/test_scale.cpp index 0d404cfe5..698b9f4b9 100644 --- a/test/api/test_scale.cpp +++ b/test/api/test_scale.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 @@ -93,14 +93,14 @@ class ScaleTestLinearBase { } // minimum_size set by caller to trigger the 'big' scale path. - void test_scalar(size_t minimum_size = 1) { + void test_scalar(size_t minimum_size = 1, bool in_place = false) { size_t width = test::Options::vector_length() - 1; - test_linear(width, minimum_size); + test_linear(width, minimum_size, in_place); } - void test_vector(size_t minimum_size = 1) { + void test_vector(size_t minimum_size = 1, bool in_place = false) { size_t width = test::Options::vector_length() * 2; - test_linear(width, minimum_size); + test_linear(width, minimum_size, in_place); } protected: @@ -132,7 +132,7 @@ class ScaleTestLinearBase { // Number of padding bytes at the end of rows. size_t padding_{0}; - void test_linear(size_t width, size_t minimum_size) { + void test_linear(size_t width, size_t minimum_size, bool in_place) { size_t image_size = std::max( minimum_size, std::min(saturating_cast(max() - lowest()), @@ -153,11 +153,19 @@ class ScaleTestLinearBase { calculate_expected(source, expected); - ASSERT_EQ(KLEIDICV_OK, - scale_api()(source.data(), source.stride(), - actual.data(), actual.stride(), width, - height, scale(), shift())); - + if (in_place) { + actual = source; + ASSERT_EQ(KLEIDICV_OK, + scale_api()(actual.data(), actual.stride(), + actual.data(), actual.stride(), width, + height, scale(), shift())); + + } else { + ASSERT_EQ(KLEIDICV_OK, + scale_api()(source.data(), source.stride(), + actual.data(), actual.stride(), width, + height, scale(), shift())); + } EXPECT_EQ_ARRAY2D(expected, actual); } @@ -194,6 +202,12 @@ class ScaleTestLinear3 final : public ScaleTestLinearBase { float shift() override { return 1.41; }; }; +template +class ScaleTestLinear4 final : public ScaleTestLinearBase { + float scale() override { return 1.0; }; + float shift() override { return -17.7; }; +}; + template class ScaleTestAdd final : public ScaleTestBase { using Elements = typename UnaryOperationTest::Elements; @@ -397,12 +411,12 @@ TYPED_TEST(ScaleTest, TestVector1) { } TYPED_TEST(ScaleTest, TestScalar1Tbx) { - ScaleTestLinear1{}.test_scalar(2500); - ScaleTestLinear1{}.with_padding(1).test_scalar(2500); + ScaleTestLinear1{}.test_scalar(700); + ScaleTestLinear1{}.with_padding(1).test_scalar(700); } TYPED_TEST(ScaleTest, TestVector1Tbx) { - ScaleTestLinear1{}.test_vector(2500); - ScaleTestLinear1{}.with_padding(1).test_vector(2500); + ScaleTestLinear1{}.test_vector(700); + ScaleTestLinear1{}.with_padding(1).test_vector(700); } TYPED_TEST(ScaleTest, TestScalar2) { @@ -413,10 +427,10 @@ TYPED_TEST(ScaleTest, TestVector2) { } TYPED_TEST(ScaleTest, TestScalar2Tbx) { - ScaleTestLinear2{}.test_scalar(2500); + ScaleTestLinear2{}.test_scalar(700); } TYPED_TEST(ScaleTest, TestVector2Tbx) { - ScaleTestLinear2{}.test_vector(2500); + ScaleTestLinear2{}.test_vector(700); } TYPED_TEST(ScaleTest, TestScalar3) { @@ -427,10 +441,23 @@ TYPED_TEST(ScaleTest, TestVector3) { } TYPED_TEST(ScaleTest, TestScalar3Tbx) { - ScaleTestLinear3{}.test_scalar(2500); + ScaleTestLinear3{}.test_scalar(700); } TYPED_TEST(ScaleTest, TestVector3Tbx) { - ScaleTestLinear3{}.test_vector(2500); + ScaleTestLinear3{}.test_vector(700); +} + +TYPED_TEST(ScaleTest, InPlaceScalar2) { + ScaleTestLinear2{}.test_scalar(1, true); +} +TYPED_TEST(ScaleTest, InPlaceVector2) { + ScaleTestLinear2{}.test_vector(1, true); +} +TYPED_TEST(ScaleTest, InPlaceAddScalar) { + ScaleTestLinear4{}.test_scalar(1, true); +} +TYPED_TEST(ScaleTest, InPlaceAddVector) { + ScaleTestLinear4{}.test_vector(1, true); } TYPED_TEST(ScaleTest, TestAdd) { -- GitLab