From 67f3d6dab3a0d19a7a94c8d7b989aeef810d88be Mon Sep 17 00:00:00 2001 From: Ioana Ghiban Date: Wed, 24 Jul 2024 13:13:41 +0300 Subject: [PATCH 1/8] Add 4K image size to compare perf tests --- adapters/opencv/extra_benchmarks/opencv-4.9.patch | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/adapters/opencv/extra_benchmarks/opencv-4.9.patch b/adapters/opencv/extra_benchmarks/opencv-4.9.patch index e73dd94f3..3f6195ee3 100644 --- a/adapters/opencv/extra_benchmarks/opencv-4.9.patch +++ b/adapters/opencv/extra_benchmarks/opencv-4.9.patch @@ -2,6 +2,19 @@ // // SPDX-License-Identifier: Apache-2.0 +diff --git a/modules/core/perf/perf_compare.cpp b/modules/core/perf/perf_compare.cpp +index be706e1a83..862b8b7c35 100644 +--- a/modules/core/perf/perf_compare.cpp ++++ b/modules/core/perf/perf_compare.cpp +@@ -11,7 +11,7 @@ typedef perf::TestBaseWithParam Size_MatType_CmpType; + + PERF_TEST_P( Size_MatType_CmpType, compare, + testing::Combine( +- testing::Values(::perf::szVGA, ::perf::sz1080p), ++ testing::Values(::perf::szVGA, ::perf::sz1080p, ::perf::sz2160p), + testing::Values(CV_8UC1, CV_8UC4, CV_8SC1, CV_16UC1, CV_16SC1, CV_32SC1, CV_32FC1), + CmpType::all() + ) diff --git a/modules/core/perf/perf_convertTo.cpp b/modules/core/perf/perf_convertTo.cpp index 344d81cb8a..ef5a3aa7d2 100644 --- a/modules/core/perf/perf_convertTo.cpp -- GitLab From 0729c5eaa32672f95c3109df4d29a6a7f8363242 Mon Sep 17 00:00:00 2001 From: Ioana Ghiban Date: Wed, 24 Jul 2024 13:37:36 +0300 Subject: [PATCH 2/8] Improve SC compare API --- kleidicv/src/arithmetics/compare_sc.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/kleidicv/src/arithmetics/compare_sc.h b/kleidicv/src/arithmetics/compare_sc.h index e7d8f9d48..10491c457 100644 --- a/kleidicv/src/arithmetics/compare_sc.h +++ b/kleidicv/src/arithmetics/compare_sc.h @@ -16,12 +16,19 @@ class ComparatorEqual : public UnrollTwice { using ContextType = Context; using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits; using VectorType = typename VecTraits::VectorType; + using SignedScalarType = typename std::make_signed::type; + using SignedVecTraits = + KLEIDICV_TARGET_NAMESPACE::VecTraits; + using SignedVectorType = typename SignedVecTraits::VectorType; // NOLINTBEGIN(readability-make-member-function-const) VectorType vector_path(ContextType ctx, VectorType src_a, VectorType src_b) KLEIDICV_STREAMING_COMPATIBLE { - svbool_t predicate = svcmpeq(ctx.predicate(), src_a, src_b); - return svsel(predicate, VecTraits::svdup(255), VecTraits::svdup(0)); + svbool_t pg = ctx.predicate(); + VectorType result1 = sveor_x(pg, src_a, src_b); + VectorType result2 = svcnot_x(pg, result1); + svint8_t result3 = svqneg_x(pg, VecTraits::svreinterpret(result2)); + return SignedVecTraits::svreinterpret(result3); } // NOLINTEND(readability-make-member-function-const) }; // end of class ComparatorEqual @@ -32,12 +39,19 @@ class ComparatorGreater : public UnrollTwice { using ContextType = Context; using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits; using VectorType = typename VecTraits::VectorType; + using SignedScalarType = typename std::make_signed::type; + using SignedVecTraits = + KLEIDICV_TARGET_NAMESPACE::VecTraits; + using SignedVectorType = typename SignedVecTraits::VectorType; // NOLINTBEGIN(readability-make-member-function-const) VectorType vector_path(ContextType ctx, VectorType src_a, VectorType src_b) KLEIDICV_STREAMING_COMPATIBLE { - svbool_t predicate = svcmpgt(ctx.predicate(), src_a, src_b); - return svsel(predicate, VecTraits::svdup(255), VecTraits::svdup(0)); + svbool_t pg = ctx.predicate(); + VectorType diff = VecTraits::svhsub(pg, src_b, src_a); + svint8_t shift_right = + SignedVecTraits::svasr_n(pg, VecTraits::svreinterpret(diff), 7); + return SignedVecTraits::svreinterpret(shift_right); } // NOLINTEND(readability-make-member-function-const) }; // end of class ComparatorGreater -- GitLab From 7d51f6cf02492027be47d60460774bf9b64a5a98 Mon Sep 17 00:00:00 2001 From: Ioana Ghiban Date: Wed, 24 Jul 2024 13:38:16 +0300 Subject: [PATCH 3/8] Add compare phone benchmarks --- scripts/benchmark/run_benchmarks_4K.sh | 3 +++ scripts/benchmark/run_benchmarks_FHD.sh | 2 ++ 2 files changed, 5 insertions(+) diff --git a/scripts/benchmark/run_benchmarks_4K.sh b/scripts/benchmark/run_benchmarks_4K.sh index 837f8a3af..50cfd47e8 100755 --- a/scripts/benchmark/run_benchmarks_4K.sh +++ b/scripts/benchmark/run_benchmarks_4K.sh @@ -83,4 +83,7 @@ RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL IntToFloat opencv_perf_core '*convertTo/*' '(3840x2160, 8SC1, 32FC1, 1, 1, 0)')") RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL UintToFloat opencv_perf_core '*convertTo/*' '(3840x2160, 8UC1, 32FC1, 1, 1, 0)')") +RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL CompareEq opencv_perf_core '*compare/*' '(3840x2160, 8UC1, CMP_EQ)')") +RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL CompareGt opencv_perf_core '*compare/*' '(3840x2160, 8UC1, CMP_GT)')") + echo "$RES" diff --git a/scripts/benchmark/run_benchmarks_FHD.sh b/scripts/benchmark/run_benchmarks_FHD.sh index 838061b88..234e2767d 100755 --- a/scripts/benchmark/run_benchmarks_FHD.sh +++ b/scripts/benchmark/run_benchmarks_FHD.sh @@ -83,5 +83,7 @@ RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL IntToFloat opencv_perf_core '*convertTo/*' '(1920x1080, 8SC1, 32FC1, 1, 1, 0)')") RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL UintToFloat opencv_perf_core '*convertTo/*' '(1920x1080, 8UC1, 32FC1, 1, 1, 0)')") +RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL CompareEq opencv_perf_core '*compare/*' '(1920x1080, 8UC1, CMP_EQ)')") +RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL CompareGt opencv_perf_core '*compare/*' '(1920x1080, 8UC1, CMP_GT)')") echo "$RES" -- GitLab From 17d9ea3b619fa11bb0126bfcf6817110cbd5e420 Mon Sep 17 00:00:00 2001 From: Ioana Ghiban Date: Wed, 24 Jul 2024 13:38:47 +0300 Subject: [PATCH 4/8] Improve compare tests coverage --- test/api/test_compare.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/api/test_compare.cpp b/test/api/test_compare.cpp index 5c0642d53..b5b1230fe 100644 --- a/test/api/test_compare.cpp +++ b/test/api/test_compare.cpp @@ -82,10 +82,11 @@ class CompareTestLinear final { test::Array2D actual = test::Array2D(width, height, padding_, 1); - GenerateLinearSeries generator(min()); + GenerateLinearSeries generator_a(min()); + GenerateLinearSeries generator_b(128); - source_a.fill(generator); - source_b.fill(255); + source_a.fill(generator_a); + source_b.fill(generator_b); expected.fill(0); calculate_expected(source_a, source_b, expected); -- GitLab From 4873c1d449bd15702afe792a5646e7f9aee07add Mon Sep 17 00:00:00 2001 From: Ioana Ghiban Date: Wed, 24 Jul 2024 15:30:58 +0300 Subject: [PATCH 5/8] Extend CI script with OCV compare functional tests --- scripts/ci-opencv.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/ci-opencv.sh b/scripts/ci-opencv.sh index 23a638110..5f30336b0 100755 --- a/scripts/ci-opencv.sh +++ b/scripts/ci-opencv.sh @@ -66,6 +66,7 @@ CORE_TEST_PATTERNS=( '*Core_MinMaxIdx*' '*Core_minMaxIdx*' '*Core_Array*' + '*Compare*' ) CORE_TEST_PATTERNS_STR="$(join_strings_with_colon "${CORE_TEST_PATTERNS[*]}")" ../../../conformity/opencv_kleidicv/bin/opencv_test_core \ -- GitLab From b70f1a5142089e4007e842ee11b82170fa0df219 Mon Sep 17 00:00:00 2001 From: Ioana Ghiban Date: Wed, 24 Jul 2024 15:43:26 +0300 Subject: [PATCH 6/8] Add changes to compare SC API --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a694fd60..7e15028aa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,7 @@ This changelog aims to follow the guiding principles of - Gaussian Blur API specification. - In the OpenCV HAL, cvtColor YUV2RGB_NV21 is multithreaded. - In the OpenCV HAL, minMaxIdx is multithreaded when index is not requested. +- Improved performance of Compare Equal and Greater SC API. ### Removed -- GitLab From 558484bda01d49c805a433a15cffa73ca36e5d22 Mon Sep 17 00:00:00 2001 From: Ioana Ghiban Date: Wed, 24 Jul 2024 16:51:15 +0300 Subject: [PATCH 7/8] Remove LOCALLY_STREAMING keywords from SVE cmp API --- kleidicv/src/arithmetics/compare_sve2.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/kleidicv/src/arithmetics/compare_sve2.cpp b/kleidicv/src/arithmetics/compare_sve2.cpp index 1863845dd..d83f604f8 100644 --- a/kleidicv/src/arithmetics/compare_sve2.cpp +++ b/kleidicv/src/arithmetics/compare_sve2.cpp @@ -7,19 +7,19 @@ namespace kleidicv::sve2 { template -KLEIDICV_LOCALLY_STREAMING kleidicv_error_t -compare_equal(const ScalarType *src_a, size_t src_a_stride, - const ScalarType *src_b, size_t src_b_stride, ScalarType *dst, - size_t dst_stride, size_t width, size_t height) { +kleidicv_error_t compare_equal(const ScalarType *src_a, size_t src_a_stride, + const ScalarType *src_b, size_t src_b_stride, + ScalarType *dst, size_t dst_stride, size_t width, + size_t height) { return compare_sc>( src_a, src_a_stride, src_b, src_b_stride, dst, dst_stride, width, height); } template -KLEIDICV_LOCALLY_STREAMING kleidicv_error_t -compare_greater(const ScalarType *src_a, size_t src_a_stride, - const ScalarType *src_b, size_t src_b_stride, ScalarType *dst, - size_t dst_stride, size_t width, size_t height) { +kleidicv_error_t compare_greater(const ScalarType *src_a, size_t src_a_stride, + const ScalarType *src_b, size_t src_b_stride, + ScalarType *dst, size_t dst_stride, + size_t width, size_t height) { return compare_sc>( src_a, src_a_stride, src_b, src_b_stride, dst, dst_stride, width, height); } -- GitLab From c40d964917813492283317c0f4c4b76aca8f3ade Mon Sep 17 00:00:00 2001 From: Ioana Ghiban Date: Wed, 7 Aug 2024 10:17:24 +0200 Subject: [PATCH 8/8] Add SVE intrinsics necessary for comparing --- kleidicv/include/kleidicv/sc.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/kleidicv/include/kleidicv/sc.h b/kleidicv/include/kleidicv/sc.h index 55f45f955..c798d692c 100644 --- a/kleidicv/include/kleidicv/sc.h +++ b/kleidicv/include/kleidicv/sc.h @@ -331,6 +331,14 @@ class VecTraits : public VecTraitsBase { static inline svint8_t svdup(int8_t v) KLEIDICV_STREAMING_COMPATIBLE { return svdup_s8(v); } + static inline svuint8_t svreinterpret(svint8_t v) + KLEIDICV_STREAMING_COMPATIBLE { + return svreinterpret_u8(v); + } + static inline svint8_t svasr_n(svbool_t pg, svint8_t v, + uint8_t s) KLEIDICV_STREAMING_COMPATIBLE { + return svasr_n_s8_x(pg, v, s); + } }; // end of class VecTraits template <> @@ -339,6 +347,14 @@ class VecTraits : public VecTraitsBase { static inline svuint8_t svdup(uint8_t v) KLEIDICV_STREAMING_COMPATIBLE { return svdup_u8(v); } + static inline svint8_t svreinterpret(svuint8_t v) + KLEIDICV_STREAMING_COMPATIBLE { + return svreinterpret_s8(v); + } + static inline svuint8_t svhsub(svbool_t pg, svuint8_t v, + svuint8_t u) KLEIDICV_STREAMING_COMPATIBLE { + return svhsub_u8_x(pg, v, u); + } }; // end of class VecTraits template <> -- GitLab