diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a694fd60bd67949d793a395eb36f7688aa28f5b..7e15028aa78040edcda6c5c0f34a918693b80f08 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,7 @@ This changelog aims to follow the guiding principles of - Gaussian Blur API specification. - In the OpenCV HAL, cvtColor YUV2RGB_NV21 is multithreaded. - In the OpenCV HAL, minMaxIdx is multithreaded when index is not requested. +- Improved performance of Compare Equal and Greater SC API. ### Removed diff --git a/adapters/opencv/extra_benchmarks/opencv-4.9.patch b/adapters/opencv/extra_benchmarks/opencv-4.9.patch index e73dd94f3c50498da49bbce5b29703c0e9f0dd05..3f6195ee38449ffae107c333d0043e11a3bdac0e 100644 --- a/adapters/opencv/extra_benchmarks/opencv-4.9.patch +++ b/adapters/opencv/extra_benchmarks/opencv-4.9.patch @@ -2,6 +2,19 @@ // // SPDX-License-Identifier: Apache-2.0 +diff --git a/modules/core/perf/perf_compare.cpp b/modules/core/perf/perf_compare.cpp +index be706e1a83..862b8b7c35 100644 +--- a/modules/core/perf/perf_compare.cpp ++++ b/modules/core/perf/perf_compare.cpp +@@ -11,7 +11,7 @@ typedef perf::TestBaseWithParam Size_MatType_CmpType; + + PERF_TEST_P( Size_MatType_CmpType, compare, + testing::Combine( +- testing::Values(::perf::szVGA, ::perf::sz1080p), ++ testing::Values(::perf::szVGA, ::perf::sz1080p, ::perf::sz2160p), + testing::Values(CV_8UC1, CV_8UC4, CV_8SC1, CV_16UC1, CV_16SC1, CV_32SC1, CV_32FC1), + CmpType::all() + ) diff --git a/modules/core/perf/perf_convertTo.cpp b/modules/core/perf/perf_convertTo.cpp index 344d81cb8a..ef5a3aa7d2 100644 --- a/modules/core/perf/perf_convertTo.cpp diff --git a/kleidicv/include/kleidicv/sc.h b/kleidicv/include/kleidicv/sc.h index 55f45f955eb53a3be64d0411bdba980dc733cc73..c798d692cba28882d282790e5e2add5685ee55bf 100644 --- a/kleidicv/include/kleidicv/sc.h +++ b/kleidicv/include/kleidicv/sc.h @@ -331,6 +331,14 @@ class VecTraits : public VecTraitsBase { static inline svint8_t svdup(int8_t v) KLEIDICV_STREAMING_COMPATIBLE { return svdup_s8(v); } + static inline svuint8_t svreinterpret(svint8_t v) + KLEIDICV_STREAMING_COMPATIBLE { + return svreinterpret_u8(v); + } + static inline svint8_t svasr_n(svbool_t pg, svint8_t v, + uint8_t s) KLEIDICV_STREAMING_COMPATIBLE { + return svasr_n_s8_x(pg, v, s); + } }; // end of class VecTraits template <> @@ -339,6 +347,14 @@ class VecTraits : public VecTraitsBase { static inline svuint8_t svdup(uint8_t v) KLEIDICV_STREAMING_COMPATIBLE { return svdup_u8(v); } + static inline svint8_t svreinterpret(svuint8_t v) + KLEIDICV_STREAMING_COMPATIBLE { + return svreinterpret_s8(v); + } + static inline svuint8_t svhsub(svbool_t pg, svuint8_t v, + svuint8_t u) KLEIDICV_STREAMING_COMPATIBLE { + return svhsub_u8_x(pg, v, u); + } }; // end of class VecTraits template <> diff --git a/kleidicv/src/arithmetics/compare_sc.h b/kleidicv/src/arithmetics/compare_sc.h index e7d8f9d48e0779dd3878b590320b42ce3524f42f..10491c45781bfb172925d505c188512fa5b21548 100644 --- a/kleidicv/src/arithmetics/compare_sc.h +++ b/kleidicv/src/arithmetics/compare_sc.h @@ -16,12 +16,19 @@ class ComparatorEqual : public UnrollTwice { using ContextType = Context; using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits; using VectorType = typename VecTraits::VectorType; + using SignedScalarType = typename std::make_signed::type; + using SignedVecTraits = + KLEIDICV_TARGET_NAMESPACE::VecTraits; + using SignedVectorType = typename SignedVecTraits::VectorType; // NOLINTBEGIN(readability-make-member-function-const) VectorType vector_path(ContextType ctx, VectorType src_a, VectorType src_b) KLEIDICV_STREAMING_COMPATIBLE { - svbool_t predicate = svcmpeq(ctx.predicate(), src_a, src_b); - return svsel(predicate, VecTraits::svdup(255), VecTraits::svdup(0)); + svbool_t pg = ctx.predicate(); + VectorType result1 = sveor_x(pg, src_a, src_b); + VectorType result2 = svcnot_x(pg, result1); + svint8_t result3 = svqneg_x(pg, VecTraits::svreinterpret(result2)); + return SignedVecTraits::svreinterpret(result3); } // NOLINTEND(readability-make-member-function-const) }; // end of class ComparatorEqual @@ -32,12 +39,19 @@ class ComparatorGreater : public UnrollTwice { using ContextType = Context; using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits; using VectorType = typename VecTraits::VectorType; + using SignedScalarType = typename std::make_signed::type; + using SignedVecTraits = + KLEIDICV_TARGET_NAMESPACE::VecTraits; + using SignedVectorType = typename SignedVecTraits::VectorType; // NOLINTBEGIN(readability-make-member-function-const) VectorType vector_path(ContextType ctx, VectorType src_a, VectorType src_b) KLEIDICV_STREAMING_COMPATIBLE { - svbool_t predicate = svcmpgt(ctx.predicate(), src_a, src_b); - return svsel(predicate, VecTraits::svdup(255), VecTraits::svdup(0)); + svbool_t pg = ctx.predicate(); + VectorType diff = VecTraits::svhsub(pg, src_b, src_a); + svint8_t shift_right = + SignedVecTraits::svasr_n(pg, VecTraits::svreinterpret(diff), 7); + return SignedVecTraits::svreinterpret(shift_right); } // NOLINTEND(readability-make-member-function-const) }; // end of class ComparatorGreater diff --git a/kleidicv/src/arithmetics/compare_sve2.cpp b/kleidicv/src/arithmetics/compare_sve2.cpp index 1863845dd9a1eb4f3f3c20b1d132c576e1c82f3e..d83f604f8a5d6b34424f6e4fd9b212ef1d9fcdd7 100644 --- a/kleidicv/src/arithmetics/compare_sve2.cpp +++ b/kleidicv/src/arithmetics/compare_sve2.cpp @@ -7,19 +7,19 @@ namespace kleidicv::sve2 { template -KLEIDICV_LOCALLY_STREAMING kleidicv_error_t -compare_equal(const ScalarType *src_a, size_t src_a_stride, - const ScalarType *src_b, size_t src_b_stride, ScalarType *dst, - size_t dst_stride, size_t width, size_t height) { +kleidicv_error_t compare_equal(const ScalarType *src_a, size_t src_a_stride, + const ScalarType *src_b, size_t src_b_stride, + ScalarType *dst, size_t dst_stride, size_t width, + size_t height) { return compare_sc>( src_a, src_a_stride, src_b, src_b_stride, dst, dst_stride, width, height); } template -KLEIDICV_LOCALLY_STREAMING kleidicv_error_t -compare_greater(const ScalarType *src_a, size_t src_a_stride, - const ScalarType *src_b, size_t src_b_stride, ScalarType *dst, - size_t dst_stride, size_t width, size_t height) { +kleidicv_error_t compare_greater(const ScalarType *src_a, size_t src_a_stride, + const ScalarType *src_b, size_t src_b_stride, + ScalarType *dst, size_t dst_stride, + size_t width, size_t height) { return compare_sc>( src_a, src_a_stride, src_b, src_b_stride, dst, dst_stride, width, height); } diff --git a/scripts/benchmark/run_benchmarks_4K.sh b/scripts/benchmark/run_benchmarks_4K.sh index 837f8a3af3ff3509ce5d96ec2a32b8aa6bee2cb6..50cfd47e8fd04dc87083bf336a394f35d7020c33 100755 --- a/scripts/benchmark/run_benchmarks_4K.sh +++ b/scripts/benchmark/run_benchmarks_4K.sh @@ -83,4 +83,7 @@ RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL IntToFloat opencv_perf_core '*convertTo/*' '(3840x2160, 8SC1, 32FC1, 1, 1, 0)')") RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL UintToFloat opencv_perf_core '*convertTo/*' '(3840x2160, 8UC1, 32FC1, 1, 1, 0)')") +RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL CompareEq opencv_perf_core '*compare/*' '(3840x2160, 8UC1, CMP_EQ)')") +RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL CompareGt opencv_perf_core '*compare/*' '(3840x2160, 8UC1, CMP_GT)')") + echo "$RES" diff --git a/scripts/benchmark/run_benchmarks_FHD.sh b/scripts/benchmark/run_benchmarks_FHD.sh index 838061b88dbab8db62021baa8f742b6ebbc85b80..234e2767d205e9729919d4d598afef847b5c6f81 100755 --- a/scripts/benchmark/run_benchmarks_FHD.sh +++ b/scripts/benchmark/run_benchmarks_FHD.sh @@ -83,5 +83,7 @@ RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL IntToFloat opencv_perf_core '*convertTo/*' '(1920x1080, 8SC1, 32FC1, 1, 1, 0)')") RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL UintToFloat opencv_perf_core '*convertTo/*' '(1920x1080, 8UC1, 32FC1, 1, 1, 0)')") +RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL CompareEq opencv_perf_core '*compare/*' '(1920x1080, 8UC1, CMP_EQ)')") +RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL CompareGt opencv_perf_core '*compare/*' '(1920x1080, 8UC1, CMP_GT)')") echo "$RES" diff --git a/scripts/ci-opencv.sh b/scripts/ci-opencv.sh index 23a6381105c710ba75a0e9a2acb57b9332252b45..5f30336b09c322f2fd190d752ffdd043a8a8bb12 100755 --- a/scripts/ci-opencv.sh +++ b/scripts/ci-opencv.sh @@ -66,6 +66,7 @@ CORE_TEST_PATTERNS=( '*Core_MinMaxIdx*' '*Core_minMaxIdx*' '*Core_Array*' + '*Compare*' ) CORE_TEST_PATTERNS_STR="$(join_strings_with_colon "${CORE_TEST_PATTERNS[*]}")" ../../../conformity/opencv_kleidicv/bin/opencv_test_core \ diff --git a/test/api/test_compare.cpp b/test/api/test_compare.cpp index 5c0642d5349ebd802afc52a7523200833b675aba..b5b1230fea5edb50820742b63834541bb209a66a 100644 --- a/test/api/test_compare.cpp +++ b/test/api/test_compare.cpp @@ -82,10 +82,11 @@ class CompareTestLinear final { test::Array2D actual = test::Array2D(width, height, padding_, 1); - GenerateLinearSeries generator(min()); + GenerateLinearSeries generator_a(min()); + GenerateLinearSeries generator_b(128); - source_a.fill(generator); - source_b.fill(255); + source_a.fill(generator_a); + source_b.fill(generator_b); expected.fill(0); calculate_expected(source_a, source_b, expected);