From 1a1f6b265c2522a9fcd638f742da28139a71ce6c Mon Sep 17 00:00:00 2001 From: Maksims Svecovs Date: Thu, 15 Feb 2024 15:52:09 +0000 Subject: [PATCH 1/2] [fix] Use interleaving for large vector length For gray_to_rgba conversion use interleaving load store when operating with vectors larger than 128 bytes. Otherwise use table look up. Signed-off-by: Maksims Svecovs --- intrinsiccv/src/conversions/gray_to_rgb_sc.h | 42 ++++++++++++-------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/intrinsiccv/src/conversions/gray_to_rgb_sc.h b/intrinsiccv/src/conversions/gray_to_rgb_sc.h index 00fd0696d..968da8cb6 100644 --- a/intrinsiccv/src/conversions/gray_to_rgb_sc.h +++ b/intrinsiccv/src/conversions/gray_to_rgb_sc.h @@ -101,17 +101,11 @@ class GrayToRGB final : }; // end of class GrayToRGB template -class GrayToRGBA final : -#if !INTRINSICCV_PREFER_INTERLEAVING_LOAD_STORE - public UsesTailPath, -#endif - public UnrollTwice { +class GrayToRGBAWithInterleaving final : public UnrollTwice { public: using ContextType = sve2::Context; using VecTraits = sve2::VecTraits; using VectorType = typename VecTraits::VectorType; - -#if INTRINSICCV_PREFER_INTERLEAVING_LOAD_STORE void vector_path(ContextType ctx, VectorType src_vect, ScalarType *dst) INTRINSICCV_STREAMING_COMPATIBLE { auto pg = ctx.predicate(); @@ -120,9 +114,18 @@ class GrayToRGBA final : svst4(pg, dst, dst_vect); } -#else // INTRINSICCV_PREFER_INTERLEAVING_LOAD_STORE - explicit GrayToRGBA(svuint8x4_t &indices) INTRINSICCV_STREAMING_COMPATIBLE - : indices_{indices} { +}; // end of class GrayToRGBAWithInterleaving + +#if !INTRINSICCV_PREFER_INTERLEAVING_LOAD_STORE +template +class GrayToRGBAWithLookUpTable final : public UnrollTwice, + public UsesTailPath { + public: + using ContextType = sve2::Context; + using VecTraits = sve2::VecTraits; + using VectorType = typename VecTraits::VectorType; + explicit GrayToRGBAWithLookUpTable(svuint8x4_t &indices) + INTRINSICCV_STREAMING_COMPATIBLE : indices_{indices} { 
initialize_indices(); } @@ -193,8 +196,8 @@ class GrayToRGBA final : } svuint8x4_t &indices_; -#endif // INTRINSICCV_PREFER_INTERLEAVING_LOAD_STORE -}; // end of class GrayToRGBA +}; // end of class GrayToRGBAWithLookUpTable +#endif // !INTRINSICCV_PREFER_INTERLEAVING_LOAD_STORE INTRINSICCV_TARGET_FN_ATTRS static intrinsiccv_error_t gray_to_rgb_u8_sc( const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, @@ -226,13 +229,20 @@ INTRINSICCV_TARGET_FN_ATTRS static intrinsiccv_error_t gray_to_rgba_u8_sc( Rectangle rect{width, height}; Rows src_rows{src, src_stride}; Rows dst_rows{dst, dst_stride, 4 /* RGBA */}; + #if INTRINSICCV_PREFER_INTERLEAVING_LOAD_STORE - GrayToRGBA operation; + GrayToRGBAWithInterleaving operation{}; + sve2::apply_operation_by_rows(operation, rect, src_rows, dst_rows); #else - svuint8x4_t table_indices; - GrayToRGBA operation{table_indices}; + if (svcntb() > 128) { + GrayToRGBAWithInterleaving operation{}; + sve2::apply_operation_by_rows(operation, rect, src_rows, dst_rows); + } else { + svuint8x4_t table_indices; + GrayToRGBAWithLookUpTable operation{table_indices}; + sve2::apply_operation_by_rows(operation, rect, src_rows, dst_rows); + } #endif - sve2::apply_operation_by_rows(operation, rect, src_rows, dst_rows); return INTRINSICCV_OK; } -- GitLab From 66d355032d4f11c43179d46ae02e1664cb39dcce Mon Sep 17 00:00:00 2001 From: Maksims Svecovs Date: Tue, 6 Feb 2024 21:21:52 +0000 Subject: [PATCH 2/2] Add long vector length run Test with a 256-byte (2048-bit) vector length Signed-off-by: Maksims Svecovs --- scripts/ci.sh | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/scripts/ci.sh b/scripts/ci.sh index 4e47e6402..dd868b12e 100755 --- a/scripts/ci.sh +++ b/scripts/ci.sh @@ -41,17 +41,21 @@ echo '{"Checks": "-*,cppcoreguidelines-avoid-goto"}'>build/.clang-tidy ninja -C build # Run tests +LONG_VECTOR_TESTS="GRAY2.*:RGB*" TESTRESULT=0 qemu-aarch64 build/test/framework/intrinsiccv-framework-test 
--gtest_output=xml:build/test-results/ || TESTRESULT=1 qemu-aarch64 -cpu cortex-a35 build/test/api/intrinsiccv-api-test --gtest_output=xml:build/test-results/neon/ || TESTRESULT=1 qemu-aarch64 -cpu max,sve128=on,sme=off \ - build/test/api/intrinsiccv-api-test --gtest_output=xml:build/test-results/sve2/ --vector-length=16 || TESTRESULT=1 + build/test/api/intrinsiccv-api-test --gtest_output=xml:build/test-results/sve128/ --vector-length=16 || TESTRESULT=1 +qemu-aarch64 -cpu max,sve2048=on,sve-default-vector-length=256,sme=off \ + build/test/api/intrinsiccv-api-test --gtest_filter="${LONG_VECTOR_TESTS}" --gtest_output=xml:build/test-results/sve2048/ --vector-length=256 || TESTRESULT=1 qemu-aarch64 -cpu max,sve128=on,sme512=on \ build/test/api/intrinsiccv-api-test --gtest_output=xml:build/test-results/sme/ --vector-length=64 || TESTRESULT=1 -scripts/prefix_testsuite_names.py build/test-results/neon/intrinsiccv-api-test.xml "NEON." -scripts/prefix_testsuite_names.py build/test-results/sve2/intrinsiccv-api-test.xml "SVE2." -scripts/prefix_testsuite_names.py build/test-results/sme/intrinsiccv-api-test.xml "SME." +scripts/prefix_testsuite_names.py build/test-results/neon/intrinsiccv-api-test.xml "neon." +scripts/prefix_testsuite_names.py build/test-results/sve128/intrinsiccv-api-test.xml "sve128." +scripts/prefix_testsuite_names.py build/test-results/sve2048/intrinsiccv-api-test.xml "sve2048." +scripts/prefix_testsuite_names.py build/test-results/sme/intrinsiccv-api-test.xml "sme." # Generate test coverage report LLVM_COV=llvm-cov scripts/generate_coverage_report.py -- GitLab