From a03ff21dbc2380e742dc6c04dbb562f041a17464 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Fri, 22 Mar 2024 16:52:19 +0100 Subject: [PATCH 1/8] Fix typo in sve2.h --- intrinsiccv/include/intrinsiccv/sve2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intrinsiccv/include/intrinsiccv/sve2.h b/intrinsiccv/include/intrinsiccv/sve2.h index cd326d0c7..df7b4f3e9 100644 --- a/intrinsiccv/include/intrinsiccv/sve2.h +++ b/intrinsiccv/include/intrinsiccv/sve2.h @@ -491,7 +491,7 @@ class RemainingPathAdapter : public OperationBase { } }; // end of class RemainingPathAdapter -// Shorthand for applying a generic unrolled NEON operation. +// Shorthand for applying a generic unrolled SVE2 operation. template void apply_operation_by_rows(OperationType &operation, ArgTypes &&...args) INTRINSICCV_STREAMING_COMPATIBLE { -- GitLab From 5b91a0d2e51d89c18b6cb8454f092a47d1a2829d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Fri, 22 Mar 2024 17:12:28 +0100 Subject: [PATCH 2/8] Better fix for error "non-constant-expression cannot be narrowed" --- test/api/test_resize_linear.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/api/test_resize_linear.cpp b/test/api/test_resize_linear.cpp index a011a259b..18dcd6f3f 100644 --- a/test/api/test_resize_linear.cpp +++ b/test/api/test_resize_linear.cpp @@ -239,7 +239,7 @@ static void do_large_dimensions_test(size_t x_scale, size_t y_scale) { src.resize(src_stride * src_height); dst.resize(dst_stride * dst_height); expected_data.resize(dst_stride * dst_height); - std::mt19937 generator{static_cast(test::Options::seed())}; + std::mt19937 generator(test::Options::seed()); std::generate(src.begin(), src.end(), generator); resize_linear_unaccelerated_u8(src.data(), src_stride, src_width, src_height, expected_data.data(), dst_stride, dst_width, -- GitLab From 64c7392a65eb5e21da2a15ee9783cc53596f1bd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= 
Date: Thu, 4 Apr 2024 11:28:09 +0200 Subject: [PATCH 3/8] Add *.h.in files checking to formatting script --- scripts/format.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/format.sh b/scripts/format.sh index fba356fed..c50dfeda6 100755 --- a/scripts/format.sh +++ b/scripts/format.sh @@ -33,7 +33,7 @@ SOURCES="$(find \ "${INTRINSICCV_ROOT_PATH}"/benchmark \ "${INTRINSICCV_ROOT_PATH}"/intrinsiccv \ "${INTRINSICCV_ROOT_PATH}"/test \ - \( -name \*.cpp -o -name \*.h \) \ + \( -name \*.cpp -o -name \*.h -o -name \*.h.in \) \ -print)" if [[ "${CHECK_ONLY}" == "ON" ]]; then -- GitLab From ab25b050265ce0f0cd658beac485414d984de65d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Fri, 22 Mar 2024 17:34:23 +0100 Subject: [PATCH 4/8] Implement float32 to int8 type conversion --- adapters/opencv/intrinsiccv_hal.cpp | 9 ++ intrinsiccv/include/intrinsiccv/intrinsiccv.h | 23 +++++ .../src/conversions/float_to_int_api.cpp | 44 +++++++++ .../src/conversions/float_to_int_neon.cpp | 20 +++++ intrinsiccv/src/conversions/float_to_int_sc.h | 89 +++++++++++++++++++ .../src/conversions/float_to_int_sme2.cpp | 21 +++++ .../src/conversions/float_to_int_sve2.cpp | 19 ++++ 7 files changed, 225 insertions(+) create mode 100644 intrinsiccv/src/conversions/float_to_int_api.cpp create mode 100644 intrinsiccv/src/conversions/float_to_int_neon.cpp create mode 100644 intrinsiccv/src/conversions/float_to_int_sc.h create mode 100644 intrinsiccv/src/conversions/float_to_int_sme2.cpp create mode 100644 intrinsiccv/src/conversions/float_to_int_sve2.cpp diff --git a/adapters/opencv/intrinsiccv_hal.cpp b/adapters/opencv/intrinsiccv_hal.cpp index 3076ea6d5..d22318ddc 100644 --- a/adapters/opencv/intrinsiccv_hal.cpp +++ b/adapters/opencv/intrinsiccv_hal.cpp @@ -670,6 +670,15 @@ int convertTo(const uchar *src_data, size_t src_step, int src_depth, uchar *dst_data, size_t dst_step, int dst_depth, int width, int height, double scale, double shift) { if (src_depth 
!= dst_depth) { + // type conversion + if (scale == 1.0 && shift == 0.0) { + // float32 to int8 + if (src_depth == CV_32F && dst_depth == CV_8S) { + return convert_error(intrinsiccv_type_conversion_f32_s8( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, width, height)); + } + } return CV_HAL_ERROR_NOT_IMPLEMENTED; } diff --git a/intrinsiccv/include/intrinsiccv/intrinsiccv.h b/intrinsiccv/include/intrinsiccv/intrinsiccv.h index c58397274..3e5d94a2c 100644 --- a/intrinsiccv/include/intrinsiccv/intrinsiccv.h +++ b/intrinsiccv/include/intrinsiccv/intrinsiccv.h @@ -1268,6 +1268,29 @@ INTRINSICCV_API_DECLARATION(intrinsiccv_scale_u8, const uint8_t *src, size_t width, size_t height, float scale, float shift); +/// Converts the elements in `src` from type `float` to type `int8_t`, +/// then stores the result in `dst`. +/// +/// Each resulting element is saturated, i.e. it is the smallest/largest +/// number of the type of the element if the result would underflow/overflow. +/// Source and destination data length is `width` * `height`. Number of elements +/// is limited to @ref INTRINSICCV_MAX_IMAGE_PIXELS. +/// +/// @param src Pointer to the source data. Must be non-null. +/// @param src_stride Distance in bytes from the start of one row to the +/// start of the next row for the source data. +/// Must not be less than width * sizeof(type). +/// @param dst Pointer to the destination data. Must be non-null. +/// @param dst_stride Distance in bytes from the start of one row to the +/// start of the next row for the destination data. +/// Must not be less than width * sizeof(type). +/// @param width Number of elements in a row. +/// @param height Number of rows in the data. 
+/// +INTRINSICCV_API_DECLARATION(intrinsiccv_type_conversion_f32_s8, + const float *src, size_t src_stride, int8_t *dst, + size_t dst_stride, size_t width, size_t height); + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/intrinsiccv/src/conversions/float_to_int_api.cpp b/intrinsiccv/src/conversions/float_to_int_api.cpp new file mode 100644 index 000000000..8a8a92f42 --- /dev/null +++ b/intrinsiccv/src/conversions/float_to_int_api.cpp @@ -0,0 +1,44 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "intrinsiccv/dispatch.h" +#include "intrinsiccv/intrinsiccv.h" +#include "intrinsiccv/types.h" + +namespace intrinsiccv { + +namespace neon { + +intrinsiccv_error_t type_conversion_float_to_int8_t( + const float* src, size_t src_stride, int8_t* dst, size_t dst_stride, + size_t width, size_t height); + +} // namespace neon + +namespace sve2 { + +intrinsiccv_error_t type_conversion_float_to_int8_t( + const float* src, size_t src_stride, int8_t* dst, size_t dst_stride, + size_t width, size_t height); + +} // namespace sve2 + +namespace sme2 { + +intrinsiccv_error_t type_conversion_float_to_int8_t( + const float* src, size_t src_stride, int8_t* dst, size_t dst_stride, + size_t width, size_t height); + +} // namespace sme2 + +#define INTRINSICCV_DEFINE_C_API(name, itype, otype) \ + INTRINSICCV_MULTIVERSION_C_API( \ + name, intrinsiccv::neon::type_conversion_##itype##_to_##otype, \ + INTRINSICCV_SVE2_IMPL_IF( \ + intrinsiccv::sve2::type_conversion_##itype##_to_##otype), \ + intrinsiccv::sme2::type_conversion_##itype##_to_##otype) + +INTRINSICCV_DEFINE_C_API(intrinsiccv_type_conversion_f32_s8, float, int8_t); + +} // namespace intrinsiccv diff --git a/intrinsiccv/src/conversions/float_to_int_neon.cpp b/intrinsiccv/src/conversions/float_to_int_neon.cpp new file mode 100644 index 000000000..6d3fee394 --- /dev/null +++ b/intrinsiccv/src/conversions/float_to_int_neon.cpp @@ -0,0 +1,20 
@@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "intrinsiccv/intrinsiccv.h" +#include "intrinsiccv/neon.h" + +namespace intrinsiccv::neon { + +intrinsiccv_error_t type_conversion_float_to_int8_t(const float*, size_t, + int8_t*, size_t, size_t, + size_t); + +intrinsiccv_error_t type_conversion_float_to_int8_t(const float*, size_t, + int8_t*, size_t, size_t, + size_t) { + return INTRINSICCV_ERROR_NOT_IMPLEMENTED; +} + +} // namespace intrinsiccv::neon diff --git a/intrinsiccv/src/conversions/float_to_int_sc.h b/intrinsiccv/src/conversions/float_to_int_sc.h new file mode 100644 index 000000000..10f8f749a --- /dev/null +++ b/intrinsiccv/src/conversions/float_to_int_sc.h @@ -0,0 +1,89 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef INTRINSICCV_FLOAT_TO_INT_SC_H +#define INTRINSICCV_FLOAT_TO_INT_SC_H + +#include "intrinsiccv/intrinsiccv.h" +#include "intrinsiccv/sve2.h" + +namespace INTRINSICCV_TARGET_NAMESPACE { + +class float_to_int_operation final { + public: + using SrcVecTraits = INTRINSICCV_TARGET_NAMESPACE::VecTraits; + using SrcVectorType = typename SrcVecTraits::VectorType; + using DstVecTraits = INTRINSICCV_TARGET_NAMESPACE::VecTraits; + using DstVectorType = typename DstVecTraits::VectorType; + + using ContextType = INTRINSICCV_TARGET_NAMESPACE::Context; + using VecTraits = SrcVecTraits; + + void process_row(size_t width, Columns src, + Columns dst) { + LoopUnroll{width, VecTraits::num_lanes()} + .unroll_twice([&](size_t step) INTRINSICCV_STREAMING_COMPATIBLE { + svbool_t pg = VecTraits::svptrue(); + Context ctx{pg}; + SrcVectorType src_vector1 = svld1(pg, &src[0]); + SrcVectorType src_vector2 = svld1_vnum(pg, &src[0], 1); + DstVectorType result_vector1 = vector_path(ctx, src_vector1); + DstVectorType result_vector2 = vector_path(ctx, src_vector2); + svst1b(pg, &dst[0], result_vector1); + svst1b_vnum(pg, 
&dst[0], 1, result_vector2); + src += ptrdiff_t(step); + dst += ptrdiff_t(step); + }) + .remaining([&](size_t length, size_t) INTRINSICCV_STREAMING_COMPATIBLE { + size_t index = 0; + svbool_t pg = VecTraits::svwhilelt(index, length); + Context ctx{pg}; + while (svptest_first(VecTraits::svptrue(), pg)) { + SrcVectorType src_vector = svld1(pg, &src[ptrdiff_t(index)]); + DstVectorType result_vector = vector_path(ctx, src_vector); + svst1b(pg, &dst[ptrdiff_t(index)], result_vector); + // Update loop counter and calculate the next governing predicate. + index += VecTraits::num_lanes(); + pg = VecTraits::svwhilelt(index, length); + ctx.set_predicate(pg); + } + }); + } + + private: + DstVectorType vector_path(ContextType ctx, SrcVectorType src) + INTRINSICCV_STREAMING_COMPATIBLE { + svbool_t pg = ctx.predicate(); + + src = svrinti_f32_x(pg, src); + + svbool_t less = svcmplt_n_f32(pg, src, -128.0); + src = svdup_n_f32_m(src, less, -128.0); + + svbool_t greater = svcmpgt_n_f32(pg, src, 127.0); + src = svdup_n_f32_m(src, greater, 127.0); + + return svcvt_s32_f32_x(pg, src); + } +}; // end of class float_to_int_operation + +static intrinsiccv_error_t type_conversion_float_to_int8_t_sc( + const float* src, size_t src_stride, int8_t* dst, size_t dst_stride, + size_t width, size_t height) INTRINSICCV_STREAMING_COMPATIBLE { + CHECK_POINTER_AND_STRIDE(src, src_stride); + CHECK_POINTER_AND_STRIDE(dst, dst_stride); + CHECK_IMAGE_SIZE(width, height); + + float_to_int_operation operation; + Rectangle rect{width, height}; + Rows src_rows{src, src_stride}; + Rows dst_rows{dst, dst_stride}; + zip_rows(operation, rect, src_rows, dst_rows); + + return INTRINSICCV_OK; +} + +} // namespace INTRINSICCV_TARGET_NAMESPACE + +#endif // INTRINSICCV_FLOAT_TO_INT_SC_H diff --git a/intrinsiccv/src/conversions/float_to_int_sme2.cpp b/intrinsiccv/src/conversions/float_to_int_sme2.cpp new file mode 100644 index 000000000..af389a575 --- /dev/null +++ b/intrinsiccv/src/conversions/float_to_int_sme2.cpp @@ 
-0,0 +1,21 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "float_to_int_sc.h" + +namespace intrinsiccv::sme2 { + +INTRINSICCV_LOCALLY_STREAMING INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t +type_conversion_float_to_int8_t(const float*, size_t, int8_t*, size_t, size_t, + size_t); + +INTRINSICCV_LOCALLY_STREAMING INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t +type_conversion_float_to_int8_t(const float* src, size_t src_stride, + int8_t* dst, size_t dst_stride, size_t width, + size_t height) { + return type_conversion_float_to_int8_t_sc(src, src_stride, dst, dst_stride, + width, height); +} + +} // namespace intrinsiccv::sme2 diff --git a/intrinsiccv/src/conversions/float_to_int_sve2.cpp b/intrinsiccv/src/conversions/float_to_int_sve2.cpp new file mode 100644 index 000000000..f58ea271e --- /dev/null +++ b/intrinsiccv/src/conversions/float_to_int_sve2.cpp @@ -0,0 +1,19 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "float_to_int_sc.h" + +namespace intrinsiccv::sve2 { + +INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t type_conversion_float_to_int8_t( + const float*, size_t, int8_t*, size_t, size_t, size_t); + +INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t type_conversion_float_to_int8_t( + const float* src, size_t src_stride, int8_t* dst, size_t dst_stride, + size_t width, size_t height) { + return type_conversion_float_to_int8_t_sc(src, src_stride, dst, dst_stride, + width, height); +} + +} // namespace intrinsiccv::sve2 -- GitLab From 0879a05b8126f8fe7931ecf5610b157067072c9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Thu, 28 Mar 2024 18:27:48 +0100 Subject: [PATCH 5/8] Added unit tests for float32 to int8 conversion --- intrinsiccv/include/intrinsiccv/config.h.in | 11 + test/api/test_float_to_int8_t.cpp | 251 ++++++++++++++++++++ test/framework/array.h | 16 ++ 3 files changed, 
278 insertions(+) create mode 100644 test/api/test_float_to_int8_t.cpp diff --git a/intrinsiccv/include/intrinsiccv/config.h.in b/intrinsiccv/include/intrinsiccv/config.h.in index 068c88b66..dd521b82a 100644 --- a/intrinsiccv/include/intrinsiccv/config.h.in +++ b/intrinsiccv/include/intrinsiccv/config.h.in @@ -90,4 +90,15 @@ #define INTRINSICCV_NODISCARD #endif +// GCC and clang +#ifdef __GNUC__ +#define INTRINSICCV_NO_STRICT_ALIASING_BEGIN \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wstrict-aliasing\"") +#define INTRINSICCV_NO_STRICT_ALIASING_END _Pragma("GCC diagnostic pop") +#else +#define INTRINSICCV_NO_STRICT_ALIASING_BEGIN +#define INTRINSICCV_NO_STRICT_ALIASING_END +#endif + #endif // INTRINSICCV_CONFIG_H diff --git a/test/api/test_float_to_int8_t.cpp b/test/api/test_float_to_int8_t.cpp new file mode 100644 index 000000000..23d03507d --- /dev/null +++ b/test/api/test_float_to_int8_t.cpp @@ -0,0 +1,251 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include "framework/array.h" +#include "framework/generator.h" +#include "framework/operation.h" +#include "framework/utils.h" +#include "intrinsiccv/intrinsiccv.h" +#include "test_config.h" + +#define INTRINSICCV_TYPE_CONVERSION(itype, input_type_name) \ + INTRINSICCV_API(float_to_int8_t, \ + intrinsiccv_type_conversion_##input_type_name##_s8, itype) + +INTRINSICCV_TYPE_CONVERSION(float, f32); + +template +class Float32ToInt8TestBase { + using OutputType = int8_t; + + protected: + static constexpr OutputType min() { + return std::numeric_limits::min(); + } + static constexpr OutputType max() { + return std::numeric_limits::max(); + } + + struct Elements { + size_t width; + size_t height; + + std::vector> source_rows; + std::vector> expected_rows; + + Elements(size_t _width, size_t _height, + std::vector>&& _source_rows, + std::vector>&& _expected_rows) + : width(_width), + height(_height), + 
source_rows(std::move(_source_rows)), + expected_rows(std::move(_expected_rows)) {} + }; + + private: + static constexpr uint32_t quietNaN = 0x7FC00000; + static constexpr uint32_t signalingNaN = 0x7FA00000; + static constexpr uint32_t posInfinity = 0x7F800000; + static constexpr uint32_t negInfinity = 0xFF800000; + + static constexpr uint32_t minusNaN = 0xFF800001; + static constexpr uint32_t plusNaN = 0x7F800001; + static constexpr uint32_t plusZero = 0x00000000; + static constexpr uint32_t minusZero = 0x80000000; + + static constexpr uint32_t oneNaN = 0x7FC00001; + static constexpr uint32_t zeroDivZero = 0xFFC00000; + static constexpr uint32_t floatMin = 0x00800000; + static constexpr uint32_t floatMax = 0x7F7FFFFF; + + static constexpr float _floatval(uint32_t v) { + static_assert(sizeof(float) == 4); + INTRINSICCV_NO_STRICT_ALIASING_BEGIN + return *reinterpret_cast(&v); + INTRINSICCV_NO_STRICT_ALIASING_END + } + + const Elements test_case_custom = { + // clang-format off + 4, 6, + {{ + { _floatval(quietNaN), _floatval(signalingNaN), _floatval(posInfinity), _floatval(negInfinity) }, + { _floatval(minusNaN), _floatval(plusNaN), _floatval(plusZero), _floatval(minusZero) }, + { _floatval(oneNaN), _floatval(zeroDivZero), _floatval(floatMin), _floatval(floatMax) }, + { 1111.11, -1112.22, 113.33, 114.44 }, + { 111.51, 112.62, 113.73, 114.84 }, + { 126.66, 127.11, 128.66, 129.11 }, + { 11.5, 12.5, -11.5, -12.5 } + }}, + {{ + { 0, 0, 127, -128 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 127 }, + { 127, -128, 113, 114 }, + { 112, 113, 114, 115 }, + { 127, 127, 127, 127 }, + { 12, 12, -12, -12 } + }} + // clang-format on + }; + + public: + // minimum_size set by caller to trigger the 'big' conversion path. 
+ void test_scalar(size_t minimum_size = 1) { + size_t width = test::Options::vector_length() - 1; + test_linear(width, minimum_size); + } + + void test_vector(size_t minimum_size = 1) { + size_t width = test::Options::vector_length() * 2; + test_linear(width, minimum_size); + } + + void test_custom() { + const size_t& width = test_case_custom.width; + const size_t& height = test_case_custom.height; + + test::Array2D source(width, height); + test::Array2D expected(width, height); + test::Array2D actual(width, height); + + for (size_t i = 0; i < height; i++) { + source.set(i, 0, test_case_custom.source_rows[i]); + expected.set(i, 0, test_case_custom.expected_rows[i]); + } + + ASSERT_EQ(INTRINSICCV_OK, intrinsiccv_type_conversion_f32_s8( + source.data(), source.stride(), actual.data(), + actual.stride(), width, height)); + + EXPECT_EQ_ARRAY2D(expected, actual); + } + + void test_fill(const size_t width, const size_t height) { + test::Array2D source(width, height, 1, 1); + test::Array2D expected(width, height, 1, 1); + test::Array2D actual(width, height, 1, 1); + + source.fill(10.67F); + expected.fill(11); + + actual.fill(0); + + ASSERT_EQ(INTRINSICCV_OK, intrinsiccv_type_conversion_f32_s8( + source.data(), source.stride(), actual.data(), + actual.stride(), width, height)); + + EXPECT_EQ_ARRAY2D(expected, actual); + } + + private: + class GenerateLinearSeries : public test::Generator { + public: + explicit GenerateLinearSeries(ElementType start_from) + : counter_{start_from} {} + + std::optional next() override { return counter_++; } + + private: + ElementType counter_; + }; // end of class GenerateLinearSeries + + void test_linear(size_t width, size_t minimum_size) { + size_t image_size = + std::max(minimum_size, static_cast(max() - min())); + size_t height = image_size / width + 1; + test::Array2D source(width, height, 1, 1); + test::Array2D expected(width, height, 1, 1); + test::Array2D actual(width, height, 1, 1); + + GenerateLinearSeries generator(min()); + + 
source.fill(generator); + + calculate_expected(source, expected); + + ASSERT_EQ(INTRINSICCV_OK, intrinsiccv_type_conversion_f32_s8( + source.data(), source.stride(), actual.data(), + actual.stride(), width, height)); + + EXPECT_EQ_ARRAY2D(expected, actual); + } + + protected: + void calculate_expected(const test::Array2D& source, + test::Array2D& expected) { + for (size_t hindex = 0; hindex < source.height(); ++hindex) { + for (size_t vindex = 0; vindex < source.width(); ++vindex) { + OutputType calculated = 0; + // NOLINTBEGIN(clang-analyzer-core.uninitialized.Assign) + ElementType result = *source.at(hindex, vindex); + // NOLINTEND(clang-analyzer-core.uninitialized.Assign) + if (result > max()) { + calculated = max(); + } else if (result < min()) { + calculated = min(); + } else { + calculated = result; + } + *expected.at(hindex, vindex) = calculated; + } + } + } +}; // end of class Float32ToInt8TestBase + +template +class Float32ToInt8Test1 final : public Float32ToInt8TestBase {}; + +template +class Float32ToInt8Test : public testing::Test {}; + +using ElementTypes = ::testing::Types; + +// Tests intrinsiccv_float_to_int8_t API. 
+TYPED_TEST_SUITE(Float32ToInt8Test, ElementTypes); + +TYPED_TEST(Float32ToInt8Test, TestScalar) { + Float32ToInt8Test1{}.test_scalar(); +} +TYPED_TEST(Float32ToInt8Test, TestVector) { + Float32ToInt8Test1{}.test_vector(); +} +TYPED_TEST(Float32ToInt8Test, TestCustomValues) { + Float32ToInt8Test1{}.test_custom(); +} +TYPED_TEST(Float32ToInt8Test, TestCustomFits128VectorSize) { + Float32ToInt8Test1{}.test_fill(4, 1); +} +TYPED_TEST(Float32ToInt8Test, TestCustomFits128VectorSize2x) { + Float32ToInt8Test1{}.test_fill(4, 2); +} +TYPED_TEST(Float32ToInt8Test, TestCustomFits128VectorSize3x) { + Float32ToInt8Test1{}.test_fill(4, 3); +} +TYPED_TEST(Float32ToInt8Test, TestCustomFits512VectorSize) { + Float32ToInt8Test1{}.test_fill(4, 4); +} +TYPED_TEST(Float32ToInt8Test, TestCustomFits512VectorSize2x) { + Float32ToInt8Test1{}.test_fill(4, 8); +} +TYPED_TEST(Float32ToInt8Test, TestCustomFits512VectorSize3x) { + Float32ToInt8Test1{}.test_fill(6, 8); +} +TYPED_TEST(Float32ToInt8Test, TestCustom128OneRemaining) { + Float32ToInt8Test1{}.test_fill(1, 17); +} +TYPED_TEST(Float32ToInt8Test, TestCustom128AllButOneRemaining) { + Float32ToInt8Test1{}.test_fill(5, 3); +} +TYPED_TEST(Float32ToInt8Test, TestCustomAboutHalfRemaining) { + Float32ToInt8Test1{}.test_fill(19, 2); +} +TYPED_TEST(Float32ToInt8Test, TestCustomEmpty) { + Float32ToInt8Test1{}.test_fill(0, 0); +} +TYPED_TEST(Float32ToInt8Test, TestCustomOne) { + Float32ToInt8Test1{}.test_fill(1, 1); +} diff --git a/test/framework/array.h b/test/framework/array.h index f854c9819..7f010cac6 100644 --- a/test/framework/array.h +++ b/test/framework/array.h @@ -141,6 +141,22 @@ class Array2D : public TwoDimensional { } } + // Sets values in a row starting at a given column from a const vector. 
+ void set(size_t row, size_t column, const std::vector &values) { + ASSERT_EQ(valid(), true) << "Array is invalid."; + ASSERT_GE(width() - column, values.size()); + + ElementType *ptr = at(row, column); + if (!ptr) { + return; + } + + size_t index = 0; + for (ElementType value : values) { + ptr[index++] = value; + } + } + // Sets values starting in a given row starting at a given column. // // The layout of the input TwoDimensional object is not altered, meaning that -- GitLab From db064099398c9691a2143bc1d4b344829f4b937d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Tue, 2 Apr 2024 15:19:51 +0200 Subject: [PATCH 6/8] Exclude NEON float tests from CI --- scripts/ci.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/ci.sh b/scripts/ci.sh index 25086dfb0..e0c44cb0b 100755 --- a/scripts/ci.sh +++ b/scripts/ci.sh @@ -48,16 +48,17 @@ ninja -C build/gcc # Run tests LONG_VECTOR_TESTS="GRAY2.*:RGB*" +FLOAT_CONVERSION_TESTS="-Float32ToInt8Test*" TESTRESULT=0 qemu-aarch64 build/test/framework/intrinsiccv-framework-test --gtest_output=xml:build/test-results/ || TESTRESULT=1 -qemu-aarch64 -cpu cortex-a35 build/test/api/intrinsiccv-api-test --gtest_output=xml:build/test-results/clang-neon/ || TESTRESULT=1 +qemu-aarch64 -cpu cortex-a35 build/test/api/intrinsiccv-api-test --gtest_filter="${FLOAT_CONVERSION_TESTS}" --gtest_output=xml:build/test-results/clang-neon/ || TESTRESULT=1 qemu-aarch64 -cpu max,sve128=on,sme=off \ build/test/api/intrinsiccv-api-test --gtest_output=xml:build/test-results/clang-sve128/ --vector-length=16 || TESTRESULT=1 qemu-aarch64 -cpu max,sve2048=on,sve-default-vector-length=256,sme=off \ build/test/api/intrinsiccv-api-test --gtest_filter="${LONG_VECTOR_TESTS}" --gtest_output=xml:build/test-results/clang-sve2048/ --vector-length=256 || TESTRESULT=1 qemu-aarch64 -cpu max,sve128=on,sme512=on \ build/test/api/intrinsiccv-api-test --gtest_output=xml:build/test-results/clang-sme/ --vector-length=64 || 
TESTRESULT=1 -qemu-aarch64 -cpu cortex-a35 build/gcc/test/api/intrinsiccv-api-test --gtest_output=xml:build/test-results/gcc-neon/ || TESTRESULT=1 +qemu-aarch64 -cpu cortex-a35 build/gcc/test/api/intrinsiccv-api-test --gtest_filter="${FLOAT_CONVERSION_TESTS}" --gtest_output=xml:build/test-results/gcc-neon/ || TESTRESULT=1 scripts/prefix_testsuite_names.py build/test-results/clang-neon/intrinsiccv-api-test.xml "clang-neon." scripts/prefix_testsuite_names.py build/test-results/clang-sve128/intrinsiccv-api-test.xml "clang-sve128." @@ -76,7 +77,7 @@ if [[ $(dpkg --print-architecture) = arm64 ]]; then -DINTRINSICCV_ENABLE_SME2=OFF \ -DCMAKE_CXX_FLAGS="-fsanitize=address,undefined -fno-sanitize-recover=all -Wno-pass-failed" ninja -C build/sanitize intrinsiccv-api-test - build/sanitize/test/api/intrinsiccv-api-test + build/sanitize/test/api/intrinsiccv-api-test --gtest_filter="${FLOAT_CONVERSION_TESTS}" fi # Build benchmarks, just to prevent bitrot. -- GitLab From eac64557bac9dcac3adc4cf3e7627be5391979a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Wed, 3 Apr 2024 13:46:36 +0200 Subject: [PATCH 7/8] Added OpenCV conformity tests (float32 to int8) --- README.md | 5 +- conformity/opencv/CMakeLists.txt | 90 ++++++++ conformity/opencv/README.md | 25 +++ conformity/opencv/common.h | 273 ++++++++++++++++++++++++ conformity/opencv/manager.cpp | 56 +++++ conformity/opencv/subordinate.cpp | 18 ++ conformity/opencv/tests.cpp | 137 ++++++++++++ conformity/opencv/tests.h | 18 ++ scripts/ci.sh | 4 + scripts/format.sh | 9 +- scripts/run_opencv_conformity_checks.sh | 42 ++++ 11 files changed, 671 insertions(+), 6 deletions(-) create mode 100644 conformity/opencv/CMakeLists.txt create mode 100644 conformity/opencv/README.md create mode 100644 conformity/opencv/common.h create mode 100644 conformity/opencv/manager.cpp create mode 100644 conformity/opencv/subordinate.cpp create mode 100644 conformity/opencv/tests.cpp create mode 100644 conformity/opencv/tests.h 
create mode 100755 scripts/run_opencv_conformity_checks.sh diff --git a/README.md b/README.md index d3a241b19..2d0fbdabe 100644 --- a/README.md +++ b/README.md @@ -27,8 +27,9 @@ An adapter layer API is currently provided for: The directory `intrinsiccv` contains generic implementation of the library. Integration with other projects are stored in `adapters` folder. `test` contains -API and unit tests for the library. All supporting scripts are located in -`scripts`. +API and unit tests for the library. `benchmark` contains benchmark source. +`conformity` contains checks to compare the library output with different +implementations. All supporting scripts are located in `scripts`. # Standalone build using CMake diff --git a/conformity/opencv/CMakeLists.txt b/conformity/opencv/CMakeLists.txt new file mode 100644 index 000000000..2a17f31b6 --- /dev/null +++ b/conformity/opencv/CMakeLists.txt @@ -0,0 +1,90 @@ +# SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +# +# SPDX-License-Identifier: Apache-2.0 + +cmake_minimum_required(VERSION 3.16) + +project("OpenCV Conformity" CXX) + +set(CMAKE_CXX_STANDARD 17) + +set(OPENCV_PATCH_VERSION "4.9") +set(OPENCV_VERSION "${OPENCV_PATCH_VERSION}.0") + +include(FetchContent) + +FetchContent_Declare( + OpenCV + URL https://github.com/opencv/opencv/archive/refs/tags/${OPENCV_VERSION}.tar.gz + PATCH_COMMAND patch -p1 < ${CMAKE_CURRENT_LIST_DIR}/../../adapters/opencv/opencv-${OPENCV_PATCH_VERSION}.patch +) + +FetchContent_MakeAvailable(OpenCV) + +# Manager +add_executable( + manager + manager.cpp + tests.cpp +) + +target_link_libraries( + manager + opencv_core + opencv_imgproc +) + +target_include_directories( + manager + PRIVATE + ${CMAKE_BINARY_DIR} + ${OpenCV_SOURCE_DIR}/modules/core/include + ${OpenCV_SOURCE_DIR}/modules/imgproc/include +) + +target_compile_definitions( + manager + PRIVATE SUBORDINATE=0 +) + +target_compile_options( + manager + PRIVATE + "-Werror" + "-Wall" + "-Wextra" +) + +# Subordinate 
+add_executable( + subordinate + subordinate.cpp + tests.cpp +) + +target_link_libraries( + subordinate + opencv_core + opencv_imgproc +) + +target_include_directories( + subordinate + PRIVATE + ${CMAKE_BINARY_DIR} + ${OpenCV_SOURCE_DIR}/modules/core/include + ${OpenCV_SOURCE_DIR}/modules/imgproc/include +) + +target_compile_definitions( + subordinate + PRIVATE SUBORDINATE=1 +) + +target_compile_options( + subordinate + PRIVATE + "-Werror" + "-Wall" + "-Wextra" +) diff --git a/conformity/opencv/README.md b/conformity/opencv/README.md new file mode 100644 index 000000000..a26fc1d4d --- /dev/null +++ b/conformity/opencv/README.md @@ -0,0 +1,25 @@ + + +# Conformity checks for OpenCV + +This CMake project makes it possible to automatically compare IntrinsicCV +results with vanilla OpenCV for a given operation. + +To achieve this the project needs to be built twice (vanilla version and +IntrinsicCV one) as the availability of IntrinsicCV for a given operation is a +compile time decision. Then, the built executables (`manager` and `subordinate`, +provided by different builds) perform the same operations, and the results are +compared. The communication between the executables is implemented with POSIX +IPC. + +The tests can be run from the project's root like: +``` +scripts/run_opencv_conformity_checks.sh +``` + +The script expects an environment where IntrinsicCV can be built natively with +`cmake` and `ninja`, and `qemu-aarch64` is available. 
diff --git a/conformity/opencv/common.h b/conformity/opencv/common.h new file mode 100644 index 000000000..f7736dcc2 --- /dev/null +++ b/conformity/opencv/common.h @@ -0,0 +1,273 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef INTRINSICCV_OPENCV_CONFORMITY_COMMON_H_ +#define INTRINSICCV_OPENCV_CONFORMITY_COMMON_H_ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "opencv2/core.hpp" +#include "opencv2/imgproc.hpp" + +#define SHM_ID "/opencv_intrinisiccv_conformity_check_shm" +#define SHM_SIZE (1024 * 1024) + +#define REQUEST_MQ_ID "/opencv_intrinisiccv_conformity_request_queue" +#define REPLY_MQ_ID "/opencv_intrinisiccv_conformity_reply_queue" + +class ExceptionWithErrno : public std::exception { + public: + explicit ExceptionWithErrno(const std::string& msg) + : msg_with_errno_{add_errno_details(msg)} {} + virtual const char* what() const noexcept { return msg_with_errno_.c_str(); } + + private: + std::string add_errno_details(const std::string& msg) { + std::string errno_string(strerror(errno)); + return msg + ": " + errno_string; + } + + std::string msg_with_errno_; +}; // end of class ExceptionWithErrno + +// Class to provide a file descriptor created with shm_open() +template +class ShmFD { + public: + template + explicit ShmFD(std::enable_if_t id) + : id_{}, fd_{open(id)} {} + + template + explicit ShmFD(std::enable_if_t id) + : id_{id}, fd_{unlink_and_open(id)} {} + + virtual ~ShmFD() { + close(fd_); + if (Recreated) { + shm_unlink(id_.c_str()); + } + } + + // Disable copying + ShmFD(ShmFD const&) = delete; + ShmFD& operator=(ShmFD) = delete; + + int fd() const { return fd_; } + + private: + static int open(const std::string& id) { + int fd = shm_open(id.c_str(), O_RDWR, 0666); + if (fd < 0) { + throw ExceptionWithErrno("Cannot open shared memory, id: " + id); + } + return fd; + } + + static int 
unlink_and_open(const std::string& id) { + if (shm_unlink(id.c_str())) { + if (errno != ENOENT) { + throw ExceptionWithErrno("Cannot delete shared memory, id: " + id); + } + } + int fd = shm_open(id.c_str(), O_RDWR | O_CREAT | O_EXCL, 0666); + if (fd < 0) { + throw ExceptionWithErrno("Cannot open shared memory, id: " + id); + } + return fd; + } + + const std::string id_; + int fd_; +}; // end of class ShmFD + +// Class to provide mapped shared memory +template +class SharedMemory { + public: + explicit SharedMemory(const std::string& id, size_t size) + : mem_{nullptr}, size_{size}, shm_fd_{id} { + if (ftruncate(shm_fd_.fd(), size)) { + throw ExceptionWithErrno("Failed to set the size of shared memory, id: " + + id); + } + + mem_ = + mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd_.fd(), 0); + if (mem_ == MAP_FAILED) { + throw ExceptionWithErrno("Failed to map shared memory, id: " + id); + } + } + + virtual ~SharedMemory() { munmap(mem_, size_); } + + // Disable copying + SharedMemory(SharedMemory const&) = delete; + SharedMemory& operator=(SharedMemory) = delete; + + cv::Mat cv_mat(int rows, int cols, int mat_type) { + size_t requested_size = rows * cols * cv::Mat(1, 1, mat_type).elemSize(); + if (requested_size > size_) { + throw std::runtime_error( + "Requested matrix is bigger than the shared memory size"); + } + return cv::Mat(rows, cols, mat_type, mem_); + } + + void store_mat(const cv::Mat& mat) { + size_t matrix_size = mat.rows * mat.cols * mat.elemSize(); + if (matrix_size > size_) { + throw std::runtime_error( + "Input matrix is bigger than the shared memory size"); + } + memcpy(mem_, reinterpret_cast(mat.ptr()), matrix_size); + } + + private: + void* mem_; + size_t size_; + ShmFD shm_fd_; +}; // end of class SharedMemory + +using OpenedSharedMemory = SharedMemory; +using RecreatedSharedMemory = SharedMemory; + +// Class to provide a message queue +template +class MessageQueue { + public: + template + explicit MessageQueue(std::enable_if_t id, 
+ SharedMemory& sm) + : id_{}, queue_desc_{open(id)}, sm_{sm} {} + + template + explicit MessageQueue(std::enable_if_t id, + SharedMemory& sm) + : id_{id}, queue_desc_{unlink_and_open(id)}, sm_{sm} {} + + virtual ~MessageQueue() { + mq_close(queue_desc_); + if (Recreated) { + mq_unlink(id_.c_str()); + } + } + + // Disable copying + MessageQueue(MessageQueue const&) = delete; + MessageQueue& operator=(MessageQueue) = delete; + + void request_exit() { + message m = {-1, 0, 0, 0}; + send(m); + } + + void request_operation(int cmd, const cv::Mat& mat) { + sm_.store_mat(mat); + message m = {cmd, mat.rows, mat.cols, mat.type()}; + send(m); + } + + void reply_operation(int cmd, const cv::Mat& mat) { + request_operation(cmd, mat); + } + + void wait() { + timespec abs_timeout; + clock_gettime(CLOCK_REALTIME, &abs_timeout); + abs_timeout.tv_sec += 3; + ssize_t read_bytes = + mq_timedreceive(queue_desc_, reinterpret_cast(&last_message_), + sizeof(last_message_), nullptr, &abs_timeout); + if (read_bytes != sizeof(last_message_)) { + if (read_bytes == -1) { + throw ExceptionWithErrno("Could not receive message"); + } else { + throw std::runtime_error("Less bytes received than expected"); + } + } + } + + int last_cmd() const { return last_message_.cmd; } + + cv::Mat cv_mat_from_last_msg() const { + return sm_.cv_mat(last_message_.rows, last_message_.cols, + last_message_.type); + } + + private: + struct message { + int cmd; + int rows; + int cols; + int type; + }; + + static mqd_t open(const std::string& id) { + mqd_t qd = mq_open(id.c_str(), O_RDWR); + if (qd == static_cast(-1)) { + throw ExceptionWithErrno("Failed to open message queue, id:" + id); + } + + return qd; + } + static mqd_t unlink_and_open(const std::string& id) { + if (mq_unlink(id.c_str())) { + if (errno != ENOENT) { + throw ExceptionWithErrno("Cannot delete message queue, id: " + id); + } + } + + mq_attr attr = queue_attributes(); + mqd_t qd = mq_open(id.c_str(), O_RDWR | O_CREAT | O_EXCL, 0666, &attr); + if (qd 
== static_cast(-1)) { + throw ExceptionWithErrno("Failed to open message queue, id:" + id); + } + + return qd; + } + + void send(message& m) const { + if (mq_send(queue_desc_, reinterpret_cast(&m), sizeof(m), 0)) { + throw ExceptionWithErrno("Failed to send message on queue"); + } + } + + static mq_attr queue_attributes() { + mq_attr attr; + attr.mq_maxmsg = 1; + attr.mq_msgsize = sizeof(message); + return attr; + } + + const std::string id_; + mqd_t queue_desc_; + message last_message_; + SharedMemory& sm_; +}; // end of class MessageQueue + +class OpenedMessageQueue : public MessageQueue { + public: + explicit OpenedMessageQueue(const std::string& id, SharedMemory& sm) + : MessageQueue{id, sm} {} +}; // end of class OpenedMessageQueue + +class RecreatedMessageQueue : public MessageQueue { + public: + explicit RecreatedMessageQueue(const std::string& id, SharedMemory& sm) + : MessageQueue{id, sm} {} +}; // end of class RecreatedMessageQueue + +#endif // INTRINSICCV_OPENCV_CONFORMITY_COMMON_H_ diff --git a/conformity/opencv/manager.cpp b/conformity/opencv/manager.cpp new file mode 100644 index 000000000..2d7dc2dd3 --- /dev/null +++ b/conformity/opencv/manager.cpp @@ -0,0 +1,56 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include +#include +#include + +#include +#include + +#include "common.h" +#include "tests.h" + +int main(int argc, char** argv) { + if (argc < 2) { + std::cerr << "Error! Subordinate task is not defined as the first argument!" + << std::endl; + return 1; + } + + // Block USR1 signal as it terminates the process by default + sigset_t usr1_sigset; + sigemptyset(&usr1_sigset); + sigaddset(&usr1_sigset, SIGUSR1); + sigprocmask(SIG_BLOCK, &usr1_sigset, NULL); + + pid_t child_pid = fork(); + if (child_pid == 0) { + // Waiting for the initialization of manager task + timespec timeout = {3, 0}; + if (sigtimedwait(&usr1_sigset, NULL, &timeout) != SIGUSR1) { + std::cerr + << "Error! 
Wrong signal received or timeout reached in subordinate!" + << std::endl; + return 2; + } + // Starting subordinate task + execl(argv[1], argv[1], static_cast(NULL)); + throw ExceptionWithErrno("Cannot start subordinate executable"); + } + + RecreatedSharedMemory sm{SHM_ID, SHM_SIZE}; + RecreatedMessageQueue request_queue{REQUEST_MQ_ID, sm}; + RecreatedMessageQueue reply_queue{REPLY_MQ_ID, sm}; + + // Let subordinate know that init is done + kill(child_pid, SIGUSR1); + + run_tests(request_queue, reply_queue); + + // Wait for subordinate to exit + wait(NULL); + + std::cout << "Manager exits normally" << std::endl; +} diff --git a/conformity/opencv/subordinate.cpp b/conformity/opencv/subordinate.cpp new file mode 100644 index 000000000..e7c77f327 --- /dev/null +++ b/conformity/opencv/subordinate.cpp @@ -0,0 +1,18 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include "common.h" +#include "tests.h" + +int main(void) { + OpenedSharedMemory sm{SHM_ID, SHM_SIZE}; + OpenedMessageQueue request_queue{REQUEST_MQ_ID, sm}; + OpenedMessageQueue reply_queue{REPLY_MQ_ID, sm}; + + wait_for_requests(request_queue, reply_queue); + + std::cout << "Subordinate exits normally" << std::endl; +} diff --git a/conformity/opencv/tests.cpp b/conformity/opencv/tests.cpp new file mode 100644 index 000000000..0d3d1df68 --- /dev/null +++ b/conformity/opencv/tests.cpp @@ -0,0 +1,137 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "tests.h" + +#include +#include +#include +#include + +#include "opencv2/core.hpp" +#include "opencv2/imgproc.hpp" + +namespace ConformityUtils { + +template +static auto abs_diff(T a, T b) { + return a > b ? 
a - b : b - a; +} + +template +static bool are_matrices_different(T threshold, cv::Mat& A, cv::Mat& B) { + if (A.rows != B.rows || A.cols != B.cols || A.type() != B.type()) { + std::cout << "Matrix size/type mismatch" << std::endl; + return true; + } + + for (int i = 0; i < A.rows; ++i) { + for (int j = 0; j < (A.cols * CV_MAT_CN(A.type())); ++j) { + if (abs_diff(A.at(i, j), B.at(i, j)) > threshold) { + std::cout << "=== Mismatch at: " << i << " " << j << std::endl + << std::endl; + return true; + } + } + } + + return false; +} + +} // namespace ConformityUtils + +cv::Mat exec_float_to_int8_t(cv::Mat& input) { + cv::Mat result; + input.convertTo(result, CV_8SC1); + return result; +} + +bool test_float_to_int8_t(int index, RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue) { + cv::RNG rng(0); + + for (size_t x = 5; x <= 16; ++x) { + for (size_t y = 5; y <= 16; ++y) { + cv::Mat input(x, y, CV_32FC1); + rng.fill(input, cv::RNG::UNIFORM, -1000, 1000); + + cv::Mat manager_result = exec_float_to_int8_t(input); + + request_queue.request_operation(index, input); + reply_queue.wait(); + if (reply_queue.last_cmd() != index) { + throw std::runtime_error("Invalid reply from subordinate"); + } + + cv::Mat subord_result = reply_queue.cv_mat_from_last_msg(); + + if (ConformityUtils::are_matrices_different(0, manager_result, + subord_result)) { + std::cout << "[FAIL]" << std::endl; + std::cout << "height=" << x << std::endl; + std::cout << "width=" << y << std::endl; + std::cout << "=== Input Matrix:" << std::endl; + std::cout << input << std::endl << std::endl; + std::cout << "=== Manager result:" << std::endl; + std::cout << manager_result << std::endl << std::endl; + std::cout << "=== Subordinate result:" << std::endl; + std::cout << subord_result << std::endl << std::endl; + + return true; + } + } + } + + return false; +} + +#if SUBORDINATE +using test = std::pair; +#define TEST(name, x, exec_func) \ + { name, exec_func } +#else // MANAGER +using test 
= std::pair; +#define TEST(name, test_func, x) \ + { name, test_func } +#endif + +// clang-format off +std::vector tests = { + TEST("Float32 to Int8", test_float_to_int8_t, exec_float_to_int8_t), +}; +// clang-format on + +#if SUBORDINATE +void wait_for_requests(OpenedMessageQueue& request_queue, + OpenedMessageQueue& reply_queue) { + while (true) { + request_queue.wait(); + int cmd = request_queue.last_cmd(); + + if (cmd < 0) { + // Exit requested + break; + } + + if (cmd > static_cast(tests.size())) { + throw std::runtime_error("Invalid operation requested in subordinate"); + } + + cv::Mat input = request_queue.cv_mat_from_last_msg(); + cv::Mat result = tests[cmd].second(input); + reply_queue.reply_operation(cmd, result); + } +} +#else // MANAGER +void run_tests(RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue) { + for (int i = 0; i < static_cast(tests.size()); ++i) { + std::cout << "Testing " + tests[i].first << std::endl; + if (tests[i].second(i, request_queue, reply_queue)) { + break; + } + } + request_queue.request_exit(); +} +#endif diff --git a/conformity/opencv/tests.h b/conformity/opencv/tests.h new file mode 100644 index 000000000..8728d8c5f --- /dev/null +++ b/conformity/opencv/tests.h @@ -0,0 +1,18 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef INTRINSICCV_OPENCV_CONFORMITY_TESTS_H_ +#define INTRINSICCV_OPENCV_CONFORMITY_TESTS_H_ + +#include "common.h" + +#if SUBORDINATE +void wait_for_requests(OpenedMessageQueue& request_queue, + OpenedMessageQueue& reply_queue); +#else // MANAGER +void run_tests(RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue); +#endif + +#endif // INTRINSICCV_OPENCV_CONFORMITY_TESTS_H_ diff --git a/scripts/ci.sh b/scripts/ci.sh index e0c44cb0b..b81c4cf10 100755 --- a/scripts/ci.sh +++ b/scripts/ci.sh @@ -98,5 +98,9 @@ if [[ $(dpkg --print-architecture) = arm64 ]]; then # Check OpenCV-IntrinsicCV 
integration scripts/ci-opencv.sh fi +if [[ $(dpkg --print-architecture) = arm64 ]]; then + # Compare the library output with OpenCV implementation + scripts/run_opencv_conformity_checks.sh +fi exit $TESTRESULT diff --git a/scripts/format.sh b/scripts/format.sh index c50dfeda6..69953d462 100755 --- a/scripts/format.sh +++ b/scripts/format.sh @@ -29,10 +29,11 @@ INTRINSICCV_ROOT_PATH="$(realpath "${SCRIPT_PATH}"/..)" # ------------------------------------------------------------------------------ SOURCES="$(find \ - "${INTRINSICCV_ROOT_PATH}"/adapters \ - "${INTRINSICCV_ROOT_PATH}"/benchmark \ - "${INTRINSICCV_ROOT_PATH}"/intrinsiccv \ - "${INTRINSICCV_ROOT_PATH}"/test \ + "${INTRINSICCV_ROOT_PATH}/adapters" \ + "${INTRINSICCV_ROOT_PATH}/benchmark" \ + "${INTRINSICCV_ROOT_PATH}/intrinsiccv" \ + "${INTRINSICCV_ROOT_PATH}/test" \ + "${INTRINSICCV_ROOT_PATH}/conformity/opencv" \ \( -name \*.cpp -o -name \*.h -o -name \*.h.in \) \ -print)" diff --git a/scripts/run_opencv_conformity_checks.sh b/scripts/run_opencv_conformity_checks.sh new file mode 100755 index 000000000..bd58bd0ae --- /dev/null +++ b/scripts/run_opencv_conformity_checks.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +# +# SPDX-License-Identifier: Apache-2.0 + +set -exu + +: "${CLEAN:=OFF}" + +SCRIPT_PATH="$(realpath "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)")" + +SOURCE_PATH="${SCRIPT_PATH}/../conformity/opencv" +INTRINSICCV_SOURCE_PATH="${SCRIPT_PATH}/.." 
+BUILD_PATH="${SCRIPT_PATH}/../build/conformity" +OPENCV_DEFAULT_PATH="${BUILD_PATH}/opencv_default" +OPENCV_INTRINSICCV_PATH="${BUILD_PATH}/opencv_intrinsiccv" + +if [[ "${CLEAN}" == "ON" ]]; then + rm -rf "${BUILD_PATH}" +fi + +export LDFLAGS="--rtlib=compiler-rt -fuse-ld=lld" + +cmake -S "${SOURCE_PATH}" \ + -B "${OPENCV_DEFAULT_PATH}" \ + -G Ninja \ + -DWITH_INTRINSICCV=OFF +ninja -C "${OPENCV_DEFAULT_PATH}" subordinate + +cmake -S "${SOURCE_PATH}" \ + -B "${OPENCV_INTRINSICCV_PATH}" \ + -G Ninja \ + -DWITH_INTRINSICCV=ON \ + -DINTRINSICCV_SOURCE_PATH="${INTRINSICCV_SOURCE_PATH}" \ + -DINTRINSICCV_ENABLE_SVE2=ON \ + -DINTRINSICCV_ENABLE_SVE2_SELECTIVELY=OFF +ninja -C "${OPENCV_INTRINSICCV_PATH}" manager + +qemu-aarch64 -cpu cortex-a35 "${OPENCV_INTRINSICCV_PATH}/bin/manager" "${OPENCV_DEFAULT_PATH}/bin/subordinate" +qemu-aarch64 -cpu max,sve128=on,sme=off "${OPENCV_INTRINSICCV_PATH}/bin/manager" "${OPENCV_DEFAULT_PATH}/bin/subordinate" +qemu-aarch64 -cpu max,sve128=on,sme512=on "${OPENCV_INTRINSICCV_PATH}/bin/manager" "${OPENCV_DEFAULT_PATH}/bin/subordinate" -- GitLab From bd6f6301da3f1ddf7ca24247b73d06cd56154b3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Wed, 3 Apr 2024 18:01:00 +0200 Subject: [PATCH 8/8] Implement float32 to uint8 type conversion --- adapters/opencv/intrinsiccv_hal.cpp | 6 +++ intrinsiccv/include/intrinsiccv/intrinsiccv.h | 8 +++- .../src/conversions/float_to_int_api.cpp | 39 ++++++++------- .../src/conversions/float_to_int_neon.cpp | 19 +++++--- intrinsiccv/src/conversions/float_to_int_sc.h | 48 ++++++++++++------- .../src/conversions/float_to_int_sme2.cpp | 24 ++++++---- .../src/conversions/float_to_int_sve2.cpp | 22 ++++++--- 7 files changed, 108 insertions(+), 58 deletions(-) diff --git a/adapters/opencv/intrinsiccv_hal.cpp b/adapters/opencv/intrinsiccv_hal.cpp index d22318ddc..c863fc9e5 100644 --- a/adapters/opencv/intrinsiccv_hal.cpp +++ b/adapters/opencv/intrinsiccv_hal.cpp @@ -678,6 +678,12 @@ int 
convertTo(const uchar *src_data, size_t src_step, int src_depth, reinterpret_cast(src_data), src_step, reinterpret_cast(dst_data), dst_step, width, height)); } + // float32 to uint8 + if (src_depth == CV_32F && dst_depth == CV_8U) { + return convert_error(intrinsiccv_type_conversion_f32_u8( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, width, height)); + } } return CV_HAL_ERROR_NOT_IMPLEMENTED; } diff --git a/intrinsiccv/include/intrinsiccv/intrinsiccv.h b/intrinsiccv/include/intrinsiccv/intrinsiccv.h index 3e5d94a2c..26eeddb8b 100644 --- a/intrinsiccv/include/intrinsiccv/intrinsiccv.h +++ b/intrinsiccv/include/intrinsiccv/intrinsiccv.h @@ -1268,8 +1268,8 @@ INTRINSICCV_API_DECLARATION(intrinsiccv_scale_u8, const uint8_t *src, size_t width, size_t height, float scale, float shift); -/// Converts the elements in `src` from type `float` to type `int8_t`, -/// then stores the result in `dst`. +/// Converts the elements in `src` from a floating-point type to an integer +/// type, then stores the result in `dst`. /// /// Each resulting element is saturated, i.e. it is the smallest/largest /// number of the type of the element if the result would underflow/overflow. 
@@ -1290,6 +1290,10 @@ INTRINSICCV_API_DECLARATION(intrinsiccv_scale_u8, const uint8_t *src, INTRINSICCV_API_DECLARATION(intrinsiccv_type_conversion_f32_s8, const float *src, size_t src_stride, int8_t *dst, size_t dst_stride, size_t width, size_t height); +/// @copydoc intrinsiccv_type_conversion_f32_s8 +INTRINSICCV_API_DECLARATION(intrinsiccv_type_conversion_f32_u8, + const float *src, size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, size_t height); #ifdef __cplusplus } // extern "C" diff --git a/intrinsiccv/src/conversions/float_to_int_api.cpp b/intrinsiccv/src/conversions/float_to_int_api.cpp index 8a8a92f42..5503ecb8d 100644 --- a/intrinsiccv/src/conversions/float_to_int_api.cpp +++ b/intrinsiccv/src/conversions/float_to_int_api.cpp @@ -10,35 +10,42 @@ namespace intrinsiccv { namespace neon { -intrinsiccv_error_t type_conversion_float_to_int8_t( - const float* src, size_t src_stride, int8_t* dst, size_t dst_stride, - size_t width, size_t height); +template +intrinsiccv_error_t type_conversion_float_to_int(const float* src, + size_t src_stride, T* dst, + size_t dst_stride, + size_t width, size_t height); } // namespace neon namespace sve2 { -intrinsiccv_error_t type_conversion_float_to_int8_t( - const float* src, size_t src_stride, int8_t* dst, size_t dst_stride, - size_t width, size_t height); +template +intrinsiccv_error_t type_conversion_float_to_int(const float* src, + size_t src_stride, T* dst, + size_t dst_stride, + size_t width, size_t height); } // namespace sve2 namespace sme2 { -intrinsiccv_error_t type_conversion_float_to_int8_t( - const float* src, size_t src_stride, int8_t* dst, size_t dst_stride, - size_t width, size_t height); +template +intrinsiccv_error_t type_conversion_float_to_int(const float* src, + size_t src_stride, T* dst, + size_t dst_stride, + size_t width, size_t height); } // namespace sme2 -#define INTRINSICCV_DEFINE_C_API(name, itype, otype) \ - INTRINSICCV_MULTIVERSION_C_API( \ - name, 
intrinsiccv::neon::type_conversion_##itype##_to_##otype, \ - INTRINSICCV_SVE2_IMPL_IF( \ - intrinsiccv::sve2::type_conversion_##itype##_to_##otype), \ - intrinsiccv::sme2::type_conversion_##itype##_to_##otype) +#define INTRINSICCV_DEFINE_C_API(name, type) \ + INTRINSICCV_MULTIVERSION_C_API( \ + name, intrinsiccv::neon::type_conversion_float_to_int, \ + INTRINSICCV_SVE2_IMPL_IF( \ + intrinsiccv::sve2::type_conversion_float_to_int), \ + intrinsiccv::sme2::type_conversion_float_to_int) -INTRINSICCV_DEFINE_C_API(intrinsiccv_type_conversion_f32_s8, float, int8_t); +INTRINSICCV_DEFINE_C_API(intrinsiccv_type_conversion_f32_s8, int8_t); +INTRINSICCV_DEFINE_C_API(intrinsiccv_type_conversion_f32_u8, uint8_t); } // namespace intrinsiccv diff --git a/intrinsiccv/src/conversions/float_to_int_neon.cpp b/intrinsiccv/src/conversions/float_to_int_neon.cpp index 6d3fee394..abb3614c4 100644 --- a/intrinsiccv/src/conversions/float_to_int_neon.cpp +++ b/intrinsiccv/src/conversions/float_to_int_neon.cpp @@ -7,14 +7,19 @@ namespace intrinsiccv::neon { -intrinsiccv_error_t type_conversion_float_to_int8_t(const float*, size_t, - int8_t*, size_t, size_t, - size_t); - -intrinsiccv_error_t type_conversion_float_to_int8_t(const float*, size_t, - int8_t*, size_t, size_t, - size_t) { +template +intrinsiccv_error_t type_conversion_float_to_int(const float*, size_t, T*, + size_t, size_t, size_t) { return INTRINSICCV_ERROR_NOT_IMPLEMENTED; } +#define INTRINSICCV_INSTANTIATE_TEMPLATE(type) \ + template INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t \ + type_conversion_float_to_int(const float* src, size_t src_stride, \ + type* dst, size_t dst_stride, \ + size_t width, size_t height) + +INTRINSICCV_INSTANTIATE_TEMPLATE(int8_t); +INTRINSICCV_INSTANTIATE_TEMPLATE(uint8_t); + } // namespace intrinsiccv::neon diff --git a/intrinsiccv/src/conversions/float_to_int_sc.h b/intrinsiccv/src/conversions/float_to_int_sc.h index 10f8f749a..67afd326a 100644 --- a/intrinsiccv/src/conversions/float_to_int_sc.h +++ 
b/intrinsiccv/src/conversions/float_to_int_sc.h @@ -5,31 +5,35 @@ #ifndef INTRINSICCV_FLOAT_TO_INT_SC_H #define INTRINSICCV_FLOAT_TO_INT_SC_H +#include + #include "intrinsiccv/intrinsiccv.h" #include "intrinsiccv/sve2.h" namespace INTRINSICCV_TARGET_NAMESPACE { +template class float_to_int_operation final { public: using SrcVecTraits = INTRINSICCV_TARGET_NAMESPACE::VecTraits; using SrcVectorType = typename SrcVecTraits::VectorType; - using DstVecTraits = INTRINSICCV_TARGET_NAMESPACE::VecTraits; + using DstVecTraits = INTRINSICCV_TARGET_NAMESPACE::VecTraits< + std::conditional_t, int32_t, uint32_t>>; using DstVectorType = typename DstVecTraits::VectorType; - using ContextType = INTRINSICCV_TARGET_NAMESPACE::Context; using VecTraits = SrcVecTraits; void process_row(size_t width, Columns src, - Columns dst) { + Columns dst) INTRINSICCV_STREAMING_COMPATIBLE { LoopUnroll{width, VecTraits::num_lanes()} .unroll_twice([&](size_t step) INTRINSICCV_STREAMING_COMPATIBLE { svbool_t pg = VecTraits::svptrue(); - Context ctx{pg}; SrcVectorType src_vector1 = svld1(pg, &src[0]); SrcVectorType src_vector2 = svld1_vnum(pg, &src[0], 1); - DstVectorType result_vector1 = vector_path(ctx, src_vector1); - DstVectorType result_vector2 = vector_path(ctx, src_vector2); + DstVectorType result_vector1 = + vector_path(pg, src_vector1); + DstVectorType result_vector2 = + vector_path(pg, src_vector2); svst1b(pg, &dst[0], result_vector1); svst1b_vnum(pg, &dst[0], 1, result_vector2); src += ptrdiff_t(step); @@ -38,24 +42,22 @@ class float_to_int_operation final { .remaining([&](size_t length, size_t) INTRINSICCV_STREAMING_COMPATIBLE { size_t index = 0; svbool_t pg = VecTraits::svwhilelt(index, length); - Context ctx{pg}; while (svptest_first(VecTraits::svptrue(), pg)) { SrcVectorType src_vector = svld1(pg, &src[ptrdiff_t(index)]); - DstVectorType result_vector = vector_path(ctx, src_vector); + DstVectorType result_vector = + vector_path(pg, src_vector); svst1b(pg, &dst[ptrdiff_t(index)], 
result_vector); // Update loop counter and calculate the next governing predicate. index += VecTraits::num_lanes(); pg = VecTraits::svwhilelt(index, length); - ctx.set_predicate(pg); } }); } private: - DstVectorType vector_path(ContextType ctx, SrcVectorType src) + template , int> = 0> + DstVectorType vector_path(svbool_t& pg, SrcVectorType src) INTRINSICCV_STREAMING_COMPATIBLE { - svbool_t pg = ctx.predicate(); - src = svrinti_f32_x(pg, src); svbool_t less = svcmplt_n_f32(pg, src, -128.0); @@ -66,19 +68,31 @@ class float_to_int_operation final { return svcvt_s32_f32_x(pg, src); } -}; // end of class float_to_int_operation -static intrinsiccv_error_t type_conversion_float_to_int8_t_sc( - const float* src, size_t src_stride, int8_t* dst, size_t dst_stride, + template , int> = 0> + DstVectorType vector_path(svbool_t& pg, SrcVectorType src) + INTRINSICCV_STREAMING_COMPATIBLE { + src = svrinti_f32_x(pg, src); + + svbool_t greater = svcmpgt_n_f32(pg, src, 255.0); + src = svdup_n_f32_m(src, greater, 255.0); + + return svcvt_u32_f32_x(pg, src); + } +}; // end of class float_to_int_operation + +template +static intrinsiccv_error_t type_conversion_float_to_int_sc( + const float* src, size_t src_stride, T* dst, size_t dst_stride, size_t width, size_t height) INTRINSICCV_STREAMING_COMPATIBLE { CHECK_POINTER_AND_STRIDE(src, src_stride); CHECK_POINTER_AND_STRIDE(dst, dst_stride); CHECK_IMAGE_SIZE(width, height); - float_to_int_operation operation; + float_to_int_operation operation; Rectangle rect{width, height}; Rows src_rows{src, src_stride}; - Rows dst_rows{dst, dst_stride}; + Rows dst_rows{dst, dst_stride}; zip_rows(operation, rect, src_rows, dst_rows); return INTRINSICCV_OK; diff --git a/intrinsiccv/src/conversions/float_to_int_sme2.cpp b/intrinsiccv/src/conversions/float_to_int_sme2.cpp index af389a575..9b2c88182 100644 --- a/intrinsiccv/src/conversions/float_to_int_sme2.cpp +++ b/intrinsiccv/src/conversions/float_to_int_sme2.cpp @@ -6,16 +6,22 @@ namespace 
intrinsiccv::sme2 { -INTRINSICCV_LOCALLY_STREAMING INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t -type_conversion_float_to_int8_t(const float*, size_t, int8_t*, size_t, size_t, - size_t); - -INTRINSICCV_LOCALLY_STREAMING INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t -type_conversion_float_to_int8_t(const float* src, size_t src_stride, - int8_t* dst, size_t dst_stride, size_t width, - size_t height) { - return type_conversion_float_to_int8_t_sc(src, src_stride, dst, dst_stride, +template +intrinsiccv_error_t type_conversion_float_to_int(const float* src, + size_t src_stride, T* dst, + size_t dst_stride, + size_t width, size_t height) { + return type_conversion_float_to_int_sc(src, src_stride, dst, dst_stride, width, height); } +#define INTRINSICCV_INSTANTIATE_TEMPLATE(type) \ + template INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t \ + type_conversion_float_to_int(const float* src, size_t src_stride, \ + type* dst, size_t dst_stride, \ + size_t width, size_t height) + +INTRINSICCV_INSTANTIATE_TEMPLATE(int8_t); +INTRINSICCV_INSTANTIATE_TEMPLATE(uint8_t); + } // namespace intrinsiccv::sme2 diff --git a/intrinsiccv/src/conversions/float_to_int_sve2.cpp b/intrinsiccv/src/conversions/float_to_int_sve2.cpp index f58ea271e..9b9efdcdd 100644 --- a/intrinsiccv/src/conversions/float_to_int_sve2.cpp +++ b/intrinsiccv/src/conversions/float_to_int_sve2.cpp @@ -6,14 +6,22 @@ namespace intrinsiccv::sve2 { -INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t type_conversion_float_to_int8_t( - const float*, size_t, int8_t*, size_t, size_t, size_t); - -INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t type_conversion_float_to_int8_t( - const float* src, size_t src_stride, int8_t* dst, size_t dst_stride, - size_t width, size_t height) { - return type_conversion_float_to_int8_t_sc(src, src_stride, dst, dst_stride, +template +intrinsiccv_error_t type_conversion_float_to_int(const float* src, + size_t src_stride, T* dst, + size_t dst_stride, + size_t width, size_t height) { + return 
type_conversion_float_to_int_sc(src, src_stride, dst, dst_stride, width, height); } +#define INTRINSICCV_INSTANTIATE_TEMPLATE(type) \ + template INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t \ + type_conversion_float_to_int(const float* src, size_t src_stride, \ + type* dst, size_t dst_stride, \ + size_t width, size_t height) + +INTRINSICCV_INSTANTIATE_TEMPLATE(int8_t); +INTRINSICCV_INSTANTIATE_TEMPLATE(uint8_t); + } // namespace intrinsiccv::sve2 -- GitLab