From f45974ac71e59f30959363d223590b8e7ba6d28b Mon Sep 17 00:00:00 2001 From: Michael Platings Date: Fri, 19 Apr 2024 11:12:06 +0000 Subject: [PATCH 01/13] Revert "Delete float<->int conversion code" This reverts commit 4dd9933ff902bfa9ffabd58c71482bd16422ac69. --- adapters/opencv/kleidicv_hal.cpp | 27 ++ conformity/opencv/CMakeLists.txt | 2 + conformity/opencv/test_float_conv.cpp | 206 +++++++++ conformity/opencv/test_float_conv.h | 14 + conformity/opencv/tests.cpp | 2 + kleidicv/include/kleidicv/kleidicv.h | 59 +++ kleidicv/src/conversions/float_conv_api.cpp | 71 +++ kleidicv/src/conversions/float_conv_neon.cpp | 27 ++ kleidicv/src/conversions/float_conv_sc.h | 166 +++++++ kleidicv/src/conversions/float_conv_sme2.cpp | 28 ++ kleidicv/src/conversions/float_conv_sve2.cpp | 28 ++ test/api/test_float_conv.cpp | 438 +++++++++++++++++++ 12 files changed, 1068 insertions(+) create mode 100644 conformity/opencv/test_float_conv.cpp create mode 100644 conformity/opencv/test_float_conv.h create mode 100644 kleidicv/src/conversions/float_conv_api.cpp create mode 100644 kleidicv/src/conversions/float_conv_neon.cpp create mode 100644 kleidicv/src/conversions/float_conv_sc.h create mode 100644 kleidicv/src/conversions/float_conv_sme2.cpp create mode 100644 kleidicv/src/conversions/float_conv_sve2.cpp create mode 100644 test/api/test_float_conv.cpp diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp index 04b672a08..627fb545d 100644 --- a/adapters/opencv/kleidicv_hal.cpp +++ b/adapters/opencv/kleidicv_hal.cpp @@ -707,6 +707,33 @@ int convertTo(const uchar *src_data, size_t src_step, int src_depth, } } + // type conversion only + if (scale == 1.0 && shift == 0.0) { + // float32 to int8 + if (src_depth == CV_32F && dst_depth == CV_8S) { + return convert_error(kleidicv_float_conversion_f32_s8( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, width, height)); + } + // float32 to uint8 + if (src_depth == CV_32F && dst_depth == CV_8U) { + return convert_error(kleidicv_float_conversion_f32_u8( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, width, height)); + } + // int8 to float32 + if (src_depth == CV_8S && dst_depth == CV_32F) { + return convert_error(kleidicv_float_conversion_s8_f32( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, width, height)); + } + // uint8 to float32 + if (src_depth == CV_8U && dst_depth == CV_32F) { + return convert_error(kleidicv_float_conversion_u8_f32( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, width, height)); + } + } return CV_HAL_ERROR_NOT_IMPLEMENTED; } diff --git a/conformity/opencv/CMakeLists.txt b/conformity/opencv/CMakeLists.txt index 4fc695794..31fc56248 100644 --- a/conformity/opencv/CMakeLists.txt +++ b/conformity/opencv/CMakeLists.txt @@ -76,6 +76,8 @@ add_executable( test_rgb2yuv.cpp test_sobel.cpp test_exp.cpp + test_float_conv.cpp + ) target_link_libraries( diff --git a/conformity/opencv/test_float_conv.cpp b/conformity/opencv/test_float_conv.cpp new file mode 100644 index 000000000..8f7ca6a81 --- /dev/null +++ b/conformity/opencv/test_float_conv.cpp @@ -0,0 +1,206 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "test_float_conv.h" + +#include + +float floatval(uint32_t v) { + static_assert(sizeof(float) == 4); + return *reinterpret_cast(&v); +} + +float quietNaN = floatval(0x7FC00000); +float signalingNaN = floatval(0x7FA00000); +float posInfinity = floatval(0x7F800000); +float negInfinity = floatval(0xFF800000); + +float minusNaN = floatval(0xFF800001); +float plusNaN = floatval(0x7F800001); +float plusZero = floatval(0x00000000); +float minusZero = floatval(0x80000000); + +float oneNaN = floatval(0x7FC00001); +float zeroDivZero = floatval(0xFFC00000); +float floatMin = floatval(0x00800000); +float floatMax = floatval(0x7F7FFFFF); + +float posSubnormalMin = floatval(0x00000001); +float posSubnormalMax = floatval(0x007FFFFF); +float negSubnormalMin = floatval(0x80000001); +float negSubnormalMax = floatval(0x807FFFFF); + +template +cv::Mat exec_float32_to_int8(cv::Mat& input) { + cv::Mat result; + input.convertTo(result, Signed ? CV_8SC1 : CV_8UC1); + return result; +} + +cv::Mat exec_int8_to_float32(cv::Mat& input) { + cv::Mat result; + input.convertTo(result, CV_32FC1); + return result; +} + +#if MANAGER +template +bool test_float32_to_int8_random(int index, + RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue) { + cv::RNG rng(0); + + for (size_t x = 5; x <= 16; ++x) { + for (size_t y = 5; y <= 16; ++y) { + cv::Mat input(x, y, CV_32FC(Channels)); + rng.fill(input, cv::RNG::UNIFORM, Signed ? -1000 : 0, 1000); + + cv::Mat actual = exec_float32_to_int8(input); + cv::Mat expected = get_expected_from_subordinate(index, request_queue, + reply_queue, input); + + if (are_matrices_different(0, actual, expected)) { + fail_print_matrices(x, y, input, actual, expected); + return true; + } + } + } + + return false; +} + +template +bool test_int8_to_float32_random(int index, + RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue) { + cv::RNG rng(0); + + for (size_t x = 5; x <= 16; ++x) { + for (size_t y = 5; y <= 16; ++y) { + cv::Mat input(x, y, Signed ? CV_8SC(Channels) : CV_8UC(Channels)); + rng.fill(input, cv::RNG::UNIFORM, Signed ? -1000 : 0, 1000); + + cv::Mat actual = exec_int8_to_float32(input); + cv::Mat expected = get_expected_from_subordinate(index, request_queue, + reply_queue, input); + + if (are_matrices_different(0, actual, expected)) { + fail_print_matrices(x, y, input, actual, expected); + return true; + } + } + } + + return false; +} + +static constexpr int custom_data_float_height = 8; +static constexpr int custom_data_float_width = 4; + +static float + custom_data_float[custom_data_float_height * custom_data_float_width] = { + // clang-format off + quietNaN, signalingNaN, posInfinity, negInfinity, + minusNaN, plusNaN, plusZero, minusZero, + oneNaN, zeroDivZero, floatMin, floatMax, + posSubnormalMin, posSubnormalMax, negSubnormalMin, negSubnormalMax, + 1111.11, -1112.22, 113.33, 114.44, + 111.51, 112.62, 113.73, 114.84, + 126.66, 127.11, 128.66, 129.11, + 11.5, 12.5, -11.5, -12.5, + // clang-format on +}; + +static constexpr int custom_data_int8_height = 1; +static constexpr int custom_data_int8_width = 7; + +static int8_t + custom_data_int8[custom_data_int8_height * custom_data_int8_width] = { + // clang-format off + -128, -127, -1, 0, 1, 126, 127 + // clang-format on +}; + +static uint8_t + custom_data_uint8[custom_data_int8_height * custom_data_int8_width] = { + // clang-format off + 0, 1, 126, 127, 128, 254, 255 + // clang-format on +}; + +template +bool test_float32_to_int8_custom(int index, + RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue) { + cv::Mat input(custom_data_float_height, custom_data_float_width, CV_32FC1, + custom_data_float); + + cv::Mat actual = exec_float32_to_int8(input); + cv::Mat expected = + get_expected_from_subordinate(index, request_queue, reply_queue, input); + + if (are_matrices_different(0, actual, expected)) { + fail_print_matrices(custom_data_float_height, custom_data_float_width, + input, actual, expected); + return true; + } + + return false; +} + +template +bool test_int8_to_float32_custom(int index, + RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue) { + cv::Mat input(custom_data_int8_height, custom_data_int8_width, + Signed ? CV_8SC1 : CV_8UC1, + Signed ? static_cast(custom_data_int8) + : static_cast(custom_data_uint8)); + + cv::Mat actual = exec_int8_to_float32(input); + cv::Mat expected = + get_expected_from_subordinate(index, request_queue, reply_queue, input); + + if (are_matrices_different(0, actual, expected)) { + fail_print_matrices(custom_data_int8_height, custom_data_int8_width, input, + actual, expected); + return true; + } + + return false; +} +#endif + +std::vector& float_conversion_tests_get() { + // clang-format off + static std::vector tests = { + TEST("Float32 to Signed Int8, fill, 1 channel", (test_float32_to_int8_random), exec_float32_to_int8), + TEST("Float32 to Signed Int8, fill, 2 channel", (test_float32_to_int8_random), exec_float32_to_int8), + TEST("Float32 to Signed Int8, fill, 3 channel", (test_float32_to_int8_random), exec_float32_to_int8), + TEST("Float32 to Signed Int8, fill, 4 channel", (test_float32_to_int8_random), exec_float32_to_int8), + + TEST("Float32 to Unsigned Int8, fill, 1 channel", (test_float32_to_int8_random), exec_float32_to_int8), + TEST("Float32 to Unsigned Int8, fill, 2 channel", (test_float32_to_int8_random), exec_float32_to_int8), + TEST("Float32 to Unsigned Int8, fill, 3 channel", (test_float32_to_int8_random), exec_float32_to_int8), + TEST("Float32 to Unsigned Int8, fill, 4 channel", (test_float32_to_int8_random), exec_float32_to_int8), + + TEST("Float32 to Signed Int8, custom (special)", test_float32_to_int8_custom, exec_float32_to_int8), + TEST("Float32 to Unsigned Int8, custom (special)", test_float32_to_int8_custom, exec_float32_to_int8), + + TEST("Signed Int8 to Float32, fill, 1 channel", (test_int8_to_float32_random), exec_int8_to_float32), + TEST("Signed Int8 to Float32, fill, 2 channel", (test_int8_to_float32_random), exec_int8_to_float32), + TEST("Signed Int8 to Float32, fill, 3 channel", (test_int8_to_float32_random), exec_int8_to_float32), + TEST("Signed Int8 to Float32, fill, 4 channel", (test_int8_to_float32_random), exec_int8_to_float32), + + TEST("Unsigned Int8 to Float32, fill, 1 channel", (test_int8_to_float32_random), exec_int8_to_float32), + TEST("Unsigned Int8 to Float32, fill, 2 channel", (test_int8_to_float32_random), exec_int8_to_float32), + TEST("Unsigned Int8 to Float32, fill, 3 channel", (test_int8_to_float32_random), exec_int8_to_float32), + TEST("Unsigned Int8 to Float32, fill, 4 channel", (test_int8_to_float32_random), exec_int8_to_float32), + + TEST("Signed Int8 to Float32, custom", test_int8_to_float32_custom, exec_int8_to_float32), + TEST("Unigned Int8 to Float32, custom", test_int8_to_float32_custom, exec_int8_to_float32), + }; + // clang-format on + return tests; +} diff --git a/conformity/opencv/test_float_conv.h b/conformity/opencv/test_float_conv.h new file mode 100644 index 000000000..9e4c40be6 --- /dev/null +++ b/conformity/opencv/test_float_conv.h @@ -0,0 +1,14 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_OPENCV_CONFORMITY_TEST_FLOAT_CONV_H_ +#define KLEIDICV_OPENCV_CONFORMITY_TEST_FLOAT_CONV_H_ + +#include + +#include "tests.h" + +std::vector& float_conversion_tests_get(); + +#endif // KLEIDICV_OPENCV_CONFORMITY_TEST_FLOAT_CONV_H_ diff --git a/conformity/opencv/tests.cpp b/conformity/opencv/tests.cpp index cb5fe944c..8064a6565 100644 --- a/conformity/opencv/tests.cpp +++ b/conformity/opencv/tests.cpp @@ -12,6 +12,7 @@ #include "opencv2/imgproc.hpp" #include "test_binary_op.h" #include "test_exp.h" +#include "test_float_conv.h" #include "test_gaussian_blur.h" #include "test_min_max.h" #include "test_rgb2yuv.h" @@ -34,6 +35,7 @@ std::vector all_tests = merge_tests({ rgb2yuv_tests_get, sobel_tests_get, exp_tests_get, + float_conversion_tests_get, }); #if MANAGER diff --git a/kleidicv/include/kleidicv/kleidicv.h b/kleidicv/include/kleidicv/kleidicv.h index 88dfdc6bd..b27f58582 100644 --- a/kleidicv/include/kleidicv/kleidicv.h +++ b/kleidicv/include/kleidicv/kleidicv.h @@ -1487,6 +1487,65 @@ KLEIDICV_API_DECLARATION(kleidicv_exp_f32, const float *src, size_t src_stride, float *dst, size_t dst_stride, size_t width, size_t height); +/// Converts the elements in `src` from a floating-point type to an integer +/// type, then stores the result in `dst`. +/// +/// Each resulting element is saturated, i.e. it is the smallest/largest +/// number of the type of the element if the `src` data type cannot be +/// represented as the `dst` type. In case of some special values, such as the +/// different variations of `NaN`, the result is `0`. Source and destination +/// data length is `width` * `height`. Number of elements is limited to @ref +/// KLEIDICV_MAX_IMAGE_PIXELS. +/// +/// @param src Pointer to the source data. Must be non-null. +/// @param src_stride Distance in bytes from the start of one row to the +/// start of the next row for the source data. +/// Must not be less than width * sizeof(type). +/// @param dst Pointer to the destination data. Must be non-null. +/// @param dst_stride Distance in bytes from the start of one row to the +/// start of the next row for the destination data. +/// Must not be less than width * sizeof(type). +/// @param width Number of elements in a row. +/// @param height Number of rows in the data. +/// +KLEIDICV_API_DECLARATION(kleidicv_float_conversion_f32_s8, const float *src, + size_t src_stride, int8_t *dst, size_t dst_stride, + size_t width, size_t height); +/// @copydoc kleidicv_float_conversion_f32_s8 +KLEIDICV_API_DECLARATION(kleidicv_float_conversion_f32_u8, const float *src, + size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height); + +/// Converts the elements in `src` from an integer type to a floating-point +/// type, then stores the result in `dst`. +/// +/// Each resulting element is saturated, i.e. it is the smallest/largest +/// number of the type of the element if the `src` data type cannot be +/// represented as the `dst` type. Source and destination data length is +/// `width` * `height`. Number of elements is limited to @ref +/// KLEIDICV_MAX_IMAGE_PIXELS. +/// +/// @param src Pointer to the source data. Must be non-null. +/// @param src_stride Distance in bytes from the start of one row to the +/// start of the next row for the source data. Must +/// not be less than width * sizeof(type). +/// Must be a multiple of sizeof(type). +/// @param dst Pointer to the destination data. Must be non-null. +/// @param dst_stride Distance in bytes from the start of one row to the +/// start of the next row for the destination data. Must +/// not be less than width * sizeof(type). +/// Must be a multiple of sizeof(type). +/// @param width Number of pixels in a row. +/// @param height Number of rows in the data. +/// +KLEIDICV_API_DECLARATION(kleidicv_float_conversion_s8_f32, const int8_t *src, + size_t src_stride, float *dst, size_t dst_stride, + size_t width, size_t height); +/// @copydoc kleidicv_float_conversion_s8_f32 +KLEIDICV_API_DECLARATION(kleidicv_float_conversion_u8_f32, const uint8_t *src, + size_t src_stride, float *dst, size_t dst_stride, + size_t width, size_t height); + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/kleidicv/src/conversions/float_conv_api.cpp b/kleidicv/src/conversions/float_conv_api.cpp new file mode 100644 index 000000000..0f3fdea2d --- /dev/null +++ b/kleidicv/src/conversions/float_conv_api.cpp @@ -0,0 +1,71 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "kleidicv/dispatch.h" +#include "kleidicv/kleidicv.h" +#include "kleidicv/types.h" + +namespace kleidicv { + +namespace neon { + +template +kleidicv_error_t float_conversion(const InputType* src, size_t src_stride, + OutputType* dst, size_t dst_stride, + size_t width, size_t height); + +} // namespace neon + +namespace sve2 { + +template +kleidicv_error_t float_conversion(const InputType* src, size_t src_stride, + OutputType* dst, size_t dst_stride, + size_t width, size_t height); + +} // namespace sve2 + +namespace sme2 { + +template +kleidicv_error_t float_conversion(const InputType* src, size_t src_stride, + OutputType* dst, size_t dst_stride, + size_t width, size_t height); + +} // namespace sme2 + +#ifdef KLEIDICV_HAVE_SVE2 +#define SVE2_FUNC_POINTER(name, itype, otype) \ + [[maybe_unused]] static auto sve2_func_##itype##_##otype = \ + kleidicv::sve2::float_conversion; +#else +#define SVE2_FUNC_POINTER(name, itype, otype) +#endif // KLEIDICV_HAVE_SVE2 + +#ifdef KLEIDICV_HAVE_SME2 +#define SME2_FUNC_POINTER(name, itype, otype) \ + static auto sme2_func_##itype##_##otype = \ + kleidicv::sme2::float_conversion; +#else +#define SME2_FUNC_POINTER(name, itype, otype) +#endif // KLEIDICV_HAVE_SME2 + +// NOLINTBEGIN(cppcoreguidelines-avoid-non-const-global-variables) +#define KLEIDICV_DEFINE_C_API(name, itype, otype) \ + static auto neon_func_##itype##_##otype = \ + kleidicv::neon::float_conversion; \ + SVE2_FUNC_POINTER(name, itype, otype); \ + SME2_FUNC_POINTER(name, itype, otype); \ + KLEIDICV_MULTIVERSION_C_API( \ + name, neon_func_##itype##_##otype, \ + KLEIDICV_SVE2_IMPL_IF(sve2_func_##itype##_##otype), \ + sme2_func_##itype##_##otype) +// NOLINTEND(cppcoreguidelines-avoid-non-const-global-variables) + +KLEIDICV_DEFINE_C_API(kleidicv_float_conversion_f32_s8, float, int8_t); +KLEIDICV_DEFINE_C_API(kleidicv_float_conversion_f32_u8, float, uint8_t); +KLEIDICV_DEFINE_C_API(kleidicv_float_conversion_s8_f32, int8_t, float); +KLEIDICV_DEFINE_C_API(kleidicv_float_conversion_u8_f32, uint8_t, float); + +} // namespace kleidicv diff --git a/kleidicv/src/conversions/float_conv_neon.cpp b/kleidicv/src/conversions/float_conv_neon.cpp new file mode 100644 index 000000000..b6e1fe0a8 --- /dev/null +++ b/kleidicv/src/conversions/float_conv_neon.cpp @@ -0,0 +1,27 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "kleidicv/kleidicv.h" +#include "kleidicv/neon.h" + +namespace kleidicv::neon { + +template +kleidicv_error_t float_conversion(const InputType*, size_t, OutputType*, size_t, + size_t, size_t) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; +} + +#define KLEIDICV_INSTANTIATE_TEMPLATE(itype, otype) \ + template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t \ + float_conversion(const itype* src, size_t src_stride, \ + otype* dst, size_t dst_stride, size_t width, \ + size_t height) + +KLEIDICV_INSTANTIATE_TEMPLATE(float, int8_t); +KLEIDICV_INSTANTIATE_TEMPLATE(float, uint8_t); +KLEIDICV_INSTANTIATE_TEMPLATE(int8_t, float); +KLEIDICV_INSTANTIATE_TEMPLATE(uint8_t, float); + +} // namespace kleidicv::neon diff --git a/kleidicv/src/conversions/float_conv_sc.h b/kleidicv/src/conversions/float_conv_sc.h new file mode 100644 index 000000000..54a5f7471 --- /dev/null +++ b/kleidicv/src/conversions/float_conv_sc.h @@ -0,0 +1,166 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_FLOAT_CONV_SC_H +#define KLEIDICV_FLOAT_CONV_SC_H + +#include +#include + +#include "kleidicv/kleidicv.h" +#include "kleidicv/sve2.h" + +namespace KLEIDICV_TARGET_NAMESPACE { + +template +class float_conversion_operation; + +template +class float_conversion_operation { + public: + using SrcVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits; + using SrcVectorType = typename SrcVecTraits::VectorType; + using IntermediateVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits< + std::conditional_t, int32_t, uint32_t>>; + using IntermediateVectorType = typename IntermediateVecTraits::VectorType; + + void process_row(size_t width, Columns src, + Columns dst) KLEIDICV_STREAMING_COMPATIBLE { + LoopUnroll{width, SrcVecTraits::num_lanes()} + .unroll_twice([&](size_t step) KLEIDICV_STREAMING_COMPATIBLE { + svbool_t pg = SrcVecTraits::svptrue(); + SrcVectorType src_vector1 = svld1(pg, &src[0]); + SrcVectorType src_vector2 = svld1_vnum(pg, &src[0], 1); + IntermediateVectorType result_vector1 = + vector_path(pg, src_vector1); + IntermediateVectorType result_vector2 = + vector_path(pg, src_vector2); + svst1b(pg, &dst[0], result_vector1); + svst1b_vnum(pg, &dst[0], 1, result_vector2); + src += ptrdiff_t(step); + dst += ptrdiff_t(step); + }) + .remaining([&](size_t length, size_t) KLEIDICV_STREAMING_COMPATIBLE { + size_t index = 0; + svbool_t pg = SrcVecTraits::svwhilelt(index, length); + while (svptest_first(SrcVecTraits::svptrue(), pg)) { + SrcVectorType src_vector = svld1(pg, &src[ptrdiff_t(index)]); + IntermediateVectorType result_vector = + vector_path(pg, src_vector); + svst1b(pg, &dst[ptrdiff_t(index)], result_vector); + // Update loop counter and calculate the next governing predicate. + index += SrcVecTraits::num_lanes(); + pg = SrcVecTraits::svwhilelt(index, length); + } + }); + } + + private: + template < + typename O, + std::enable_if_t && std::is_signed_v, int> = 0> + IntermediateVectorType vector_path(svbool_t& pg, SrcVectorType src) + KLEIDICV_STREAMING_COMPATIBLE { + constexpr float min_val = std::numeric_limits::min(); + constexpr float max_val = std::numeric_limits::max(); + + src = svrinti_f32_x(pg, src); + + svbool_t less = svcmplt_n_f32(pg, src, min_val); + src = svdup_n_f32_m(src, less, min_val); + + svbool_t greater = svcmpgt_n_f32(pg, src, max_val); + src = svdup_n_f32_m(src, greater, max_val); + + return svcvt_s32_f32_x(pg, src); + } + + template < + typename O, + std::enable_if_t && !std::is_signed_v, int> = 0> + IntermediateVectorType vector_path(svbool_t& pg, SrcVectorType src) + KLEIDICV_STREAMING_COMPATIBLE { + constexpr float max_val = std::numeric_limits::max(); + + src = svrinti_f32_x(pg, src); + + svbool_t greater = svcmpgt_n_f32(pg, src, max_val); + src = svdup_n_f32_m(src, greater, max_val); + + return svcvt_u32_f32_x(pg, src); + } +}; // end of class float_conversion_operation + +template +class float_conversion_operation { + public: + using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits; + using VectorType = typename VecTraits::VectorType; + void process_row(size_t width, Columns src, + Columns dst) { + LoopUnroll{width, VecTraits::num_lanes()} + .unroll_twice([&](size_t step) KLEIDICV_STREAMING_COMPATIBLE { + svbool_t pg = VecTraits::svptrue(); + VectorType dst_vector1 = vector_path(pg, &src[0]); + VectorType dst_vector2 = vector_path( + pg, &src.at(ptrdiff_t(VecTraits::num_lanes()))[0]); + svst1(pg, &dst[0], dst_vector1); + svst1_vnum(pg, &dst[0], 1, dst_vector2); + src += ptrdiff_t(step); + dst += ptrdiff_t(step); + }) + .remaining([&](size_t length, size_t) KLEIDICV_STREAMING_COMPATIBLE { + size_t index = 0; + svbool_t pg = VecTraits::svwhilelt(index, length); + while (svptest_first(VecTraits::svptrue(), pg)) { + VectorType dst_vector = + vector_path(pg, &src[ptrdiff_t(index)]); + svst1(pg, &dst[ptrdiff_t(index)], dst_vector); + // Update loop counter and calculate the next governing predicate. + index += VecTraits::num_lanes(); + pg = VecTraits::svwhilelt(index, length); + } + }); + } + + private: + template < + typename I, + std::enable_if_t && std::is_signed_v, int> = 0> + VectorType vector_path(svbool_t& pg, + const I* src) KLEIDICV_STREAMING_COMPATIBLE { + svint32_t src_vector = svld1sb_s32(pg, src); + return svcvt_f32_s32_x(pg, src_vector); + } + + template < + typename I, + std::enable_if_t && !std::is_signed_v, int> = 0> + VectorType vector_path(svbool_t& pg, + const I* src) KLEIDICV_STREAMING_COMPATIBLE { + svuint32_t src_vector = svld1ub_u32(pg, src); + return svcvt_f32_u32_x(pg, src_vector); + } +}; // end of class float_conversion_operation + +template +static kleidicv_error_t float_conversion_sc( + const InputType* src, size_t src_stride, OutputType* dst, size_t dst_stride, + size_t width, size_t height) KLEIDICV_STREAMING_COMPATIBLE { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); + CHECK_IMAGE_SIZE(width, height); + + float_conversion_operation operation; + Rectangle rect{width, height}; + Rows src_rows{src, src_stride}; + Rows dst_rows{dst, dst_stride}; + zip_rows(operation, rect, src_rows, dst_rows); + + return KLEIDICV_OK; +} + +} // namespace KLEIDICV_TARGET_NAMESPACE + +#endif // KLEIDICV_FLOAT_CONV_SC_H diff --git a/kleidicv/src/conversions/float_conv_sme2.cpp b/kleidicv/src/conversions/float_conv_sme2.cpp new file mode 100644 index 000000000..49832008a --- /dev/null +++ b/kleidicv/src/conversions/float_conv_sme2.cpp @@ -0,0 +1,28 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "float_conv_sc.h" + +namespace kleidicv::sme2 { + +template +KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t +float_conversion(const InputType* src, size_t src_stride, OutputType* dst, + size_t dst_stride, size_t width, size_t height) { + return float_conversion_sc(src, src_stride, dst, + dst_stride, width, height); +} + +#define KLEIDICV_INSTANTIATE_TEMPLATE(itype, otype) \ + template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t \ + float_conversion(const itype* src, size_t src_stride, \ + otype* dst, size_t dst_stride, size_t width, \ + size_t height) + +KLEIDICV_INSTANTIATE_TEMPLATE(float, int8_t); +KLEIDICV_INSTANTIATE_TEMPLATE(float, uint8_t); +KLEIDICV_INSTANTIATE_TEMPLATE(int8_t, float); +KLEIDICV_INSTANTIATE_TEMPLATE(uint8_t, float); + +} // namespace kleidicv::sme2 diff --git a/kleidicv/src/conversions/float_conv_sve2.cpp b/kleidicv/src/conversions/float_conv_sve2.cpp new file mode 100644 index 000000000..6bbd5d72e --- /dev/null +++ b/kleidicv/src/conversions/float_conv_sve2.cpp @@ -0,0 +1,28 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "float_conv_sc.h" + +namespace kleidicv::sve2 { + +template +KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t +float_conversion(const InputType* src, size_t src_stride, OutputType* dst, + size_t dst_stride, size_t width, size_t height) { + return float_conversion_sc(src, src_stride, dst, + dst_stride, width, height); +} + +#define KLEIDICV_INSTANTIATE_TEMPLATE(itype, otype) \ + template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t \ + float_conversion(const itype* src, size_t src_stride, \ + otype* dst, size_t dst_stride, size_t width, \ + size_t height) + +KLEIDICV_INSTANTIATE_TEMPLATE(float, int8_t); +KLEIDICV_INSTANTIATE_TEMPLATE(float, uint8_t); +KLEIDICV_INSTANTIATE_TEMPLATE(int8_t, float); +KLEIDICV_INSTANTIATE_TEMPLATE(uint8_t, float); + +} // namespace kleidicv::sve2 diff --git a/test/api/test_float_conv.cpp b/test/api/test_float_conv.cpp new file mode 100644 index 000000000..49fdde174 --- /dev/null +++ b/test/api/test_float_conv.cpp @@ -0,0 +1,438 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include "framework/array.h" +#include "framework/generator.h" +#include "framework/operation.h" +#include "framework/utils.h" +#include "kleidicv/kleidicv.h" +#include "test_config.h" + +#define KLEIDICV_FLOAT_CONVERSION(itype, itype_name, otype, otype_name) \ + KLEIDICV_API_DIFFERENT_IO_TYPES( \ + float_conversion, kleidicv_float_conversion_##itype_name##_##otype_name, \ + itype, otype) + +KLEIDICV_FLOAT_CONVERSION(float, f32, int8_t, s8); +KLEIDICV_FLOAT_CONVERSION(float, f32, uint8_t, u8); +KLEIDICV_FLOAT_CONVERSION(int8_t, s8, float, f32); +KLEIDICV_FLOAT_CONVERSION(uint8_t, u8, float, f32); + +template +class FloatConversionTest final { + private: + template + static constexpr T min() { + return std::numeric_limits::min(); + } + + template + static constexpr T max() { + return std::numeric_limits::max(); + } + + struct Elements { + size_t width; + size_t height; + + std::vector> source_rows; + std::vector> expected_rows; + + Elements(size_t _width, size_t _height, + std::vector>&& _source_rows, + std::vector>&& _expected_rows) + : width(_width), + height(_height), + source_rows(std::move(_source_rows)), + expected_rows(std::move(_expected_rows)) {} + }; + + struct Values { + InputType source; + OutputType expected; + }; + + static constexpr uint32_t quietNaN = 0x7FC00000; + static constexpr uint32_t signalingNaN = 0x7FA00000; + static constexpr uint32_t posInfinity = 0x7F800000; + static constexpr uint32_t negInfinity = 0xFF800000; + + static constexpr uint32_t minusNaN = 0xFF800001; + static constexpr uint32_t plusNaN = 0x7F800001; + static constexpr uint32_t plusZero = 0x00000000; + static constexpr uint32_t minusZero = 0x80000000; + + static constexpr uint32_t oneNaN = 0x7FC00001; + static constexpr uint32_t zeroDivZero = 0xFFC00000; + static constexpr uint32_t floatMin = 0x00800000; + static constexpr uint32_t floatMax = 0x7F7FFFFF; + + static constexpr uint32_t posSubnormalMin = 0x00000001; + static constexpr uint32_t posSubnormalMax = 0x007FFFFF; + static constexpr uint32_t negSubnormalMin = 0x80000001; + static constexpr uint32_t negSubnormalMax = 0x807FFFFF; + + static constexpr float floatval(uint32_t v) { + static_assert(sizeof(float) == 4); + KLEIDICV_NO_STRICT_ALIASING_BEGIN + return *reinterpret_cast(&v); + KLEIDICV_NO_STRICT_ALIASING_END + } + + template , bool> = true, + std::enable_if_t, bool> = true> + const Elements& get_custom_elements() { + static const Elements kTestElements = { + // clang-format off + 4, 8, + {{ + { floatval(quietNaN), floatval(signalingNaN), floatval(posInfinity), floatval(negInfinity) }, + { floatval(minusNaN), floatval(plusNaN), floatval(plusZero), floatval(minusZero) }, + { floatval(oneNaN), floatval(zeroDivZero), floatval(floatMin), floatval(floatMax) }, + { floatval(posSubnormalMin), floatval(posSubnormalMax), floatval(negSubnormalMin), floatval(negSubnormalMax) }, + { 1111.11, -1112.22, 113.33, 114.44 }, + { 111.51, 112.62, 113.73, 114.84 }, + { 126.66, 127.11, 128.66, 129.11 }, + { 11.5, 12.5, -11.5, -12.5 } + }}, + {{ + { 0, 0, 127, -128 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 127 }, + { 0, 0, 0, 0 }, + { 127, -128, 113, 114 }, + { 112, 113, 114, 115 }, + { 127, 127, 127, 127 }, + { 12, 12, -12, -12 } + }} + // clang-format on + }; + return kTestElements; + } + + template , bool> = true, + std::enable_if_t, bool> = true> + const Elements& get_custom_elements() { + static const Elements kTestElements = { + // clang-format off + 4, 8, + {{ + { floatval(quietNaN), floatval(signalingNaN), floatval(posInfinity), floatval(negInfinity) }, + { floatval(minusNaN), floatval(plusNaN), floatval(plusZero), floatval(minusZero) }, + { floatval(oneNaN), floatval(zeroDivZero), floatval(floatMin), floatval(floatMax) }, + { floatval(posSubnormalMin), floatval(posSubnormalMax), floatval(negSubnormalMin), floatval(negSubnormalMax) }, + { 1111.11, -1112.22, 113.33, 114.44 }, + { 111.51, 112.62, 113.73, 114.84 }, + { 126.66, 127.11, 128.66, 129.11 }, + { 11.5, 12.5, -11.5, -12.5 } + }}, + {{ + { 0, 0, 255, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 255 }, + { 0, 0, 0, 0 }, + { 255, 0, 113, 114 }, + { 112, 113, 114, 115 }, + { 127, 127, 129, 129 }, + { 12, 12, 0, 0 } + }} + // clang-format on + }; + return kTestElements; + } + + template , bool> = true, + std::enable_if_t, bool> = true> + const Elements& get_custom_elements() { + static const Elements kTestElements = { + // clang-format off + 5, 1, + {{ + { min(), min() + 1, 0, max() - 1, max() } + }}, + {{ + { static_cast(min()), static_cast(min()) + 1.0, 0, + static_cast(max()) - 1.0, static_cast(max()) } + }} + // clang-format on + }; + return kTestElements; + } + + template , bool> = true, + std::enable_if_t, bool> = true> + const Values& get_values() { + static const Values kTestValues = { + // clang-format off + 10.67F, 11 + // clang-format on + }; + return kTestValues; + } + + template , bool> = true, + std::enable_if_t, bool> = true> + const Values& get_values() { + static const Values kTestValues = { + // clang-format off + 11, 11.0 + // clang-format on + }; + return kTestValues; + } + + template , bool> = true, + std::enable_if_t, bool> = true> + void calculate_expected(const test::Array2D& source, + test::Array2D& expected) { + for (size_t hindex = 0; hindex < source.height(); ++hindex) { + for (size_t vindex = 0; vindex < source.width(); ++vindex) { + O calculated = 0; + // NOLINTBEGIN(clang-analyzer-core.uninitialized.Assign) + I result = *source.at(hindex, vindex); + // NOLINTEND(clang-analyzer-core.uninitialized.Assign) + if (result > max()) { + calculated = max(); + } else if (result < min()) { + calculated = min(); + } else { + calculated = result; + } + *expected.at(hindex, vindex) = calculated; + } + } + } + + template , bool> = true, + std::enable_if_t, bool> = true> + void calculate_expected(const test::Array2D& source, + test::Array2D& expected) { + for (size_t hindex = 0; hindex < source.height(); ++hindex) { + for (size_t vindex = 0; vindex < source.width(); ++vindex) { + // NOLINTBEGIN(clang-analyzer-core.uninitialized.Assign) + *expected.at(hindex, vindex) = *source.at(hindex, vindex); + // NOLINTEND(clang-analyzer-core.uninitialized.Assign) + } + } + } + + template + size_t get_linear_height(size_t width, size_t minimum_size) { + size_t image_size = + std::max(minimum_size, static_cast(max() - min())); + size_t height = image_size / width + 1; + + return height; + } + + template + std::tuple, test::Array2D, test::Array2D> + get_linear_arrays(size_t width, size_t height) { + test::Array2D source(width, height, 1, 1); + test::Array2D expected(width, height, 1, 1); + test::Array2D actual(width, height, 1, 1); + + if constexpr (std::is_same_v && std::is_integral_v) { + test::GenerateLinearSeries generator(min()); + source.fill(generator); + } else if constexpr (std::is_integral_v && std::is_same_v) { + test::GenerateLinearSeries generator(min()); + source.fill(generator); + } else { + static_assert(sizeof(I) == 0 && sizeof(O) == 0, "should never happen"); + } + + calculate_expected(source, expected); + + return {source, expected, actual}; + } + + public: + // minimum_size set by caller to trigger the 'big' conversion path. + template + void test_linear(size_t width, size_t minimum_size = 1) { + size_t height = 0; + if constexpr (std::is_same_v && std::is_integral_v) { + height = get_linear_height(width, minimum_size); + } else if constexpr (std::is_integral_v && std::is_same_v) { + height = get_linear_height(width, minimum_size); + } else { + static_assert(sizeof(I) == 0 && sizeof(O) == 0, "should never happen"); + } + + auto arrays = get_linear_arrays(width, height); + + test::Array2D& source = std::get<0>(arrays); + test::Array2D& expected = std::get<1>(arrays); + test::Array2D& actual = std::get<2>(arrays); + + ASSERT_EQ(KLEIDICV_OK, (float_conversion()( + source.data(), source.stride(), actual.data(), + actual.stride(), width, height))); + + EXPECT_EQ_ARRAY2D(expected, actual); + } + + void test_custom() { + auto elements_list = get_custom_elements(); + const size_t& width = elements_list.width; + const size_t& height = elements_list.height; + + test::Array2D source(width, height); + test::Array2D expected(width, height); + test::Array2D actual(width, height); + + for (size_t i = 0; i < height; i++) { + source.set(i, 0, elements_list.source_rows[i]); + expected.set(i, 0, elements_list.expected_rows[i]); + } + + ASSERT_EQ(KLEIDICV_OK, (float_conversion()( + source.data(), source.stride(), actual.data(), + actual.stride(), width, height))); + + EXPECT_EQ_ARRAY2D(expected, actual); + } + + void test_sizes(const size_t width, const size_t height) { + auto values_list = get_values(); + + test::Array2D source(width, height, 1, 1); + test::Array2D expected(width, height, 1, 1); + test::Array2D actual(width, height, 1, 1); + + source.fill(values_list.source); + expected.fill(values_list.expected); + + actual.fill(0); + + ASSERT_EQ(KLEIDICV_OK, (float_conversion()( + source.data(), source.stride(), actual.data(), + actual.stride(), width, height))); + + EXPECT_EQ_ARRAY2D(expected, actual); + } +}; // end of class FloatConversionTest + +template +class FloatConversion : public testing::Test {}; + +using ElementTypes = + ::testing::Types, std::pair, + std::pair, std::pair>; + +// Tests kleidicv_float_conversion API. +TYPED_TEST_SUITE(FloatConversion, ElementTypes); + +TYPED_TEST(FloatConversion, NullPointer) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + InputType src[1] = {}; + OutputType dst[1]; + test::test_null_args(float_conversion(), src, + sizeof(InputType), dst, sizeof(OutputType), 1, 1); +} + +TYPED_TEST(FloatConversion, OversizeImage) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + InputType src[1] = {}; + OutputType dst[1]; + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + (float_conversion()( + src, sizeof(InputType), dst, sizeof(OutputType), + KLEIDICV_MAX_IMAGE_PIXELS + 1, 1))); + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + (float_conversion()( + src, sizeof(InputType), dst, sizeof(OutputType), 1, + KLEIDICV_MAX_IMAGE_PIXELS + 1))); + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + (float_conversion()( + src, sizeof(TypeParam), dst, sizeof(OutputType), + KLEIDICV_MAX_IMAGE_PIXELS + 1, KLEIDICV_MAX_IMAGE_PIXELS + 1))); +} + +TYPED_TEST(FloatConversion, Scalar) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + FloatConversionTest{} + .template test_linear( + test::Options::vector_length() - 1); +} +TYPED_TEST(FloatConversion, Vector) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + FloatConversionTest{} + .template test_linear( + test::Options::vector_length() * 2); +} +TYPED_TEST(FloatConversion, Custom) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + FloatConversionTest{}.test_custom(); +} +TYPED_TEST(FloatConversion, CustomFits128VectorSize) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + FloatConversionTest{}.test_sizes(4, 1); +} +TYPED_TEST(FloatConversion, CustomFits128VectorSize2x) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + FloatConversionTest{}.test_sizes(4, 2); +} +TYPED_TEST(FloatConversion, CustomFits128VectorSize3x) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + FloatConversionTest{}.test_sizes(4, 3); +} +TYPED_TEST(FloatConversion, CustomFits512VectorSize) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + FloatConversionTest{}.test_sizes(4, 4); +} +TYPED_TEST(FloatConversion, CustomFits512VectorSize2x) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + FloatConversionTest{}.test_sizes(4, 8); +} +TYPED_TEST(FloatConversion, CustomFits512VectorSize3x) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + FloatConversionTest{}.test_sizes(6, 8); +} +TYPED_TEST(FloatConversion, Custom128OneRemaining) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + FloatConversionTest{}.test_sizes(1, 17); +} +TYPED_TEST(FloatConversion, Custom128AllButOneRemaining) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + FloatConversionTest{}.test_sizes(5, 3); +} +TYPED_TEST(FloatConversion, CustomAboutHalfRemaining) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + FloatConversionTest{}.test_sizes(19, 2); +} +TYPED_TEST(FloatConversion, CustomEmpty) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + FloatConversionTest{}.test_sizes(0, 0); +} +TYPED_TEST(FloatConversion, CustomOne) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + FloatConversionTest{}.test_sizes(1, 1); +} -- GitLab From 13519f461b93c5cd2c9f26176a55730df530c75f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Mon, 29 Apr 2024 12:35:51 +0200 Subject: [PATCH 02/13] Refactor custom float values Prefer using constants from std::numeric_limits rather than raw values directly. Use memcpy for type punning from float. --- conformity/opencv/test_float_conv.cpp | 29 ++++++------ test/api/test_float_conv.cpp | 66 +++++++++++++-------------- 2 files changed, 49 insertions(+), 46 deletions(-) diff --git a/conformity/opencv/test_float_conv.cpp b/conformity/opencv/test_float_conv.cpp index 8f7ca6a81..9920019de 100644 --- a/conformity/opencv/test_float_conv.cpp +++ b/conformity/opencv/test_float_conv.cpp @@ -4,31 +4,34 @@ #include "test_float_conv.h" +#include #include float floatval(uint32_t v) { - static_assert(sizeof(float) == 4); - return *reinterpret_cast(&v); + float result; + static_assert(sizeof(result) == sizeof(v)); + memcpy(&result, &v, sizeof(result)); + return result; } -float quietNaN = floatval(0x7FC00000); -float signalingNaN = floatval(0x7FA00000); -float posInfinity = floatval(0x7F800000); -float negInfinity = floatval(0xFF800000); +float quietNaN = std::numeric_limits::quiet_NaN(); +float signalingNaN = std::numeric_limits::signaling_NaN(); +float posInfinity = std::numeric_limits::infinity(); +float negInfinity = -std::numeric_limits::infinity(); float minusNaN = floatval(0xFF800001); float plusNaN = floatval(0x7F800001); -float plusZero = floatval(0x00000000); -float minusZero = floatval(0x80000000); +float plusZero = 0.0F; +float minusZero = -0.0F; float oneNaN = floatval(0x7FC00001); -float zeroDivZero = floatval(0xFFC00000); -float floatMin = floatval(0x00800000); -float floatMax = floatval(0x7F7FFFFF); +float zeroDivZero = -std::numeric_limits::quiet_NaN(); +float floatMin = std::numeric_limits::min(); +float floatMax = std::numeric_limits::max(); -float posSubnormalMin = floatval(0x00000001); +float posSubnormalMin = std::numeric_limits::denorm_min(); float posSubnormalMax = floatval(0x007FFFFF); -float negSubnormalMin = floatval(0x80000001); +float negSubnormalMin = -std::numeric_limits::denorm_min(); float negSubnormalMax = floatval(0x807FFFFF); template diff --git a/test/api/test_float_conv.cpp b/test/api/test_float_conv.cpp index 49fdde174..625e96fb2 100644 --- a/test/api/test_float_conv.cpp +++ b/test/api/test_float_conv.cpp @@ -55,33 +55,33 @@ class FloatConversionTest final { OutputType expected; }; - static constexpr uint32_t quietNaN = 0x7FC00000; - static constexpr uint32_t signalingNaN = 0x7FA00000; - static constexpr uint32_t posInfinity = 0x7F800000; - static constexpr uint32_t negInfinity = 0xFF800000; - - static constexpr uint32_t minusNaN = 0xFF800001; - static constexpr uint32_t plusNaN = 0x7F800001; - static constexpr uint32_t plusZero = 0x00000000; - static constexpr uint32_t minusZero = 0x80000000; - - static constexpr uint32_t oneNaN = 0x7FC00001; - static constexpr uint32_t zeroDivZero = 0xFFC00000; - static constexpr uint32_t floatMin = 0x00800000; - static constexpr uint32_t floatMax = 0x7F7FFFFF; - - static constexpr uint32_t posSubnormalMin = 0x00000001; - static constexpr uint32_t posSubnormalMax = 0x007FFFFF; - static constexpr uint32_t negSubnormalMin = 0x80000001; - static constexpr uint32_t negSubnormalMax = 0x807FFFFF; - - static constexpr float floatval(uint32_t v) { - static_assert(sizeof(float) == 4); - KLEIDICV_NO_STRICT_ALIASING_BEGIN - return *reinterpret_cast(&v); - KLEIDICV_NO_STRICT_ALIASING_END + static float floatval(uint32_t v) { + float result; // Avoid cppcoreguidelines-init-variables. NOLINT + static_assert(sizeof(result) == sizeof(v)); + memcpy(&result, &v, sizeof(result)); + return result; } + const float quietNaN = std::numeric_limits::quiet_NaN(); + const float signalingNaN = std::numeric_limits::signaling_NaN(); + const float posInfinity = std::numeric_limits::infinity(); + const float negInfinity = -std::numeric_limits::infinity(); + + const float minusNaN = floatval(0xFF800001); + const float plusNaN = floatval(0x7F800001); + const float plusZero = 0.0F; + const float minusZero = -0.0F; + + const float oneNaN = floatval(0x7FC00001); + const float zeroDivZero = -std::numeric_limits::quiet_NaN(); + const float floatMin = std::numeric_limits::min(); + const float floatMax = std::numeric_limits::max(); + + const float posSubnormalMin = std::numeric_limits::denorm_min(); + const float posSubnormalMax = floatval(0x007FFFFF); + const float negSubnormalMin = -std::numeric_limits::denorm_min(); + const float negSubnormalMax = floatval(0x807FFFFF); + template , bool> = true, std::enable_if_t, bool> = true> @@ -90,10 +90,10 @@ class FloatConversionTest final { // clang-format off 4, 8, {{ - { floatval(quietNaN), floatval(signalingNaN), floatval(posInfinity), floatval(negInfinity) }, - { floatval(minusNaN), floatval(plusNaN), floatval(plusZero), floatval(minusZero) }, - { floatval(oneNaN), floatval(zeroDivZero), floatval(floatMin), floatval(floatMax) }, - { floatval(posSubnormalMin), floatval(posSubnormalMax), floatval(negSubnormalMin), floatval(negSubnormalMax) }, + { quietNaN, signalingNaN, posInfinity, negInfinity }, + { minusNaN, plusNaN, plusZero, minusZero }, + { oneNaN, zeroDivZero, floatMin, floatMax }, + { posSubnormalMin, posSubnormalMax, negSubnormalMin, negSubnormalMax }, { 1111.11, -1112.22, 113.33, 114.44 }, { 111.51, 112.62, 113.73, 114.84 }, { 126.66, 127.11, 128.66, 129.11 }, @@ -122,10 +122,10 @@ class FloatConversionTest final { // clang-format off 4, 8, {{ - { floatval(quietNaN), floatval(signalingNaN), floatval(posInfinity), floatval(negInfinity) }, - { floatval(minusNaN), floatval(plusNaN), floatval(plusZero), floatval(minusZero) }, - { floatval(oneNaN), floatval(zeroDivZero), floatval(floatMin), floatval(floatMax) }, - { floatval(posSubnormalMin), floatval(posSubnormalMax), floatval(negSubnormalMin), floatval(negSubnormalMax) }, + { quietNaN, signalingNaN, posInfinity, negInfinity }, + { minusNaN, plusNaN, plusZero, minusZero }, + { oneNaN, zeroDivZero, floatMin, floatMax }, + { posSubnormalMin, posSubnormalMax, negSubnormalMin, negSubnormalMax }, { 1111.11, -1112.22, 113.33, 114.44 }, { 111.51, 112.62, 113.73, 114.84 }, { 126.66, 127.11, 128.66, 129.11 }, -- GitLab From e9a16a6f24592c47dc67bd2a57ca1d7cf7f7ea69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Wed, 17 Apr 2024 17:55:30 +0200 Subject: [PATCH 03/13] Rename float conversion sizing tests Previously named "Custom" tests, although doesn't really fit the definition of testing custom values. --- test/api/test_float_conv.cpp | 46 ++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/test/api/test_float_conv.cpp b/test/api/test_float_conv.cpp index 625e96fb2..29cc08336 100644 --- a/test/api/test_float_conv.cpp +++ b/test/api/test_float_conv.cpp @@ -304,7 +304,7 @@ class FloatConversionTest final { EXPECT_EQ_ARRAY2D(expected, actual); } - void test_sizes(const size_t width, const size_t height) { + void test_sizing(const size_t width, const size_t height) { auto values_list = get_values(); test::Array2D source(width, height, 1, 1); @@ -381,58 +381,58 @@ TYPED_TEST(FloatConversion, Custom) { using OutputType = typename TypeParam::second_type; FloatConversionTest{}.test_custom(); } -TYPED_TEST(FloatConversion, CustomFits128VectorSize) { +TYPED_TEST(FloatConversion, SizingFits128VectorSize) { using InputType = typename TypeParam::first_type; using OutputType = typename TypeParam::second_type; - FloatConversionTest{}.test_sizes(4, 1); + FloatConversionTest{}.test_sizing(4, 1); } -TYPED_TEST(FloatConversion, CustomFits128VectorSize2x) { +TYPED_TEST(FloatConversion, SizingFits128VectorSize2x) { using InputType = typename TypeParam::first_type; using OutputType = typename TypeParam::second_type; - FloatConversionTest{}.test_sizes(4, 2); + FloatConversionTest{}.test_sizing(4, 2); } -TYPED_TEST(FloatConversion, CustomFits128VectorSize3x) { +TYPED_TEST(FloatConversion, SizingFits128VectorSize3x) { using InputType = typename TypeParam::first_type; using OutputType = typename TypeParam::second_type; - FloatConversionTest{}.test_sizes(4, 3); + FloatConversionTest{}.test_sizing(4, 3); } -TYPED_TEST(FloatConversion, CustomFits512VectorSize) { +TYPED_TEST(FloatConversion, SizingFits512VectorSize) { using InputType = typename TypeParam::first_type; using OutputType = typename TypeParam::second_type; - FloatConversionTest{}.test_sizes(4, 4); + FloatConversionTest{}.test_sizing(4, 4); } -TYPED_TEST(FloatConversion, CustomFits512VectorSize2x) { +TYPED_TEST(FloatConversion, SizingFits512VectorSize2x) { using InputType = typename TypeParam::first_type; using OutputType = typename TypeParam::second_type; - FloatConversionTest{}.test_sizes(4, 8); + FloatConversionTest{}.test_sizing(4, 8); } -TYPED_TEST(FloatConversion, CustomFits512VectorSize3x) { +TYPED_TEST(FloatConversion, SizingFits512VectorSize3x) { using InputType = typename TypeParam::first_type; using OutputType = typename TypeParam::second_type; - FloatConversionTest{}.test_sizes(6, 8); + FloatConversionTest{}.test_sizing(6, 8); } -TYPED_TEST(FloatConversion, Custom128OneRemaining) { +TYPED_TEST(FloatConversion, Sizing128OneRemaining) { using InputType = typename TypeParam::first_type; using OutputType = typename TypeParam::second_type; - FloatConversionTest{}.test_sizes(1, 17); + FloatConversionTest{}.test_sizing(1, 17); } -TYPED_TEST(FloatConversion, Custom128AllButOneRemaining) { +TYPED_TEST(FloatConversion, Sizing128AllButOneRemaining) { using InputType = typename TypeParam::first_type; using OutputType = typename TypeParam::second_type; - FloatConversionTest{}.test_sizes(5, 3); + FloatConversionTest{}.test_sizing(5, 3); } -TYPED_TEST(FloatConversion, CustomAboutHalfRemaining) { +TYPED_TEST(FloatConversion, SizingAboutHalfRemaining) { using InputType = typename TypeParam::first_type; using OutputType = typename TypeParam::second_type; - FloatConversionTest{}.test_sizes(19, 2); + FloatConversionTest{}.test_sizing(19, 2); } -TYPED_TEST(FloatConversion, CustomEmpty) { +TYPED_TEST(FloatConversion, SizingEmpty) { using InputType = typename TypeParam::first_type; using OutputType = typename TypeParam::second_type; - FloatConversionTest{}.test_sizes(0, 0); + FloatConversionTest{}.test_sizing(0, 0); } -TYPED_TEST(FloatConversion, CustomOne) { +TYPED_TEST(FloatConversion, SizingOne) { using InputType = typename TypeParam::first_type; using OutputType = typename TypeParam::second_type; - FloatConversionTest{}.test_sizes(1, 1); + FloatConversionTest{}.test_sizing(1, 1); } -- GitLab From 635e7bbc18432a452623c7ce77d84b75c7f23f96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Wed, 17 Apr 2024 17:56:14 +0200 Subject: [PATCH 04/13] Refactor template parameters in float_conversion_sc --- kleidicv/src/conversions/float_conv_sc.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kleidicv/src/conversions/float_conv_sc.h b/kleidicv/src/conversions/float_conv_sc.h index 54a5f7471..68cad2918 100644 --- a/kleidicv/src/conversions/float_conv_sc.h +++ b/kleidicv/src/conversions/float_conv_sc.h @@ -144,7 +144,7 @@ class float_conversion_operation { } }; // end of class float_conversion_operation -template +template static kleidicv_error_t float_conversion_sc( const InputType* src, size_t src_stride, OutputType* dst, size_t dst_stride, size_t width, size_t height) KLEIDICV_STREAMING_COMPATIBLE { @@ -152,10 +152,10 @@ static kleidicv_error_t float_conversion_sc( CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); CHECK_IMAGE_SIZE(width, height); - float_conversion_operation operation; + float_conversion_operation operation; Rectangle rect{width, height}; - Rows src_rows{src, src_stride}; - Rows dst_rows{dst, dst_stride}; + Rows src_rows{src, src_stride}; + Rows dst_rows{dst, dst_stride}; zip_rows(operation, rect, src_rows, dst_rows); return KLEIDICV_OK; -- GitLab From 1a8d3d13b96d4a3371ef352c433d41da9a6c1e55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Wed, 17 Apr 2024 17:57:27 +0200 Subject: [PATCH 05/13] Rename "fill" conformity tests to "random" --- conformity/opencv/CMakeLists.txt | 1 + conformity/opencv/test_float_conv.cpp | 32 +++++++++++++-------------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/conformity/opencv/CMakeLists.txt b/conformity/opencv/CMakeLists.txt index 31fc56248..7bb122fd6 100644 --- a/conformity/opencv/CMakeLists.txt +++ b/conformity/opencv/CMakeLists.txt @@ -36,6 +36,7 @@ add_executable( test_rgb2yuv.cpp test_sobel.cpp test_exp.cpp + test_float_conv.cpp ) target_link_libraries( diff --git a/conformity/opencv/test_float_conv.cpp b/conformity/opencv/test_float_conv.cpp index 9920019de..13e8c8812 100644 --- a/conformity/opencv/test_float_conv.cpp +++ b/conformity/opencv/test_float_conv.cpp @@ -178,28 +178,28 @@ bool test_int8_to_float32_custom(int index, std::vector& float_conversion_tests_get() { // clang-format off static std::vector tests = { - TEST("Float32 to Signed Int8, fill, 1 channel", (test_float32_to_int8_random), exec_float32_to_int8), - TEST("Float32 to Signed Int8, fill, 2 channel", (test_float32_to_int8_random), exec_float32_to_int8), - TEST("Float32 to Signed Int8, fill, 3 channel", (test_float32_to_int8_random), exec_float32_to_int8), - TEST("Float32 to Signed Int8, fill, 4 channel", (test_float32_to_int8_random), exec_float32_to_int8), + TEST("Float32 to Signed Int8, random, 1 channel", (test_float32_to_int8_random), exec_float32_to_int8), + TEST("Float32 to Signed Int8, random, 2 channel", (test_float32_to_int8_random), exec_float32_to_int8), + TEST("Float32 to Signed Int8, random, 3 channel", (test_float32_to_int8_random), exec_float32_to_int8), + TEST("Float32 to Signed Int8, random, 4 channel", (test_float32_to_int8_random), exec_float32_to_int8), - TEST("Float32 to Unsigned Int8, fill, 1 channel", (test_float32_to_int8_random), exec_float32_to_int8), - TEST("Float32 to Unsigned Int8, fill, 2 channel", (test_float32_to_int8_random), exec_float32_to_int8), - TEST("Float32 to Unsigned Int8, fill, 3 channel", (test_float32_to_int8_random), exec_float32_to_int8), - TEST("Float32 to Unsigned Int8, fill, 4 channel", (test_float32_to_int8_random), exec_float32_to_int8), + TEST("Float32 to Unsigned Int8, random, 1 channel", (test_float32_to_int8_random), exec_float32_to_int8), + TEST("Float32 to Unsigned Int8, random, 2 channel", (test_float32_to_int8_random), exec_float32_to_int8), + TEST("Float32 to Unsigned Int8, random, 3 channel", (test_float32_to_int8_random), exec_float32_to_int8), + TEST("Float32 to Unsigned Int8, random, 4 channel", (test_float32_to_int8_random), exec_float32_to_int8), TEST("Float32 to Signed Int8, custom (special)", test_float32_to_int8_custom, exec_float32_to_int8), TEST("Float32 to Unsigned Int8, custom (special)", test_float32_to_int8_custom, exec_float32_to_int8), - TEST("Signed Int8 to Float32, fill, 1 channel", (test_int8_to_float32_random), exec_int8_to_float32), - TEST("Signed Int8 to Float32, fill, 2 channel", (test_int8_to_float32_random), exec_int8_to_float32), - TEST("Signed Int8 to Float32, fill, 3 channel", (test_int8_to_float32_random), exec_int8_to_float32), - TEST("Signed Int8 to Float32, fill, 4 channel", (test_int8_to_float32_random), exec_int8_to_float32), + TEST("Signed Int8 to Float32, random, 1 channel", (test_int8_to_float32_random), exec_int8_to_float32), + TEST("Signed Int8 to Float32, random, 2 channel", (test_int8_to_float32_random), exec_int8_to_float32), + TEST("Signed Int8 to Float32, random, 3 channel", (test_int8_to_float32_random), exec_int8_to_float32), + TEST("Signed Int8 to Float32, random, 4 channel", (test_int8_to_float32_random), exec_int8_to_float32), - TEST("Unsigned Int8 to Float32, fill, 1 channel", (test_int8_to_float32_random), exec_int8_to_float32), - TEST("Unsigned Int8 to Float32, fill, 2 channel", (test_int8_to_float32_random), exec_int8_to_float32), - TEST("Unsigned Int8 to Float32, fill, 3 channel", (test_int8_to_float32_random), exec_int8_to_float32), - TEST("Unsigned Int8 to Float32, fill, 4 channel", (test_int8_to_float32_random), exec_int8_to_float32), + TEST("Unsigned Int8 to Float32, random, 1 channel", (test_int8_to_float32_random), exec_int8_to_float32), + TEST("Unsigned Int8 to Float32, random, 2 channel", (test_int8_to_float32_random), exec_int8_to_float32), + TEST("Unsigned Int8 to Float32, random, 3 channel", (test_int8_to_float32_random), exec_int8_to_float32), + TEST("Unsigned Int8 to Float32, random, 4 channel", (test_int8_to_float32_random), exec_int8_to_float32), TEST("Signed Int8 to Float32, custom", test_int8_to_float32_custom, exec_int8_to_float32), TEST("Unigned Int8 to Float32, custom", test_int8_to_float32_custom, exec_int8_to_float32), -- GitLab From 79cd660422edcf8c17dfd5c2d21171c9d99fdbca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Wed, 17 Apr 2024 18:00:40 +0200 Subject: [PATCH 06/13] Implement NEON conversion from float to int8/uint8 --- kleidicv/include/kleidicv/neon_intrinsics.h | 22 ++++- kleidicv/src/conversions/float_conv_neon.cpp | 99 +++++++++++++++++++- 2 files changed, 117 insertions(+), 4 deletions(-) diff --git a/kleidicv/include/kleidicv/neon_intrinsics.h b/kleidicv/include/kleidicv/neon_intrinsics.h index 62053bc8e..0add00459 100644 --- a/kleidicv/include/kleidicv/neon_intrinsics.h +++ b/kleidicv/include/kleidicv/neon_intrinsics.h @@ -349,7 +349,14 @@ static inline float32x4x4_t vld1q_x4(const float32_t *src) { return vld1q_f32_x4 // NEON store operations // ----------------------------------------------------------------------------- -static inline void vst1(uint8_t *dst, uint8x8_t vec) { vst1_u8(dst, vec); } +static inline void vst1(int8_t *dst, int8x8_t vec) { vst1_s8(dst, vec); } +static inline void vst1(uint8_t *dst, uint8x8_t vec) { vst1_u8(dst, vec); } +static inline void vst1(int16_t *dst, int16x4_t vec) { vst1_s16(dst, vec); } +static inline void vst1(uint16_t *dst, uint16x4_t vec) { vst1_u16(dst, vec); } +static inline void vst1(int32_t *dst, int32x2_t vec) { vst1_s32(dst, vec); } +static inline void vst1(uint32_t *dst, uint32x2_t vec) { vst1_u32(dst, vec); } +static inline void vst1(int64_t *dst, int64x1_t vec) { vst1_s64(dst, vec); } +static inline void vst1(uint64_t *dst, uint64x1_t vec) { vst1_u64(dst, vec); } static inline void vst1q(int8_t *dst, int8x16_t vec) { vst1q_s8(dst, vec); } static inline void vst1q(uint8_t *dst, uint8x16_t vec) { vst1q_u8(dst, vec); } @@ -433,6 +440,19 @@ static inline uint64x2_t vreinterpretq_u64(uint32x4_t vec) { return vreinterpret static inline uint64x2_t vreinterpretq_u64(int64x2_t vec) { return vreinterpretq_u64_s64(vec); } static inline uint64x2_t vreinterpretq_u64(uint64x2_t vec) { return vec; } +// ----------------------------------------------------------------------------- +// vcombine* +// ----------------------------------------------------------------------------- + +static inline int8x16_t vcombine(int8x8_t lhs, int8x8_t rhs) { return vcombine_s8(lhs, rhs); } +static inline uint8x16_t vcombine(uint8x8_t lhs, uint8x8_t rhs) { return vcombine_u8(lhs, rhs); } +static inline int16x8_t vcombine(int16x4_t lhs, int16x4_t rhs) { return vcombine_s16(lhs, rhs); } +static inline uint16x8_t vcombine(uint16x4_t lhs, uint16x4_t rhs) { return vcombine_u16(lhs, rhs); } +static inline int32x4_t vcombine(int32x2_t lhs, int32x2_t rhs) { return vcombine_s32(lhs, rhs); } +static inline uint32x4_t vcombine(uint32x2_t lhs, uint32x2_t rhs) { return vcombine_u32(lhs, rhs); } +static inline int64x2_t vcombine(int64x1_t lhs, int64x1_t rhs) { return vcombine_s64(lhs, rhs); } +static inline uint64x2_t vcombine(uint64x1_t lhs, uint64x1_t rhs) { return vcombine_u64(lhs, rhs); } + // clang-format on } // namespace kleidicv::neon diff --git a/kleidicv/src/conversions/float_conv_neon.cpp b/kleidicv/src/conversions/float_conv_neon.cpp index b6e1fe0a8..90dd5af3e 100644 --- a/kleidicv/src/conversions/float_conv_neon.cpp +++ b/kleidicv/src/conversions/float_conv_neon.cpp @@ -2,15 +2,108 @@ // // SPDX-License-Identifier: Apache-2.0 +#include + #include "kleidicv/kleidicv.h" #include "kleidicv/neon.h" namespace kleidicv::neon { template -kleidicv_error_t float_conversion(const InputType*, size_t, OutputType*, size_t, - size_t, size_t) { - return KLEIDICV_ERROR_NOT_IMPLEMENTED; +class float_conversion_operation; + +template +class float_conversion_operation { + public: + using SrcVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits; + using SrcVectorType = typename SrcVecTraits::VectorType; + using IntermediateVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits< + std::conditional_t, int32_t, uint32_t>>; + using IntermediateVectorType = typename IntermediateVecTraits::VectorType; + + void process_row(size_t width, Columns src, + Columns dst) { + LoopUnroll{width, SrcVecTraits::num_lanes()} + .unroll_twice([&](size_t step) { + SrcVectorType src_vector1 = vld1q_f32(&src[0]); + SrcVectorType src_vector2 = + vld1q_f32(&src[SrcVecTraits::num_lanes()]); + IntermediateVectorType result_vector1 = + vector_path(src_vector1); + IntermediateVectorType result_vector2 = + vector_path(src_vector2); + vst1(&dst[0], vqmovn(vcombine(vqmovn(result_vector1), + vqmovn(result_vector2)))); + src += ptrdiff_t(step); + dst += ptrdiff_t(step); + }) + .remaining([&](size_t length, size_t) { + for (size_t index = 0; index < length; ++index) { + disable_loop_vectorization(); + float f = std::nearbyint(src[ptrdiff_t(index)]); + if (f > std::numeric_limits::max()) { + f = std::numeric_limits::max(); + } else if (f < std::numeric_limits::min()) { + f = std::numeric_limits::min(); + } + dst[index] = static_cast(f); + } + }); + } + + private: + template < + typename O, + std::enable_if_t && std::is_signed_v, int> = 0> + IntermediateVectorType vector_path(SrcVectorType src) { + IntermediateVectorType result = vcvtnq_s32_f32(src); + return result; + } + + template < + typename O, + std::enable_if_t && !std::is_signed_v, int> = 0> + IntermediateVectorType vector_path(SrcVectorType src) { + IntermediateVectorType result = vcvtnq_u32_f32(src); + return result; + } +}; // end of class float_conversion_operation + +template +class float_conversion_operation { + public: + using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits; + using VectorType = typename VecTraits::VectorType; + void process_row(size_t width, Columns /*src*/, + Columns /*dst*/) { + LoopUnroll{width, VecTraits::num_lanes()} + .unroll_twice([&](size_t step) { + step = step + step; // placeholder + }) + .remaining([&](size_t /*length*/, size_t) { + // + }); + } + + private: + // +}; // end of class float_conversion_operation + +template +kleidicv_error_t float_conversion(const InputType* src, size_t src_stride, + OutputType* dst, size_t dst_stride, + size_t width, size_t height) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); + CHECK_IMAGE_SIZE(width, height); + + float_conversion_operation operation; + Rectangle rect{width, height}; + Rows src_rows{src, src_stride}; + Rows dst_rows{dst, dst_stride}; + zip_rows(operation, rect, src_rows, dst_rows); + + return KLEIDICV_OK; } #define KLEIDICV_INSTANTIATE_TEMPLATE(itype, otype) \ -- GitLab From 22967cfc6a0b05c3978db78820e562e0323e4573 Mon Sep 17 00:00:00 2001 From: Ioana Ghiban Date: Mon, 22 Apr 2024 14:22:30 +0200 Subject: [PATCH 07/13] Implement NEON conversion from int8/uint8 to float --- kleidicv/src/conversions/float_conv_neon.cpp | 67 +++++++++++++++++--- 1 file changed, 58 insertions(+), 9 deletions(-) diff --git a/kleidicv/src/conversions/float_conv_neon.cpp b/kleidicv/src/conversions/float_conv_neon.cpp index 90dd5af3e..ba8c17cb6 100644 --- a/kleidicv/src/conversions/float_conv_neon.cpp +++ b/kleidicv/src/conversions/float_conv_neon.cpp @@ -72,21 +72,70 @@ class float_conversion_operation { template class float_conversion_operation { public: - using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits; - using VectorType = typename VecTraits::VectorType; - void process_row(size_t width, Columns /*src*/, - Columns /*dst*/) { - LoopUnroll{width, VecTraits::num_lanes()} + using SrcVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits; + using SrcVectorType = typename SrcVecTraits::VectorType; + using DstVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits; + using DstVectorType = typename DstVecTraits::VectorType; + using DstVector4Type = typename DstVecTraits::Vector4Type; + + void process_row(size_t width, Columns src, + Columns dst) { + LoopUnroll{width, SrcVecTraits::num_lanes()} .unroll_twice([&](size_t step) { - step = step + step; // placeholder + DstVector4Type result_vector1 = + vector_path(vld1q(&src[0])); + DstVector4Type result_vector2 = + vector_path(vld1q(&src[SrcVecTraits::num_lanes()])); + vst1q_f32_x4(&dst[0], result_vector1); + vst1q_f32_x4(&dst[DstVecTraits::num_lanes() * 4], result_vector2); + src += ptrdiff_t(step); + dst += ptrdiff_t(step); }) - .remaining([&](size_t /*length*/, size_t) { - // + .remaining([&](size_t length, size_t) { + for (size_t index = 0; index < length; ++index) { + disable_loop_vectorization(); + InputType n = src[ptrdiff_t(index)]; + dst[ptrdiff_t(index)] = static_cast(n); + } }); } private: - // + template < + typename I, + std::enable_if_t && std::is_signed_v, int> = 0> + DstVector4Type vector_path(const SrcVectorType src) { + DstVector4Type dst_vect; + int16x8_t low = vmovl_s8(vget_low_s8(src)); + int16x8_t hi = vmovl_high_s8(src); + int32x4_t lowlow = vmovl_s16(vget_low_s16(low)); + int32x4_t lowhi = vmovl_high_s16(low); + int32x4_t hilow = vmovl_s16(vget_low_s16(hi)); + int32x4_t hihi = vmovl_high_s16(hi); + dst_vect.val[0] = vcvtq_f32_s32(lowlow); + dst_vect.val[1] = vcvtq_f32_s32(lowhi); + dst_vect.val[2] = vcvtq_f32_s32(hilow); + dst_vect.val[3] = vcvtq_f32_s32(hihi); + return dst_vect; + } + + template < + typename I, + std::enable_if_t && !std::is_signed_v, int> = 0> + DstVector4Type vector_path(const SrcVectorType src) { + DstVector4Type dst_vect; + uint16x8_t low = vmovl_u8(vget_low_u8(src)); + uint16x8_t hi = vmovl_high_u8(src); + uint32x4_t lowlow = vmovl_u16(vget_low_u16(low)); + uint32x4_t lowhi = vmovl_high_u16(low); + uint32x4_t hilow = vmovl_u16(vget_low_u16(hi)); + uint32x4_t hihi = vmovl_high_u16(hi); + dst_vect.val[0] = vcvtq_f32_u32(lowlow); + dst_vect.val[1] = vcvtq_f32_u32(lowhi); + dst_vect.val[2] = vcvtq_f32_u32(hilow); + dst_vect.val[3] = vcvtq_f32_u32(hihi); + return dst_vect; + } }; // end of class float_conversion_operation template -- GitLab From 824e48170c5ccd4527da35405f425fc39ad3aed9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Tue, 23 Apr 2024 14:37:23 +0200 Subject: [PATCH 08/13] Adjust width and height in some sizing tests The width is passed to the process_row function in the float_conversion_operation class as is, which means its value can affect the behavior of the code inside process_row. We can use larger widths in a few test cases to minimize false negatives. --- test/api/test_float_conv.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/api/test_float_conv.cpp b/test/api/test_float_conv.cpp index 29cc08336..3139e11cb 100644 --- a/test/api/test_float_conv.cpp +++ b/test/api/test_float_conv.cpp @@ -394,7 +394,7 @@ TYPED_TEST(FloatConversion, SizingFits128VectorSize2x) { TYPED_TEST(FloatConversion, SizingFits128VectorSize3x) { using InputType = typename TypeParam::first_type; using OutputType = typename TypeParam::second_type; - FloatConversionTest{}.test_sizing(4, 3); + FloatConversionTest{}.test_sizing(12, 1); } TYPED_TEST(FloatConversion, SizingFits512VectorSize) { using InputType = typename TypeParam::first_type; @@ -404,7 +404,7 @@ TYPED_TEST(FloatConversion, SizingFits512VectorSize) { TYPED_TEST(FloatConversion, SizingFits512VectorSize2x) { using InputType = typename TypeParam::first_type; using OutputType = typename TypeParam::second_type; - FloatConversionTest{}.test_sizing(4, 8); + FloatConversionTest{}.test_sizing(16, 2); } TYPED_TEST(FloatConversion, SizingFits512VectorSize3x) { using InputType = typename TypeParam::first_type; @@ -424,7 +424,7 @@ TYPED_TEST(FloatConversion, Sizing128AllButOneRemaining) { TYPED_TEST(FloatConversion, SizingAboutHalfRemaining) { using InputType = typename TypeParam::first_type; using OutputType = typename TypeParam::second_type; - FloatConversionTest{}.test_sizing(19, 2); + FloatConversionTest{}.test_sizing(38, 1); } TYPED_TEST(FloatConversion, SizingEmpty) { using InputType = typename TypeParam::first_type; -- GitLab From 79701f4c638490b75c013f354651feeddc731920 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Tue, 23 Apr 2024 15:06:56 +0200 Subject: [PATCH 09/13] Use different values for float sizing tests With this change, code coverage should be improved at least for the remaining path section of the NEON float to int8 conversion code, specifically the "less than minimum" branch. --- test/api/test_float_conv.cpp | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/test/api/test_float_conv.cpp b/test/api/test_float_conv.cpp index 3139e11cb..5f3b0dda3 100644 --- a/test/api/test_float_conv.cpp +++ b/test/api/test_float_conv.cpp @@ -167,23 +167,47 @@ class FloatConversionTest final { template , bool> = true, - std::enable_if_t, bool> = true> + std::enable_if_t, bool> = true> const Values& get_values() { static const Values kTestValues = { // clang-format off - 10.67F, 11 + -1000.67F, -128 // clang-format on }; return kTestValues; } template , bool> = true, + std::enable_if_t, bool> = true, + std::enable_if_t, bool> = true> + const Values& get_values() { + static const Values kTestValues = { + // clang-format off + -1000.67F, 0 + // clang-format on + }; + return kTestValues; + } + + template , bool> = true, + std::enable_if_t, bool> = true> + const Values& get_values() { + static const Values kTestValues = { + // clang-format off + -127, -127.0 + // clang-format on + }; + return kTestValues; + } + + template , bool> = true, std::enable_if_t, bool> = true> const Values& get_values() { static const Values kTestValues = { // clang-format off - 11, 11.0 + 255, 255.0 // clang-format on }; return kTestValues; -- GitLab From cce13740c04bd4b4153d932e4811d343de5a9f27 Mon Sep 17 00:00:00 2001 From: Ioana Ghiban Date: Tue, 23 Apr 2024 17:04:27 +0200 Subject: [PATCH 10/13] Update doc-opencv.md --- doc/opencv.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/doc/opencv.md b/doc/opencv.md index 0aa0bb525..57f7d7e32 100644 --- a/doc/opencv.md +++ b/doc/opencv.md @@ -167,7 +167,16 @@ Notes on parameters: + `CV_32S` ### `convertTo` -Currently converting to different data types is not supported. This function scales given input of `src_depth == CV_8U` using `scale` and `shift`. +This function will scale given input using `scale` and `shift` if they are significant enough, and if `src_depth` and `dst_depth` are equal to `CV_8U`. + +Additionally, it is able to convert between data types as follows: + +| src_depth | dst_depth | +|-----------|-----------| +| CV_32F | CV_8S | +| CV_32F | CV_8U | +| CV_8S | CV_32F | +| CV_8U | CV_32F | ### `exp` Exponential function. Currently only `CV_32F` type is supported. @@ -181,4 +190,4 @@ Notes on parameters: * `operation` - flag specifying correspondence between the arrays. Supported [OpenCV cmp types](https://docs.opencv.org/5.x/d2/de8/group__core__array.html#ga0cc47ff833d40b58ecbe1d609a53d784) are: + `cv::CMP_EQ ` - + `cv::CMP_GT` + + `cv::CMP_GT` \ No newline at end of file -- GitLab From 9be5992dd0b573424e83cec9a0d54de95881e7a1 Mon Sep 17 00:00:00 2001 From: Ioana Ghiban Date: Wed, 24 Apr 2024 13:13:52 +0200 Subject: [PATCH 11/13] Load source in process_row instead of vector_path --- kleidicv/src/conversions/float_conv_sc.h | 40 ++++++++++++++++-------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/kleidicv/src/conversions/float_conv_sc.h b/kleidicv/src/conversions/float_conv_sc.h index 68cad2918..d603d0c31 100644 --- a/kleidicv/src/conversions/float_conv_sc.h +++ b/kleidicv/src/conversions/float_conv_sc.h @@ -102,9 +102,11 @@ class float_conversion_operation { LoopUnroll{width, VecTraits::num_lanes()} .unroll_twice([&](size_t step) KLEIDICV_STREAMING_COMPATIBLE { svbool_t pg = VecTraits::svptrue(); - VectorType dst_vector1 = vector_path(pg, &src[0]); - VectorType dst_vector2 = vector_path( - pg, &src.at(ptrdiff_t(VecTraits::num_lanes()))[0]); + auto src_vect1 = load_src(pg, &src[0], 0); + auto src_vect2 = load_src(pg, &src[0], 1); + + VectorType dst_vector1 = vector_path(pg, src_vect1); + VectorType dst_vector2 = vector_path(pg, src_vect2); svst1(pg, &dst[0], dst_vector1); svst1_vnum(pg, &dst[0], 1, dst_vector2); src += ptrdiff_t(step); @@ -114,8 +116,8 @@ class float_conversion_operation { size_t index = 0; svbool_t pg = VecTraits::svwhilelt(index, length); while (svptest_first(VecTraits::svptrue(), pg)) { - VectorType dst_vector = - vector_path(pg, &src[ptrdiff_t(index)]); + auto src_vect = load_src(pg, &src[ptrdiff_t(index)], 0); + VectorType dst_vector = vector_path(pg, src_vect); svst1(pg, &dst[ptrdiff_t(index)], dst_vector); // Update loop counter and calculate the next governing predicate. index += VecTraits::num_lanes(); @@ -125,22 +127,34 @@ class float_conversion_operation { } private: + template , int> = 0> + VectorType vector_path(svbool_t& pg, + I src_vector) KLEIDICV_STREAMING_COMPATIBLE { + return svcvt_f32_s32_x(pg, src_vector); + } + template , int> = 0> + VectorType vector_path(svbool_t& pg, + I src_vector) KLEIDICV_STREAMING_COMPATIBLE { + return svcvt_f32_u32_x(pg, src_vector); + } + template < typename I, std::enable_if_t && std::is_signed_v, int> = 0> - VectorType vector_path(svbool_t& pg, - const I* src) KLEIDICV_STREAMING_COMPATIBLE { - svint32_t src_vector = svld1sb_s32(pg, src); - return svcvt_f32_s32_x(pg, src_vector); + svint32_t load_src(svbool_t& pg, const I* src, + size_t vnum) KLEIDICV_STREAMING_COMPATIBLE { + svint32_t src_vect = svld1sb_vnum_s32(pg, src, vnum); + return src_vect; } template < typename I, std::enable_if_t && !std::is_signed_v, int> = 0> - VectorType vector_path(svbool_t& pg, - const I* src) KLEIDICV_STREAMING_COMPATIBLE { - svuint32_t src_vector = svld1ub_u32(pg, src); - return svcvt_f32_u32_x(pg, src_vector); + svuint32_t load_src(svbool_t& pg, const I* src, + size_t vnum) KLEIDICV_STREAMING_COMPATIBLE { + svuint32_t src_vect = svld1ub_vnum_u32(pg, src, vnum); + return src_vect; } }; // end of class float_conversion_operation -- GitLab From 8f71e065fe312288d82e35b4d942d6fe2206ffea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Tue, 30 Apr 2024 19:59:36 +0200 Subject: [PATCH 12/13] Add benchmarks --- benchmark/benchmark.cpp | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index e4772357e..780400ec9 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -76,6 +76,36 @@ BENCH_UNARY_OP(rgba_to_yuv_u8, 4, uint8_t); BENCH_UNARY_OP(bgr_to_yuv_u8, 3, uint8_t); BENCH_UNARY_OP(bgra_to_yuv_u8, 4, uint8_t); +template +static void bench_unary_op(Function f, benchmark::State& state) { + // Setup + std::vector src; + std::vector dst; + src.resize(image_width * image_height); + dst.resize(image_width * image_height); + + std::mt19937 generator; + std::generate(src.begin(), src.end(), generator); + + for (auto _ : state) { + // This code gets benchmarked + auto unused = f(src.data(), image_width, dst.data(), image_width, + image_width, image_height); + (void)unused; + } +} + +#define BENCH_UNARY_OP_DIFFERENT_IO_TYPES(name, itype, otype) \ + static void name(benchmark::State& state) { \ + bench_unary_op(kleidicv_##name, state); \ + } \ + BENCHMARK(name) + +BENCH_UNARY_OP_DIFFERENT_IO_TYPES(float_conversion_f32_s8, float, int8_t); +BENCH_UNARY_OP_DIFFERENT_IO_TYPES(float_conversion_f32_u8, float, uint8_t); +BENCH_UNARY_OP_DIFFERENT_IO_TYPES(float_conversion_s8_f32, int8_t, float); +BENCH_UNARY_OP_DIFFERENT_IO_TYPES(float_conversion_u8_f32, uint8_t, float); + static void min_max_loc_u8(benchmark::State& state) { // Setup std::vector src; -- GitLab From 4e22a2de08740a60f3d2b3652dde9e297fe46b08 Mon Sep 17 00:00:00 2001 From: Ioana Ghiban Date: Thu, 6 Jun 2024 12:27:11 +0100 Subject: [PATCH 13/13] Workaround illegal NEON instruction in streaming-compatible function Current head of clang-19 generates NEON instructions in the float_conversion_sc function even though marked as streaming compatible. --- kleidicv/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kleidicv/CMakeLists.txt b/kleidicv/CMakeLists.txt index b06555250..8ece23cd4 100644 --- a/kleidicv/CMakeLists.txt +++ b/kleidicv/CMakeLists.txt @@ -166,7 +166,7 @@ if(KLEIDICV_BUILD_SME2) set_target_properties(kleidicv_sme2 PROPERTIES CXX_STANDARD 17) target_compile_options(kleidicv_sme2 PRIVATE ${KLEIDICV_CXX_FLAGS} - "-march=armv9-a+sve2+sme2" + "-march=armv9-a+sve2+sme2+nosimd" "-DKLEIDICV_TARGET_SME2=1" ) endif() -- GitLab