diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp index 04b672a081a92ea49e17737bcf3af11b5de9d10d..627fb545d1af976c558545610688ab18559641bb 100644 --- a/adapters/opencv/kleidicv_hal.cpp +++ b/adapters/opencv/kleidicv_hal.cpp @@ -707,6 +707,33 @@ int convertTo(const uchar *src_data, size_t src_step, int src_depth, } } + // type conversion only + if (scale == 1.0 && shift == 0.0) { + // float32 to int8 + if (src_depth == CV_32F && dst_depth == CV_8S) { + return convert_error(kleidicv_float_conversion_f32_s8( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, width, height)); + } + // float32 to uint8 + if (src_depth == CV_32F && dst_depth == CV_8U) { + return convert_error(kleidicv_float_conversion_f32_u8( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, width, height)); + } + // int8 to float32 + if (src_depth == CV_8S && dst_depth == CV_32F) { + return convert_error(kleidicv_float_conversion_s8_f32( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, width, height)); + } + // uint8 to float32 + if (src_depth == CV_8U && dst_depth == CV_32F) { + return convert_error(kleidicv_float_conversion_u8_f32( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, width, height)); + } + } return CV_HAL_ERROR_NOT_IMPLEMENTED; } diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index e4772357e20909e97d946be93649e91e49081c1a..780400ec9e64080a03fcf3cad5c1a2a1928c2041 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -76,6 +76,36 @@ BENCH_UNARY_OP(rgba_to_yuv_u8, 4, uint8_t); BENCH_UNARY_OP(bgr_to_yuv_u8, 3, uint8_t); BENCH_UNARY_OP(bgra_to_yuv_u8, 4, uint8_t); +template +static void bench_unary_op(Function f, benchmark::State& state) { + // Setup + std::vector src; + std::vector dst; + src.resize(image_width * image_height); + dst.resize(image_width * image_height); + + std::mt19937 generator; + std::generate(src.begin(), src.end(), generator); + + for (auto _ : state) { + // This code gets benchmarked + auto unused = f(src.data(), image_width, dst.data(), image_width, + image_width, image_height); + (void)unused; + } +} + +#define BENCH_UNARY_OP_DIFFERENT_IO_TYPES(name, itype, otype) \ + static void name(benchmark::State& state) { \ + bench_unary_op(kleidicv_##name, state); \ + } \ + BENCHMARK(name) + +BENCH_UNARY_OP_DIFFERENT_IO_TYPES(float_conversion_f32_s8, float, int8_t); +BENCH_UNARY_OP_DIFFERENT_IO_TYPES(float_conversion_f32_u8, float, uint8_t); +BENCH_UNARY_OP_DIFFERENT_IO_TYPES(float_conversion_s8_f32, int8_t, float); +BENCH_UNARY_OP_DIFFERENT_IO_TYPES(float_conversion_u8_f32, uint8_t, float); + static void min_max_loc_u8(benchmark::State& state) { // Setup std::vector src; diff --git a/conformity/opencv/CMakeLists.txt b/conformity/opencv/CMakeLists.txt index 4fc695794d84c71fd7dc685035d19aecfc01be2c..7bb122fd65e206a05b9cea6b5662a44370490fc1 100644 --- a/conformity/opencv/CMakeLists.txt +++ b/conformity/opencv/CMakeLists.txt @@ -36,6 +36,7 @@ add_executable( test_rgb2yuv.cpp test_sobel.cpp test_exp.cpp + test_float_conv.cpp ) target_link_libraries( @@ -76,6 +77,8 @@ add_executable( test_rgb2yuv.cpp test_sobel.cpp test_exp.cpp + test_float_conv.cpp + ) target_link_libraries( diff --git a/conformity/opencv/test_float_conv.cpp b/conformity/opencv/test_float_conv.cpp new file mode 100644 index 0000000000000000000000000000000000000000..13e8c88121c238ec941cfc07c70d8ef605224349 --- /dev/null +++ b/conformity/opencv/test_float_conv.cpp @@ -0,0 +1,209 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "test_float_conv.h" + +#include +#include + +float floatval(uint32_t v) { + float result; + static_assert(sizeof(result) == sizeof(v)); + memcpy(&result, &v, sizeof(result)); + return result; +} + +float quietNaN = std::numeric_limits::quiet_NaN(); +float signalingNaN = std::numeric_limits::signaling_NaN(); +float posInfinity = std::numeric_limits::infinity(); +float negInfinity = -std::numeric_limits::infinity(); + +float minusNaN = floatval(0xFF800001); +float plusNaN = floatval(0x7F800001); +float plusZero = 0.0F; +float minusZero = -0.0F; + +float oneNaN = floatval(0x7FC00001); +float zeroDivZero = -std::numeric_limits::quiet_NaN(); +float floatMin = std::numeric_limits::min(); +float floatMax = std::numeric_limits::max(); + +float posSubnormalMin = std::numeric_limits::denorm_min(); +float posSubnormalMax = floatval(0x007FFFFF); +float negSubnormalMin = -std::numeric_limits::denorm_min(); +float negSubnormalMax = floatval(0x807FFFFF); + +template +cv::Mat exec_float32_to_int8(cv::Mat& input) { + cv::Mat result; + input.convertTo(result, Signed ? CV_8SC1 : CV_8UC1); + return result; +} + +cv::Mat exec_int8_to_float32(cv::Mat& input) { + cv::Mat result; + input.convertTo(result, CV_32FC1); + return result; +} + +#if MANAGER +template +bool test_float32_to_int8_random(int index, + RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue) { + cv::RNG rng(0); + + for (size_t x = 5; x <= 16; ++x) { + for (size_t y = 5; y <= 16; ++y) { + cv::Mat input(x, y, CV_32FC(Channels)); + rng.fill(input, cv::RNG::UNIFORM, Signed ? -1000 : 0, 1000); + + cv::Mat actual = exec_float32_to_int8(input); + cv::Mat expected = get_expected_from_subordinate(index, request_queue, + reply_queue, input); + + if (are_matrices_different(0, actual, expected)) { + fail_print_matrices(x, y, input, actual, expected); + return true; + } + } + } + + return false; +} + +template +bool test_int8_to_float32_random(int index, + RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue) { + cv::RNG rng(0); + + for (size_t x = 5; x <= 16; ++x) { + for (size_t y = 5; y <= 16; ++y) { + cv::Mat input(x, y, Signed ? CV_8SC(Channels) : CV_8UC(Channels)); + rng.fill(input, cv::RNG::UNIFORM, Signed ? -1000 : 0, 1000); + + cv::Mat actual = exec_int8_to_float32(input); + cv::Mat expected = get_expected_from_subordinate(index, request_queue, + reply_queue, input); + + if (are_matrices_different(0, actual, expected)) { + fail_print_matrices(x, y, input, actual, expected); + return true; + } + } + } + + return false; +} + +static constexpr int custom_data_float_height = 8; +static constexpr int custom_data_float_width = 4; + +static float + custom_data_float[custom_data_float_height * custom_data_float_width] = { + // clang-format off + quietNaN, signalingNaN, posInfinity, negInfinity, + minusNaN, plusNaN, plusZero, minusZero, + oneNaN, zeroDivZero, floatMin, floatMax, + posSubnormalMin, posSubnormalMax, negSubnormalMin, negSubnormalMax, + 1111.11, -1112.22, 113.33, 114.44, + 111.51, 112.62, 113.73, 114.84, + 126.66, 127.11, 128.66, 129.11, + 11.5, 12.5, -11.5, -12.5, + // clang-format on +}; + +static constexpr int custom_data_int8_height = 1; +static constexpr int custom_data_int8_width = 7; + +static int8_t + custom_data_int8[custom_data_int8_height * custom_data_int8_width] = { + // clang-format off + -128, -127, -1, 0, 1, 126, 127 + // clang-format on +}; + +static uint8_t + custom_data_uint8[custom_data_int8_height * custom_data_int8_width] = { + // clang-format off + 0, 1, 126, 127, 128, 254, 255 + // clang-format on +}; + +template +bool test_float32_to_int8_custom(int index, + RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue) { + cv::Mat input(custom_data_float_height, custom_data_float_width, CV_32FC1, + custom_data_float); + + cv::Mat actual = exec_float32_to_int8(input); + cv::Mat expected = + get_expected_from_subordinate(index, request_queue, reply_queue, input); + + if (are_matrices_different(0, actual, expected)) { + fail_print_matrices(custom_data_float_height, custom_data_float_width, + input, actual, expected); + return true; + } + + return false; +} + +template +bool test_int8_to_float32_custom(int index, + RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue) { + cv::Mat input(custom_data_int8_height, custom_data_int8_width, + Signed ? CV_8SC1 : CV_8UC1, + Signed ? static_cast(custom_data_int8) + : static_cast(custom_data_uint8)); + + cv::Mat actual = exec_int8_to_float32(input); + cv::Mat expected = + get_expected_from_subordinate(index, request_queue, reply_queue, input); + + if (are_matrices_different(0, actual, expected)) { + fail_print_matrices(custom_data_int8_height, custom_data_int8_width, input, + actual, expected); + return true; + } + + return false; +} +#endif + +std::vector& float_conversion_tests_get() { + // clang-format off + static std::vector tests = { + TEST("Float32 to Signed Int8, random, 1 channel", (test_float32_to_int8_random), exec_float32_to_int8), + TEST("Float32 to Signed Int8, random, 2 channel", (test_float32_to_int8_random), exec_float32_to_int8), + TEST("Float32 to Signed Int8, random, 3 channel", (test_float32_to_int8_random), exec_float32_to_int8), + TEST("Float32 to Signed Int8, random, 4 channel", (test_float32_to_int8_random), exec_float32_to_int8), + + TEST("Float32 to Unsigned Int8, random, 1 channel", (test_float32_to_int8_random), exec_float32_to_int8), + TEST("Float32 to Unsigned Int8, random, 2 channel", (test_float32_to_int8_random), exec_float32_to_int8), + TEST("Float32 to Unsigned Int8, random, 3 channel", (test_float32_to_int8_random), exec_float32_to_int8), + TEST("Float32 to Unsigned Int8, random, 4 channel", (test_float32_to_int8_random), exec_float32_to_int8), + + TEST("Float32 to Signed Int8, custom (special)", test_float32_to_int8_custom, exec_float32_to_int8), + TEST("Float32 to Unsigned Int8, custom (special)", test_float32_to_int8_custom, exec_float32_to_int8), + + TEST("Signed Int8 to Float32, random, 1 channel", (test_int8_to_float32_random), exec_int8_to_float32), + TEST("Signed Int8 to Float32, random, 2 channel", (test_int8_to_float32_random), exec_int8_to_float32), + TEST("Signed Int8 to Float32, random, 3 channel", (test_int8_to_float32_random), exec_int8_to_float32), + TEST("Signed Int8 to Float32, random, 4 channel", (test_int8_to_float32_random), exec_int8_to_float32), + + TEST("Unsigned Int8 to Float32, random, 1 channel", (test_int8_to_float32_random), exec_int8_to_float32), + TEST("Unsigned Int8 to Float32, random, 2 channel", (test_int8_to_float32_random), exec_int8_to_float32), + TEST("Unsigned Int8 to Float32, random, 3 channel", (test_int8_to_float32_random), exec_int8_to_float32), + TEST("Unsigned Int8 to Float32, random, 4 channel", (test_int8_to_float32_random), exec_int8_to_float32), + + TEST("Signed Int8 to Float32, custom", test_int8_to_float32_custom, exec_int8_to_float32), + TEST("Unigned Int8 to Float32, custom", test_int8_to_float32_custom, exec_int8_to_float32), + }; + // clang-format on + return tests; +} diff --git a/conformity/opencv/test_float_conv.h b/conformity/opencv/test_float_conv.h new file mode 100644 index 0000000000000000000000000000000000000000..9e4c40be6f28f9d49e2ca3e525ce3fd6c395b094 --- /dev/null +++ b/conformity/opencv/test_float_conv.h @@ -0,0 +1,14 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_OPENCV_CONFORMITY_TEST_FLOAT_CONV_H_ +#define KLEIDICV_OPENCV_CONFORMITY_TEST_FLOAT_CONV_H_ + +#include + +#include "tests.h" + +std::vector& float_conversion_tests_get(); + +#endif // KLEIDICV_OPENCV_CONFORMITY_TEST_FLOAT_CONV_H_ diff --git a/conformity/opencv/tests.cpp b/conformity/opencv/tests.cpp index cb5fe944c3f92105461ce6548b90a4db8c0171f7..8064a6565ec987aa949cc235ed83f686fceede89 100644 --- a/conformity/opencv/tests.cpp +++ b/conformity/opencv/tests.cpp @@ -12,6 +12,7 @@ #include "opencv2/imgproc.hpp" #include "test_binary_op.h" #include "test_exp.h" +#include "test_float_conv.h" #include "test_gaussian_blur.h" #include "test_min_max.h" #include "test_rgb2yuv.h" @@ -34,6 +35,7 @@ std::vector all_tests = merge_tests({ rgb2yuv_tests_get, sobel_tests_get, exp_tests_get, + float_conversion_tests_get, }); #if MANAGER diff --git a/doc/opencv.md b/doc/opencv.md index 0aa0bb525c201b8c4e70ecec436afd60538051a6..57f7d7e32a228d4d9df5a178b1564d5b6cde50c6 100644 --- a/doc/opencv.md +++ b/doc/opencv.md @@ -167,7 +167,16 @@ Notes on parameters: + `CV_32S` ### `convertTo` -Currently converting to different data types is not supported. This function scales given input of `src_depth == CV_8U` using `scale` and `shift`. +This function will scale given input using `scale` and `shift` if they are significant enough, and if `src_depth` and `dst_depth` are equal to `CV_8U`. + +Additionally, it is able to convert between data types as follows: + +| src_depth | dst_depth | +|-----------|-----------| +| CV_32F | CV_8S | +| CV_32F | CV_8U | +| CV_8S | CV_32F | +| CV_8U | CV_32F | ### `exp` Exponential function. Currently only `CV_32F` type is supported. @@ -181,4 +190,4 @@ Notes on parameters: * `operation` - flag specifying correspondence between the arrays. Supported [OpenCV cmp types](https://docs.opencv.org/5.x/d2/de8/group__core__array.html#ga0cc47ff833d40b58ecbe1d609a53d784) are: + `cv::CMP_EQ ` - + `cv::CMP_GT` + + `cv::CMP_GT` \ No newline at end of file diff --git a/kleidicv/CMakeLists.txt b/kleidicv/CMakeLists.txt index b06555250b62d3c7c1e6544e283606f9e02092dc..8ece23cd4956acfacc9a3a0a5130c9645e2705a3 100644 --- a/kleidicv/CMakeLists.txt +++ b/kleidicv/CMakeLists.txt @@ -166,7 +166,7 @@ if(KLEIDICV_BUILD_SME2) set_target_properties(kleidicv_sme2 PROPERTIES CXX_STANDARD 17) target_compile_options(kleidicv_sme2 PRIVATE ${KLEIDICV_CXX_FLAGS} - "-march=armv9-a+sve2+sme2" + "-march=armv9-a+sve2+sme2+nosimd" "-DKLEIDICV_TARGET_SME2=1" ) endif() diff --git a/kleidicv/include/kleidicv/kleidicv.h b/kleidicv/include/kleidicv/kleidicv.h index 88dfdc6bd2e6ee91d96a29e6bf1948464e45010d..b27f585826f59b5ce5d1aa7cb6cf0e7ccd0c4dcf 100644 --- a/kleidicv/include/kleidicv/kleidicv.h +++ b/kleidicv/include/kleidicv/kleidicv.h @@ -1487,6 +1487,65 @@ KLEIDICV_API_DECLARATION(kleidicv_exp_f32, const float *src, size_t src_stride, float *dst, size_t dst_stride, size_t width, size_t height); +/// Converts the elements in `src` from a floating-point type to an integer +/// type, then stores the result in `dst`. +/// +/// Each resulting element is saturated, i.e. it is the smallest/largest +/// number of the type of the element if the `src` data type cannot be +/// represented as the `dst` type. In case of some special values, such as the +/// different variations of `NaN`, the result is `0`. Source and destination +/// data length is `width` * `height`. Number of elements is limited to @ref +/// KLEIDICV_MAX_IMAGE_PIXELS. +/// +/// @param src Pointer to the source data. Must be non-null. +/// @param src_stride Distance in bytes from the start of one row to the +/// start of the next row for the source data. +/// Must not be less than width * sizeof(type). +/// @param dst Pointer to the destination data. Must be non-null. +/// @param dst_stride Distance in bytes from the start of one row to the +/// start of the next row for the destination data. +/// Must not be less than width * sizeof(type). +/// @param width Number of elements in a row. +/// @param height Number of rows in the data. +/// +KLEIDICV_API_DECLARATION(kleidicv_float_conversion_f32_s8, const float *src, + size_t src_stride, int8_t *dst, size_t dst_stride, + size_t width, size_t height); +/// @copydoc kleidicv_float_conversion_f32_s8 +KLEIDICV_API_DECLARATION(kleidicv_float_conversion_f32_u8, const float *src, + size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height); + +/// Converts the elements in `src` from an integer type to a floating-point +/// type, then stores the result in `dst`. +/// +/// Each resulting element is saturated, i.e. it is the smallest/largest +/// number of the type of the element if the `src` data type cannot be +/// represented as the `dst` type. Source and destination data length is +/// `width` * `height`. Number of elements is limited to @ref +/// KLEIDICV_MAX_IMAGE_PIXELS. +/// +/// @param src Pointer to the source data. Must be non-null. +/// @param src_stride Distance in bytes from the start of one row to the +/// start of the next row for the source data. Must +/// not be less than width * sizeof(type). +/// Must be a multiple of sizeof(type). +/// @param dst Pointer to the destination data. Must be non-null. +/// @param dst_stride Distance in bytes from the start of one row to the +/// start of the next row for the destination data. Must +/// not be less than width * sizeof(type). +/// Must be a multiple of sizeof(type). +/// @param width Number of pixels in a row. +/// @param height Number of rows in the data. +/// +KLEIDICV_API_DECLARATION(kleidicv_float_conversion_s8_f32, const int8_t *src, + size_t src_stride, float *dst, size_t dst_stride, + size_t width, size_t height); +/// @copydoc kleidicv_float_conversion_s8_f32 +KLEIDICV_API_DECLARATION(kleidicv_float_conversion_u8_f32, const uint8_t *src, + size_t src_stride, float *dst, size_t dst_stride, + size_t width, size_t height); + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/kleidicv/include/kleidicv/neon_intrinsics.h b/kleidicv/include/kleidicv/neon_intrinsics.h index 62053bc8e72418f3dc28f51c009c8e5d46d4bd6d..0add004594adfc5058ecbfcbbced35a7bff182b1 100644 --- a/kleidicv/include/kleidicv/neon_intrinsics.h +++ b/kleidicv/include/kleidicv/neon_intrinsics.h @@ -349,7 +349,14 @@ static inline float32x4x4_t vld1q_x4(const float32_t *src) { return vld1q_f32_x4 // NEON store operations // ----------------------------------------------------------------------------- -static inline void vst1(uint8_t *dst, uint8x8_t vec) { vst1_u8(dst, vec); } +static inline void vst1(int8_t *dst, int8x8_t vec) { vst1_s8(dst, vec); } +static inline void vst1(uint8_t *dst, uint8x8_t vec) { vst1_u8(dst, vec); } +static inline void vst1(int16_t *dst, int16x4_t vec) { vst1_s16(dst, vec); } +static inline void vst1(uint16_t *dst, uint16x4_t vec) { vst1_u16(dst, vec); } +static inline void vst1(int32_t *dst, int32x2_t vec) { vst1_s32(dst, vec); } +static inline void vst1(uint32_t *dst, uint32x2_t vec) { vst1_u32(dst, vec); } +static inline void vst1(int64_t *dst, int64x1_t vec) { vst1_s64(dst, vec); } +static inline void vst1(uint64_t *dst, uint64x1_t vec) { vst1_u64(dst, vec); } static inline void vst1q(int8_t *dst, int8x16_t vec) { vst1q_s8(dst, vec); } static inline void vst1q(uint8_t *dst, uint8x16_t vec) { vst1q_u8(dst, vec); } @@ -433,6 +440,19 @@ static inline uint64x2_t vreinterpretq_u64(uint32x4_t vec) { return vreinterpret static inline uint64x2_t vreinterpretq_u64(int64x2_t vec) { return vreinterpretq_u64_s64(vec); } static inline uint64x2_t vreinterpretq_u64(uint64x2_t vec) { return vec; } +// ----------------------------------------------------------------------------- +// vcombine* +// ----------------------------------------------------------------------------- + +static inline int8x16_t vcombine(int8x8_t lhs, int8x8_t rhs) { return vcombine_s8(lhs, rhs); } +static inline uint8x16_t vcombine(uint8x8_t lhs, uint8x8_t rhs) { return vcombine_u8(lhs, rhs); } +static inline int16x8_t vcombine(int16x4_t lhs, int16x4_t rhs) { return vcombine_s16(lhs, rhs); } +static inline uint16x8_t vcombine(uint16x4_t lhs, uint16x4_t rhs) { return vcombine_u16(lhs, rhs); } +static inline int32x4_t vcombine(int32x2_t lhs, int32x2_t rhs) { return vcombine_s32(lhs, rhs); } +static inline uint32x4_t vcombine(uint32x2_t lhs, uint32x2_t rhs) { return vcombine_u32(lhs, rhs); } +static inline int64x2_t vcombine(int64x1_t lhs, int64x1_t rhs) { return vcombine_s64(lhs, rhs); } +static inline uint64x2_t vcombine(uint64x1_t lhs, uint64x1_t rhs) { return vcombine_u64(lhs, rhs); } + // clang-format on } // namespace kleidicv::neon diff --git a/kleidicv/src/conversions/float_conv_api.cpp b/kleidicv/src/conversions/float_conv_api.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0f3fdea2dcc1a00a80a99eba948063d34ea28315 --- /dev/null +++ b/kleidicv/src/conversions/float_conv_api.cpp @@ -0,0 +1,71 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "kleidicv/dispatch.h" +#include "kleidicv/kleidicv.h" +#include "kleidicv/types.h" + +namespace kleidicv { + +namespace neon { + +template +kleidicv_error_t float_conversion(const InputType* src, size_t src_stride, + OutputType* dst, size_t dst_stride, + size_t width, size_t height); + +} // namespace neon + +namespace sve2 { + +template +kleidicv_error_t float_conversion(const InputType* src, size_t src_stride, + OutputType* dst, size_t dst_stride, + size_t width, size_t height); + +} // namespace sve2 + +namespace sme2 { + +template +kleidicv_error_t float_conversion(const InputType* src, size_t src_stride, + OutputType* dst, size_t dst_stride, + size_t width, size_t height); + +} // namespace sme2 + +#ifdef KLEIDICV_HAVE_SVE2 +#define SVE2_FUNC_POINTER(name, itype, otype) \ + [[maybe_unused]] static auto sve2_func_##itype##_##otype = \ + kleidicv::sve2::float_conversion; +#else +#define SVE2_FUNC_POINTER(name, itype, otype) +#endif // KLEIDICV_HAVE_SVE2 + +#ifdef KLEIDICV_HAVE_SME2 +#define SME2_FUNC_POINTER(name, itype, otype) \ + static auto sme2_func_##itype##_##otype = \ + kleidicv::sme2::float_conversion; +#else +#define SME2_FUNC_POINTER(name, itype, otype) +#endif // KLEIDICV_HAVE_SME2 + +// NOLINTBEGIN(cppcoreguidelines-avoid-non-const-global-variables) +#define KLEIDICV_DEFINE_C_API(name, itype, otype) \ + static auto neon_func_##itype##_##otype = \ + kleidicv::neon::float_conversion; \ + SVE2_FUNC_POINTER(name, itype, otype); \ + SME2_FUNC_POINTER(name, itype, otype); \ + KLEIDICV_MULTIVERSION_C_API( \ + name, neon_func_##itype##_##otype, \ + KLEIDICV_SVE2_IMPL_IF(sve2_func_##itype##_##otype), \ + sme2_func_##itype##_##otype) +// NOLINTEND(cppcoreguidelines-avoid-non-const-global-variables) + +KLEIDICV_DEFINE_C_API(kleidicv_float_conversion_f32_s8, float, int8_t); +KLEIDICV_DEFINE_C_API(kleidicv_float_conversion_f32_u8, float, uint8_t); +KLEIDICV_DEFINE_C_API(kleidicv_float_conversion_s8_f32, int8_t, float); +KLEIDICV_DEFINE_C_API(kleidicv_float_conversion_u8_f32, uint8_t, float); + +} // namespace kleidicv diff --git a/kleidicv/src/conversions/float_conv_neon.cpp b/kleidicv/src/conversions/float_conv_neon.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ba8c17cb6945b8a607dea5149d29e8f8de14a1bc --- /dev/null +++ b/kleidicv/src/conversions/float_conv_neon.cpp @@ -0,0 +1,169 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include "kleidicv/kleidicv.h" +#include "kleidicv/neon.h" + +namespace kleidicv::neon { + +template +class float_conversion_operation; + +template +class float_conversion_operation { + public: + using SrcVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits; + using SrcVectorType = typename SrcVecTraits::VectorType; + using IntermediateVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits< + std::conditional_t, int32_t, uint32_t>>; + using IntermediateVectorType = typename IntermediateVecTraits::VectorType; + + void process_row(size_t width, Columns src, + Columns dst) { + LoopUnroll{width, SrcVecTraits::num_lanes()} + .unroll_twice([&](size_t step) { + SrcVectorType src_vector1 = vld1q_f32(&src[0]); + SrcVectorType src_vector2 = + vld1q_f32(&src[SrcVecTraits::num_lanes()]); + IntermediateVectorType result_vector1 = + vector_path(src_vector1); + IntermediateVectorType result_vector2 = + vector_path(src_vector2); + vst1(&dst[0], vqmovn(vcombine(vqmovn(result_vector1), + vqmovn(result_vector2)))); + src += ptrdiff_t(step); + dst += ptrdiff_t(step); + }) + .remaining([&](size_t length, size_t) { + for (size_t index = 0; index < length; ++index) { + disable_loop_vectorization(); + float f = std::nearbyint(src[ptrdiff_t(index)]); + if (f > std::numeric_limits::max()) { + f = std::numeric_limits::max(); + } else if (f < std::numeric_limits::min()) { + f = std::numeric_limits::min(); + } + dst[index] = static_cast(f); + } + }); + } + + private: + template < + typename O, + std::enable_if_t && std::is_signed_v, int> = 0> + IntermediateVectorType vector_path(SrcVectorType src) { + IntermediateVectorType result = vcvtnq_s32_f32(src); + return result; + } + + template < + typename O, + std::enable_if_t && !std::is_signed_v, int> = 0> + IntermediateVectorType vector_path(SrcVectorType src) { + IntermediateVectorType result = vcvtnq_u32_f32(src); + return result; + } +}; // end of class float_conversion_operation + +template +class float_conversion_operation { + public: + using SrcVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits; + using SrcVectorType = typename SrcVecTraits::VectorType; + using DstVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits; + using DstVectorType = typename DstVecTraits::VectorType; + using DstVector4Type = typename DstVecTraits::Vector4Type; + + void process_row(size_t width, Columns src, + Columns dst) { + LoopUnroll{width, SrcVecTraits::num_lanes()} + .unroll_twice([&](size_t step) { + DstVector4Type result_vector1 = + vector_path(vld1q(&src[0])); + DstVector4Type result_vector2 = + vector_path(vld1q(&src[SrcVecTraits::num_lanes()])); + vst1q_f32_x4(&dst[0], result_vector1); + vst1q_f32_x4(&dst[DstVecTraits::num_lanes() * 4], result_vector2); + src += ptrdiff_t(step); + dst += ptrdiff_t(step); + }) + .remaining([&](size_t length, size_t) { + for (size_t index = 0; index < length; ++index) { + disable_loop_vectorization(); + InputType n = src[ptrdiff_t(index)]; + dst[ptrdiff_t(index)] = static_cast(n); + } + }); + } + + private: + template < + typename I, + std::enable_if_t && std::is_signed_v, int> = 0> + DstVector4Type vector_path(const SrcVectorType src) { + DstVector4Type dst_vect; + int16x8_t low = vmovl_s8(vget_low_s8(src)); + int16x8_t hi = vmovl_high_s8(src); + int32x4_t lowlow = vmovl_s16(vget_low_s16(low)); + int32x4_t lowhi = vmovl_high_s16(low); + int32x4_t hilow = vmovl_s16(vget_low_s16(hi)); + int32x4_t hihi = vmovl_high_s16(hi); + dst_vect.val[0] = vcvtq_f32_s32(lowlow); + dst_vect.val[1] = vcvtq_f32_s32(lowhi); + dst_vect.val[2] = vcvtq_f32_s32(hilow); + dst_vect.val[3] = vcvtq_f32_s32(hihi); + return dst_vect; + } + + template < + typename I, + std::enable_if_t && !std::is_signed_v, int> = 0> + DstVector4Type vector_path(const SrcVectorType src) { + DstVector4Type dst_vect; + uint16x8_t low = vmovl_u8(vget_low_u8(src)); + uint16x8_t hi = vmovl_high_u8(src); + uint32x4_t lowlow = vmovl_u16(vget_low_u16(low)); + uint32x4_t lowhi = vmovl_high_u16(low); + uint32x4_t hilow = vmovl_u16(vget_low_u16(hi)); + uint32x4_t hihi = vmovl_high_u16(hi); + dst_vect.val[0] = vcvtq_f32_u32(lowlow); + dst_vect.val[1] = vcvtq_f32_u32(lowhi); + dst_vect.val[2] = vcvtq_f32_u32(hilow); + dst_vect.val[3] = vcvtq_f32_u32(hihi); + return dst_vect; + } +}; // end of class float_conversion_operation + +template +kleidicv_error_t float_conversion(const InputType* src, size_t src_stride, + OutputType* dst, size_t dst_stride, + size_t width, size_t height) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); + CHECK_IMAGE_SIZE(width, height); + + float_conversion_operation operation; + Rectangle rect{width, height}; + Rows src_rows{src, src_stride}; + Rows dst_rows{dst, dst_stride}; + zip_rows(operation, rect, src_rows, dst_rows); + + return KLEIDICV_OK; +} + +#define KLEIDICV_INSTANTIATE_TEMPLATE(itype, otype) \ + template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t \ + float_conversion(const itype* src, size_t src_stride, \ + otype* dst, size_t dst_stride, size_t width, \ + size_t height) + +KLEIDICV_INSTANTIATE_TEMPLATE(float, int8_t); +KLEIDICV_INSTANTIATE_TEMPLATE(float, uint8_t); +KLEIDICV_INSTANTIATE_TEMPLATE(int8_t, float); +KLEIDICV_INSTANTIATE_TEMPLATE(uint8_t, float); + +} // namespace kleidicv::neon diff --git a/kleidicv/src/conversions/float_conv_sc.h b/kleidicv/src/conversions/float_conv_sc.h new file mode 100644 index 0000000000000000000000000000000000000000..d603d0c31bf2d5e879b3e80c90c5bfa83d1fd162 --- /dev/null +++ b/kleidicv/src/conversions/float_conv_sc.h @@ -0,0 +1,180 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_FLOAT_CONV_SC_H +#define KLEIDICV_FLOAT_CONV_SC_H + +#include +#include + +#include "kleidicv/kleidicv.h" +#include "kleidicv/sve2.h" + +namespace KLEIDICV_TARGET_NAMESPACE { + +template +class float_conversion_operation; + +template +class float_conversion_operation { + public: + using SrcVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits; + using SrcVectorType = typename SrcVecTraits::VectorType; + using IntermediateVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits< + std::conditional_t, int32_t, uint32_t>>; + using IntermediateVectorType = typename IntermediateVecTraits::VectorType; + + void process_row(size_t width, Columns src, + Columns dst) KLEIDICV_STREAMING_COMPATIBLE { + LoopUnroll{width, SrcVecTraits::num_lanes()} + .unroll_twice([&](size_t step) KLEIDICV_STREAMING_COMPATIBLE { + svbool_t pg = SrcVecTraits::svptrue(); + SrcVectorType src_vector1 = svld1(pg, &src[0]); + SrcVectorType src_vector2 = svld1_vnum(pg, &src[0], 1); + IntermediateVectorType result_vector1 = + vector_path(pg, src_vector1); + IntermediateVectorType result_vector2 = + vector_path(pg, src_vector2); + svst1b(pg, &dst[0], result_vector1); + svst1b_vnum(pg, &dst[0], 1, result_vector2); + src += ptrdiff_t(step); + dst += ptrdiff_t(step); + }) + .remaining([&](size_t length, size_t) KLEIDICV_STREAMING_COMPATIBLE { + size_t index = 0; + svbool_t pg = SrcVecTraits::svwhilelt(index, length); + while (svptest_first(SrcVecTraits::svptrue(), pg)) { + SrcVectorType src_vector = svld1(pg, &src[ptrdiff_t(index)]); + IntermediateVectorType result_vector = + vector_path(pg, src_vector); + svst1b(pg, &dst[ptrdiff_t(index)], result_vector); + // Update loop counter and calculate the next governing predicate. + index += SrcVecTraits::num_lanes(); + pg = SrcVecTraits::svwhilelt(index, length); + } + }); + } + + private: + template < + typename O, + std::enable_if_t && std::is_signed_v, int> = 0> + IntermediateVectorType vector_path(svbool_t& pg, SrcVectorType src) + KLEIDICV_STREAMING_COMPATIBLE { + constexpr float min_val = std::numeric_limits::min(); + constexpr float max_val = std::numeric_limits::max(); + + src = svrinti_f32_x(pg, src); + + svbool_t less = svcmplt_n_f32(pg, src, min_val); + src = svdup_n_f32_m(src, less, min_val); + + svbool_t greater = svcmpgt_n_f32(pg, src, max_val); + src = svdup_n_f32_m(src, greater, max_val); + + return svcvt_s32_f32_x(pg, src); + } + + template < + typename O, + std::enable_if_t && !std::is_signed_v, int> = 0> + IntermediateVectorType vector_path(svbool_t& pg, SrcVectorType src) + KLEIDICV_STREAMING_COMPATIBLE { + constexpr float max_val = std::numeric_limits::max(); + + src = svrinti_f32_x(pg, src); + + svbool_t greater = svcmpgt_n_f32(pg, src, max_val); + src = svdup_n_f32_m(src, greater, max_val); + + return svcvt_u32_f32_x(pg, src); + } +}; // end of class float_conversion_operation + +template +class float_conversion_operation { + public: + using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits; + using VectorType = typename VecTraits::VectorType; + void process_row(size_t width, Columns src, + Columns dst) { + LoopUnroll{width, VecTraits::num_lanes()} + .unroll_twice([&](size_t step) KLEIDICV_STREAMING_COMPATIBLE { + svbool_t pg = VecTraits::svptrue(); + auto src_vect1 = load_src(pg, &src[0], 0); + auto src_vect2 = load_src(pg, &src[0], 1); + + VectorType dst_vector1 = vector_path(pg, src_vect1); + VectorType dst_vector2 = vector_path(pg, src_vect2); + svst1(pg, &dst[0], dst_vector1); + svst1_vnum(pg, &dst[0], 1, dst_vector2); + src += ptrdiff_t(step); + dst += ptrdiff_t(step); + }) + .remaining([&](size_t length, size_t) KLEIDICV_STREAMING_COMPATIBLE { + size_t index = 0; + svbool_t pg = VecTraits::svwhilelt(index, length); + while (svptest_first(VecTraits::svptrue(), pg)) { + auto src_vect = load_src(pg, &src[ptrdiff_t(index)], 0); + VectorType dst_vector = vector_path(pg, src_vect); + svst1(pg, &dst[ptrdiff_t(index)], dst_vector); + // Update loop counter and calculate the next governing predicate. + index += VecTraits::num_lanes(); + pg = VecTraits::svwhilelt(index, length); + } + }); + } + + private: + template , int> = 0> + VectorType vector_path(svbool_t& pg, + I src_vector) KLEIDICV_STREAMING_COMPATIBLE { + return svcvt_f32_s32_x(pg, src_vector); + } + template , int> = 0> + VectorType vector_path(svbool_t& pg, + I src_vector) KLEIDICV_STREAMING_COMPATIBLE { + return svcvt_f32_u32_x(pg, src_vector); + } + + template < + typename I, + std::enable_if_t && std::is_signed_v, int> = 0> + svint32_t load_src(svbool_t& pg, const I* src, + size_t vnum) KLEIDICV_STREAMING_COMPATIBLE { + svint32_t src_vect = svld1sb_vnum_s32(pg, src, vnum); + return src_vect; + } + + template < + typename I, + std::enable_if_t && !std::is_signed_v, int> = 0> + svuint32_t load_src(svbool_t& pg, const I* src, + size_t vnum) KLEIDICV_STREAMING_COMPATIBLE { + svuint32_t src_vect = svld1ub_vnum_u32(pg, src, vnum); + return src_vect; + } +}; // end of class float_conversion_operation + +template +static kleidicv_error_t float_conversion_sc( + const InputType* src, size_t src_stride, OutputType* dst, size_t dst_stride, + size_t width, size_t height) KLEIDICV_STREAMING_COMPATIBLE { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); + CHECK_IMAGE_SIZE(width, height); + + float_conversion_operation operation; + Rectangle rect{width, height}; + Rows src_rows{src, src_stride}; + Rows dst_rows{dst, dst_stride}; + zip_rows(operation, rect, src_rows, dst_rows); + + return KLEIDICV_OK; +} + +} // namespace KLEIDICV_TARGET_NAMESPACE + +#endif // KLEIDICV_FLOAT_CONV_SC_H diff --git a/kleidicv/src/conversions/float_conv_sme2.cpp b/kleidicv/src/conversions/float_conv_sme2.cpp new file mode 100644 index 0000000000000000000000000000000000000000..49832008a06f080bdd47d0972b883f618fd801a2 --- /dev/null +++ b/kleidicv/src/conversions/float_conv_sme2.cpp @@ -0,0 +1,28 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "float_conv_sc.h" + +namespace kleidicv::sme2 { + +template +KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t +float_conversion(const InputType* src, size_t src_stride, OutputType* dst, + size_t dst_stride, size_t width, size_t height) { + return float_conversion_sc(src, src_stride, dst, + dst_stride, width, height); +} + +#define KLEIDICV_INSTANTIATE_TEMPLATE(itype, otype) \ + template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t \ + float_conversion(const itype* src, size_t src_stride, \ + otype* dst, size_t dst_stride, size_t width, \ + size_t height) + +KLEIDICV_INSTANTIATE_TEMPLATE(float, int8_t); +KLEIDICV_INSTANTIATE_TEMPLATE(float, uint8_t); +KLEIDICV_INSTANTIATE_TEMPLATE(int8_t, float); +KLEIDICV_INSTANTIATE_TEMPLATE(uint8_t, float); + +} // namespace kleidicv::sme2 diff --git a/kleidicv/src/conversions/float_conv_sve2.cpp b/kleidicv/src/conversions/float_conv_sve2.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6bbd5d72e12ff1118dafcbe756834df90c388121 --- /dev/null +++ b/kleidicv/src/conversions/float_conv_sve2.cpp @@ -0,0 +1,28 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "float_conv_sc.h" + +namespace kleidicv::sve2 { + +template +KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t +float_conversion(const InputType* src, size_t src_stride, OutputType* dst, + size_t dst_stride, size_t width, size_t height) { + return float_conversion_sc(src, src_stride, dst, + dst_stride, width, height); +} + +#define KLEIDICV_INSTANTIATE_TEMPLATE(itype, otype) \ + template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t \ + float_conversion(const itype* src, size_t src_stride, \ + otype* dst, size_t dst_stride, size_t width, \ + size_t height) + +KLEIDICV_INSTANTIATE_TEMPLATE(float, int8_t); +KLEIDICV_INSTANTIATE_TEMPLATE(float, uint8_t); +KLEIDICV_INSTANTIATE_TEMPLATE(int8_t, float); +KLEIDICV_INSTANTIATE_TEMPLATE(uint8_t, float); + +} // namespace kleidicv::sve2 diff --git a/test/api/test_float_conv.cpp b/test/api/test_float_conv.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5f3b0dda38e7f59bb6937f74cdedfdac373bcca5 --- /dev/null +++ b/test/api/test_float_conv.cpp @@ -0,0 +1,462 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include "framework/array.h" +#include "framework/generator.h" +#include "framework/operation.h" +#include "framework/utils.h" +#include "kleidicv/kleidicv.h" +#include "test_config.h" + +#define KLEIDICV_FLOAT_CONVERSION(itype, itype_name, otype, otype_name) \ + KLEIDICV_API_DIFFERENT_IO_TYPES( \ + float_conversion, kleidicv_float_conversion_##itype_name##_##otype_name, \ + itype, otype) + +KLEIDICV_FLOAT_CONVERSION(float, f32, int8_t, s8); +KLEIDICV_FLOAT_CONVERSION(float, f32, uint8_t, u8); +KLEIDICV_FLOAT_CONVERSION(int8_t, s8, float, f32); +KLEIDICV_FLOAT_CONVERSION(uint8_t, u8, float, f32); + +template +class FloatConversionTest final { + private: + template + static constexpr T min() { + return std::numeric_limits::min(); + } + + template + static constexpr T max() { + return std::numeric_limits::max(); + } + + struct Elements { + size_t width; + size_t height; + + std::vector> source_rows; + std::vector> expected_rows; + + Elements(size_t _width, size_t _height, + std::vector>&& _source_rows, + std::vector>&& _expected_rows) + : width(_width), + height(_height), + source_rows(std::move(_source_rows)), + expected_rows(std::move(_expected_rows)) {} + }; + + struct Values { + InputType source; + OutputType expected; + }; + + static float floatval(uint32_t v) { + float result; // Avoid cppcoreguidelines-init-variables. NOLINT + static_assert(sizeof(result) == sizeof(v)); + memcpy(&result, &v, sizeof(result)); + return result; + } + + const float quietNaN = std::numeric_limits::quiet_NaN(); + const float signalingNaN = std::numeric_limits::signaling_NaN(); + const float posInfinity = std::numeric_limits::infinity(); + const float negInfinity = -std::numeric_limits::infinity(); + + const float minusNaN = floatval(0xFF800001); + const float plusNaN = floatval(0x7F800001); + const float plusZero = 0.0F; + const float minusZero = -0.0F; + + const float oneNaN = floatval(0x7FC00001); + const float zeroDivZero = -std::numeric_limits::quiet_NaN(); + const float floatMin = std::numeric_limits::min(); + const float floatMax = std::numeric_limits::max(); + + const float posSubnormalMin = std::numeric_limits::denorm_min(); + const float posSubnormalMax = floatval(0x007FFFFF); + const float negSubnormalMin = -std::numeric_limits::denorm_min(); + const float negSubnormalMax = floatval(0x807FFFFF); + + template , bool> = true, + std::enable_if_t, bool> = true> + const Elements& get_custom_elements() { + static const Elements kTestElements = { + // clang-format off + 4, 8, + {{ + { quietNaN, signalingNaN, posInfinity, negInfinity }, + { minusNaN, plusNaN, plusZero, minusZero }, + { oneNaN, zeroDivZero, floatMin, floatMax }, + { posSubnormalMin, posSubnormalMax, negSubnormalMin, negSubnormalMax }, + { 1111.11, -1112.22, 113.33, 114.44 }, + { 111.51, 112.62, 113.73, 114.84 }, + { 126.66, 127.11, 128.66, 129.11 }, + { 11.5, 12.5, -11.5, -12.5 } + }}, + {{ + { 0, 0, 127, -128 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 127 }, + { 0, 0, 0, 0 }, + { 127, -128, 113, 114 }, + { 112, 113, 114, 115 }, + { 127, 127, 127, 127 }, + { 12, 12, -12, -12 } + }} + // clang-format on + }; + return kTestElements; + } + + template , bool> = true, + std::enable_if_t, bool> = true> + const Elements& get_custom_elements() { + static const Elements kTestElements = { + // clang-format off + 4, 8, + {{ + { quietNaN, signalingNaN, posInfinity, negInfinity }, + { minusNaN, plusNaN, plusZero, minusZero }, + { oneNaN, zeroDivZero, floatMin, floatMax }, + { posSubnormalMin, posSubnormalMax, negSubnormalMin, negSubnormalMax }, + { 1111.11, -1112.22, 113.33, 114.44 }, + { 111.51, 112.62, 113.73, 114.84 }, + { 126.66, 127.11, 128.66, 129.11 }, + { 11.5, 12.5, -11.5, -12.5 } + }}, + {{ + { 0, 0, 255, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 255 }, + { 0, 0, 0, 0 }, + { 255, 0, 113, 114 }, + { 112, 113, 114, 115 }, + { 127, 127, 129, 129 }, + { 12, 12, 0, 0 } + }} + // clang-format on + }; + return kTestElements; + } + + template , bool> = true, + std::enable_if_t, bool> = true> + const Elements& get_custom_elements() { + static const Elements kTestElements = { + // clang-format off + 5, 1, + {{ + { min(), min() + 1, 0, max() - 1, max() } + }}, + {{ + { static_cast(min()), static_cast(min()) + 1.0, 0, + static_cast(max()) - 1.0, static_cast(max()) } + }} + // clang-format on + }; + return kTestElements; + } + + template , bool> = true, + std::enable_if_t, bool> = true> + const Values& get_values() { + static const Values kTestValues = { + // clang-format off + -1000.67F, -128 + // clang-format on + }; + return kTestValues; + } + + template , bool> = true, + std::enable_if_t, bool> = true> + const Values& get_values() { + static const Values kTestValues = { + // clang-format off + -1000.67F, 0 + // clang-format on + }; + return kTestValues; + } + + template , bool> = true, + std::enable_if_t, bool> = true> + const Values& get_values() { + static const Values kTestValues = { + // clang-format off + -127, -127.0 + // clang-format on + }; + return kTestValues; + } + + template , bool> = true, + std::enable_if_t, bool> = true> + const Values& get_values() { + static const Values kTestValues = { + // clang-format off + 255, 255.0 + // clang-format on + }; + return kTestValues; + } + + template , bool> = true, + std::enable_if_t, bool> = true> + void calculate_expected(const test::Array2D& source, + test::Array2D& expected) { + for (size_t hindex = 0; hindex < source.height(); ++hindex) { + for (size_t vindex = 0; vindex < source.width(); ++vindex) { + O calculated = 0; + // NOLINTBEGIN(clang-analyzer-core.uninitialized.Assign) + I result = *source.at(hindex, vindex); + // NOLINTEND(clang-analyzer-core.uninitialized.Assign) + if (result > max()) { + calculated = max(); + } else if (result < min()) { + calculated = min(); + } else { + calculated = result; + } + *expected.at(hindex, vindex) = calculated; + } + } + } + + template , bool> = true, + std::enable_if_t, bool> = true> + void calculate_expected(const test::Array2D& source, + test::Array2D& expected) { + for (size_t hindex = 0; hindex < source.height(); ++hindex) { + for (size_t vindex = 0; vindex < source.width(); ++vindex) { + // NOLINTBEGIN(clang-analyzer-core.uninitialized.Assign) + *expected.at(hindex, vindex) = *source.at(hindex, vindex); + // NOLINTEND(clang-analyzer-core.uninitialized.Assign) + } + } + } + + template + size_t get_linear_height(size_t width, size_t minimum_size) { + size_t image_size = + std::max(minimum_size, static_cast(max() - min())); + size_t height = image_size / width + 1; + + return height; + } + + template + std::tuple, test::Array2D, test::Array2D> + get_linear_arrays(size_t width, size_t height) { + test::Array2D source(width, height, 1, 1); + test::Array2D expected(width, height, 1, 1); + test::Array2D actual(width, height, 1, 1); + + if constexpr (std::is_same_v && std::is_integral_v) { + test::GenerateLinearSeries generator(min()); + source.fill(generator); + } else if constexpr (std::is_integral_v && std::is_same_v) { + test::GenerateLinearSeries generator(min()); + source.fill(generator); + } else { + static_assert(sizeof(I) == 0 && sizeof(O) == 0, "should never happen"); + } + + calculate_expected(source, expected); + + return {source, expected, actual}; + } + + public: + // minimum_size set by caller to trigger the 'big' conversion path. + template + void test_linear(size_t width, size_t minimum_size = 1) { + size_t height = 0; + if constexpr (std::is_same_v && std::is_integral_v) { + height = get_linear_height(width, minimum_size); + } else if constexpr (std::is_integral_v && std::is_same_v) { + height = get_linear_height(width, minimum_size); + } else { + static_assert(sizeof(I) == 0 && sizeof(O) == 0, "should never happen"); + } + + auto arrays = get_linear_arrays(width, height); + + test::Array2D& source = std::get<0>(arrays); + test::Array2D& expected = std::get<1>(arrays); + test::Array2D& actual = std::get<2>(arrays); + + ASSERT_EQ(KLEIDICV_OK, (float_conversion()( + source.data(), source.stride(), actual.data(), + actual.stride(), width, height))); + + EXPECT_EQ_ARRAY2D(expected, actual); + } + + void test_custom() { + auto elements_list = get_custom_elements(); + const size_t& width = elements_list.width; + const size_t& height = elements_list.height; + + test::Array2D source(width, height); + test::Array2D expected(width, height); + test::Array2D actual(width, height); + + for (size_t i = 0; i < height; i++) { + source.set(i, 0, elements_list.source_rows[i]); + expected.set(i, 0, elements_list.expected_rows[i]); + } + + ASSERT_EQ(KLEIDICV_OK, (float_conversion()( + source.data(), source.stride(), actual.data(), + actual.stride(), width, height))); + + EXPECT_EQ_ARRAY2D(expected, actual); + } + + void test_sizing(const size_t width, const size_t height) { + auto values_list = get_values(); + + test::Array2D source(width, height, 1, 1); + test::Array2D expected(width, height, 1, 1); + test::Array2D actual(width, height, 1, 1); + + source.fill(values_list.source); + expected.fill(values_list.expected); + + actual.fill(0); + + ASSERT_EQ(KLEIDICV_OK, (float_conversion()( + source.data(), source.stride(), actual.data(), + actual.stride(), width, height))); + + EXPECT_EQ_ARRAY2D(expected, actual); + } +}; // end of class FloatConversionTest + +template +class FloatConversion : public testing::Test {}; + +using ElementTypes = + ::testing::Types, std::pair, + std::pair, std::pair>; + +// Tests kleidicv_float_conversion API. +TYPED_TEST_SUITE(FloatConversion, ElementTypes); + +TYPED_TEST(FloatConversion, NullPointer) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + InputType src[1] = {}; + OutputType dst[1]; + test::test_null_args(float_conversion(), src, + sizeof(InputType), dst, sizeof(OutputType), 1, 1); +} + +TYPED_TEST(FloatConversion, OversizeImage) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + InputType src[1] = {}; + OutputType dst[1]; + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + (float_conversion()( + src, sizeof(InputType), dst, sizeof(OutputType), + KLEIDICV_MAX_IMAGE_PIXELS + 1, 1))); + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + (float_conversion()( + src, sizeof(InputType), dst, sizeof(OutputType), 1, + KLEIDICV_MAX_IMAGE_PIXELS + 1))); + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + (float_conversion()( + src, sizeof(TypeParam), dst, sizeof(OutputType), + KLEIDICV_MAX_IMAGE_PIXELS + 1, KLEIDICV_MAX_IMAGE_PIXELS + 1))); +} + +TYPED_TEST(FloatConversion, Scalar) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + FloatConversionTest{} + .template test_linear( + test::Options::vector_length() - 1); +} +TYPED_TEST(FloatConversion, Vector) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + FloatConversionTest{} + .template test_linear( + test::Options::vector_length() * 2); +} +TYPED_TEST(FloatConversion, Custom) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + FloatConversionTest{}.test_custom(); +} +TYPED_TEST(FloatConversion, SizingFits128VectorSize) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + FloatConversionTest{}.test_sizing(4, 1); +} +TYPED_TEST(FloatConversion, SizingFits128VectorSize2x) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + FloatConversionTest{}.test_sizing(4, 2); +} +TYPED_TEST(FloatConversion, SizingFits128VectorSize3x) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + FloatConversionTest{}.test_sizing(12, 1); +} +TYPED_TEST(FloatConversion, SizingFits512VectorSize) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + FloatConversionTest{}.test_sizing(4, 4); +} +TYPED_TEST(FloatConversion, SizingFits512VectorSize2x) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + FloatConversionTest{}.test_sizing(16, 2); +} +TYPED_TEST(FloatConversion, SizingFits512VectorSize3x) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + FloatConversionTest{}.test_sizing(6, 8); +} +TYPED_TEST(FloatConversion, Sizing128OneRemaining) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + FloatConversionTest{}.test_sizing(1, 17); +} +TYPED_TEST(FloatConversion, Sizing128AllButOneRemaining) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + FloatConversionTest{}.test_sizing(5, 3); +} +TYPED_TEST(FloatConversion, SizingAboutHalfRemaining) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + FloatConversionTest{}.test_sizing(38, 1); +} +TYPED_TEST(FloatConversion, SizingEmpty) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + FloatConversionTest{}.test_sizing(0, 0); +} +TYPED_TEST(FloatConversion, SizingOne) { + using InputType = typename TypeParam::first_type; + using OutputType = typename TypeParam::second_type; + FloatConversionTest{}.test_sizing(1, 1); +}