diff --git a/CHANGELOG.md b/CHANGELOG.md index 831a0eb8513a8fed1e853aa38829ee923bda3903..7c04ee50f3b7e61b5314a019ad25db1d265a2e91 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ This changelog aims to follow the guiding principles of ## 0.2.0 - not yet released ### Added +- Exponential function for float. ### Fixed diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp index b94c451e0a743b44758576a4722d5d38063aa697..380931a3cc3d334049c266735c62ee3006c343e0 100644 --- a/adapters/opencv/kleidicv_hal.cpp +++ b/adapters/opencv/kleidicv_hal.cpp @@ -706,4 +706,9 @@ int convertTo(const uchar *src_data, size_t src_step, int src_depth, return CV_HAL_ERROR_NOT_IMPLEMENTED; } +int exp32f(const float *src, float *dst, int len) { + return convert_error(kleidicv_exp_f32(src, len * sizeof(float), dst, + len * sizeof(float), len, 1)); +} + } // namespace kleidicv::hal diff --git a/adapters/opencv/kleidicv_hal.h b/adapters/opencv/kleidicv_hal.h index a25be77194304c9dfe8896a75188837085c2dacc..25f1b9587c6dddb3eee33efb0323b85a3153ffa4 100644 --- a/adapters/opencv/kleidicv_hal.h +++ b/adapters/opencv/kleidicv_hal.h @@ -97,6 +97,8 @@ int convertTo(const uchar *src_data, size_t src_step, int src_depth, uchar *dst_data, size_t dst_step, int dst_depth, int width, int height, double scale, double shift); +int exp32f(const float *src, float *dst, int len); + } // namespace hal } // namespace kleidicv @@ -313,6 +315,14 @@ static inline int kleidicv_convertTo_with_fallback( #define cv_hal_convertTo kleidicv_convertTo_with_fallback #endif // defined(cv_hal_convertTo) +// exp32f +static inline int kleidicv_exp32f_with_fallback(const float *src, float *dst, + int len) { + return KLEIDICV_HAL_FALLBACK_FORWARD(exp32f, cv_hal_exp32f, src, dst, len); +} +#undef cv_hal_exp32f +#define cv_hal_exp32f kleidicv_exp32f_with_fallback + #endif // OPENCV_CORE_HAL_REPLACEMENT_HPP // Remove no longer needed macro definitions. diff --git a/conformity/opencv/CMakeLists.txt b/conformity/opencv/CMakeLists.txt index 06912c661efd02d52d4f9fabf39644410e7691e6..88a80ff36d78da66b541b9f684638db21d96f7e1 100644 --- a/conformity/opencv/CMakeLists.txt +++ b/conformity/opencv/CMakeLists.txt @@ -34,6 +34,7 @@ add_executable( test_min_max.cpp test_rgb2yuv.cpp test_sobel.cpp + test_exp.cpp ) target_link_libraries( @@ -72,6 +73,7 @@ add_executable( test_min_max.cpp test_rgb2yuv.cpp test_sobel.cpp + test_exp.cpp ) target_link_libraries( diff --git a/conformity/opencv/test_exp.cpp b/conformity/opencv/test_exp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..eb97c96ac5d4c23758498efc0afdc1e83c2a0da8 --- /dev/null +++ b/conformity/opencv/test_exp.cpp @@ -0,0 +1,56 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "test_exp.h" + +#include +#include +#include + +#include "opencv2/core/hal/interface.h" + +static cv::Mat exec_exp(cv::Mat& input_mat) { + cv::Mat result; + cv::exp(input_mat, result); + return result; +} + +#if MANAGER +template +bool test_exp(int index, RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue) { + cv::RNG rng(0); + + for (size_t x = 5; x <= 16; ++x) { + for (size_t y = 5; y <= 16; ++y) { + cv::Mat input_mat(x, y, CV_32FC(Channels)); + // Use inputs where results are not flowing over or under + rng.fill(input_mat, cv::RNG::UNIFORM, -80.0, 80.0); + cv::Mat actual_mat = exec_exp(input_mat); + cv::Mat expected_mat = get_expected_from_subordinate( + index, request_queue, reply_queue, input_mat); + + // OpenCV works with less precision, so a relatively big expected error + // range is defined + if (are_float_matrices_different(1.5, expected_mat, actual_mat)) { + fail_print_matrices(x, y, input_mat, actual_mat, expected_mat); + } + } + } + + return false; +} +#endif + +std::vector& exp_tests_get() { + // clang-format off + static std::vector tests = { + TEST("Exp float, 1 channel", (test_exp<1>), exec_exp), + TEST("Exp float, 2 channel", (test_exp<2>), exec_exp), + TEST("Exp float, 3 channel", (test_exp<3>), exec_exp), + TEST("Exp float, 4 channel", (test_exp<4>), exec_exp), + }; + // clang-format on + return tests; +} diff --git a/conformity/opencv/test_exp.h b/conformity/opencv/test_exp.h new file mode 100644 index 0000000000000000000000000000000000000000..1da69069511dbc4d129bf4526b34eb44e04559af --- /dev/null +++ b/conformity/opencv/test_exp.h @@ -0,0 +1,14 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_OPENCV_CONFORMITY_TEST_EXP_H_ +#define KLEIDICV_OPENCV_CONFORMITY_TEST_EXP_H_ + +#include + +#include "tests.h" + +std::vector& exp_tests_get(); + +#endif // KLEIDICV_OPENCV_CONFORMITY_TEST_EXP_H_ diff --git a/conformity/opencv/tests.cpp b/conformity/opencv/tests.cpp index 257f00d7b32b5b7d52179ced0a3dce80ef371c1d..657d5551e77f5e3da6e021a7c3ed6b5616ce6353 100644 --- a/conformity/opencv/tests.cpp +++ b/conformity/opencv/tests.cpp @@ -10,6 +10,7 @@ #include "opencv2/core.hpp" #include "opencv2/imgproc.hpp" +#include "test_exp.h" #include "test_gaussian_blur.h" #include "test_min_max.h" #include "test_rgb2yuv.h" @@ -30,6 +31,7 @@ std::vector all_tests = merge_tests({ min_max_tests_get, rgb2yuv_tests_get, sobel_tests_get, + exp_tests_get, }); #if MANAGER diff --git a/conformity/opencv/tests.h b/conformity/opencv/tests.h index 073ee69997ac7549d72cd13d5e8a343d4fac72ef..cce5aefc46873029b6984591c514c6bdc2e0aea4 100644 --- a/conformity/opencv/tests.h +++ b/conformity/opencv/tests.h @@ -17,13 +17,45 @@ static auto abs_diff(T a, T b) { return a > b ? a - b : b - a; } -template -bool are_matrices_different(T threshold, cv::Mat& A, cv::Mat& B) { +static inline bool check_matrix_size_and_type(cv::Mat& A, cv::Mat& B) { if (A.rows != B.rows || A.cols != B.cols || A.type() != B.type()) { std::cout << "Matrix size/type mismatch" << std::endl; return true; } + return false; +} + +// Expected matrix should not contain zeros +template +bool are_float_matrices_different(T threshold_percent, cv::Mat& exp, + cv::Mat& act) { + if (check_matrix_size_and_type(exp, act)) { + return true; + } + + for (int i = 0; i < exp.rows; ++i) { + for (int j = 0; j < (exp.cols * CV_MAT_CN(exp.type())); ++j) { + T diff = abs_diff(exp.at(i, j), act.at(i, j)); + T diff_percentage = (diff / std::abs(exp.at(i, j))) * 100; + if (diff_percentage > threshold_percent) { + std::cout << "=== Mismatch at: " << i << " " << j << std::endl + << "Relative diff: " << diff_percentage << std::endl + << std::endl; + return true; + } + } + } + + return false; +} + +template +bool are_matrices_different(T threshold, cv::Mat& A, cv::Mat& B) { + if (check_matrix_size_and_type(A, B)) { + return true; + } + for (int i = 0; i < A.rows; ++i) { for (int j = 0; j < (A.cols * CV_MAT_CN(A.type())); ++j) { if (abs_diff(A.at(i, j), B.at(i, j)) > threshold) { diff --git a/doc/functionality.md b/doc/functionality.md index a9d7c45c05ff6cf6587be31f6d2fda07a67569ee..0edc0259ad718c8f5d1bb6b4fa4945631b56f20d 100644 --- a/doc/functionality.md +++ b/doc/functionality.md @@ -8,16 +8,17 @@ SPDX-License-Identifier: Apache-2.0 Note: functions listed here are not necessarily exposed to adapter API layer. See `doc/opencv.md` for details of the functionality available in OpenCV. -## Basic arithmetic operations -| | s8 | u8 | s16 | u16 | s32 | u32 | s64 | u64 | -|------------------------------|-----|-----|-----|-----|-----|-----|-----|-----| -| Saturating Add | x | x | x | x | x | x | x | x | -| Saturating Sub | x | x | x | x | x | x | x | x | -| Saturating Absdiff | x | x | x | x | x | | | | -| Saturating Multiply | x | x | x | x | x | | | | -| Threshold binary | | x | | | | | | | -| SaturatingAddAbsWithThreshold| | | x | | | | | | -| Scale | | x | | | | | | | +## Arithmetic operations +| | s8 | u8 | s16 | u16 | s32 | u32 | s64 | u64 | f32 | f64 | +|------------------------------|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----| +| Exp | | | | | | | | | x | | +| Saturating Add | x | x | x | x | x | x | x | x | | | +| Saturating Sub | x | x | x | x | x | x | x | x | | | +| Saturating Absdiff | x | x | x | x | x | | | | | | +| Saturating Multiply | x | x | x | x | x | | | | | | +| Threshold binary | | x | | | | | | | | | +| SaturatingAddAbsWithThreshold| | | x | | | | | | | | +| Scale | | x | | | | | | | | | ## Color conversions | | u8 | diff --git a/doc/opencv.md b/doc/opencv.md index 8dccc17fa257eaf56dceebfc4c8fcbe0e741a54a..99e287c566a611e793b08bf4ef240a0ea2cc8ea9 100644 --- a/doc/opencv.md +++ b/doc/opencv.md @@ -168,3 +168,6 @@ Notes on parameters: ### `convertTo` Currently converting to different data types is not supported. This function scales given input of `src_depth == CV_8U` using `scale` and `shift`. + +### `exp` +Exponential function. Currently only `CV_32F` type is supported. diff --git a/kleidicv/include/kleidicv/kleidicv.h b/kleidicv/include/kleidicv/kleidicv.h index cb0381c23b314a00ee4bec749f5fdfd39e92b44a..cb6fe639a86e42c87409ec6f955c71c7005835e2 100644 --- a/kleidicv/include/kleidicv/kleidicv.h +++ b/kleidicv/include/kleidicv/kleidicv.h @@ -1306,6 +1306,32 @@ KLEIDICV_API_DECLARATION(kleidicv_scale_u8, const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, size_t width, size_t height, float scale, float shift); +/// Exponential function, input is the elements in `src`, output is the elements +/// in `dst`. +/// +/// In case of 'float' type the maximum error is 0.36565+0.5 ULP, or the error +/// of the toolchain's expf implementation, if it is bigger. +/// +/// Source and destination data length is `width` * `height`. Number of elements +/// is limited to @ref KLEIDICV_MAX_IMAGE_PIXELS. +/// +/// @param src Pointer to the source data. Must be non-null. +/// @param src_stride Distance in bytes from the start of one row to the +/// start of the next row for the source data. Must +/// not be less than width * sizeof(type). +/// Must be a multiple of sizeof(type). +/// @param dst Pointer to the destination data. Must be non-null. +/// @param dst_stride Distance in bytes from the start of one row to the +/// start of the next row for the destination data. Must +/// not be less than width * sizeof(type). +/// Must be a multiple of sizeof(type). +/// @param width Number of pixels in a row. +/// @param height Number of rows in the data. +/// +KLEIDICV_API_DECLARATION(kleidicv_exp_f32, const float *src, size_t src_stride, + float *dst, size_t dst_stride, size_t width, + size_t height); + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/kleidicv/include/kleidicv/neon_intrinsics.h b/kleidicv/include/kleidicv/neon_intrinsics.h index 22ef062f101f4527f2a8c0654bd198c530b1cf19..ed11762fbeb7e7f5fc442a9252c9283620755086 100644 --- a/kleidicv/include/kleidicv/neon_intrinsics.h +++ b/kleidicv/include/kleidicv/neon_intrinsics.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2023 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 @@ -343,59 +343,65 @@ static inline float32x4x4_t vld1q_x4(const float32_t *src) { return vld1q_f32_x4 static inline void vst1(uint8_t *dst, uint8x8_t vec) { vst1_u8(dst, vec); } -static inline void vst1q(int8_t *dst, int8x16_t vec) { vst1q_s8(dst, vec); } -static inline void vst1q(uint8_t *dst, uint8x16_t vec) { vst1q_u8(dst, vec); } -static inline void vst1q(int16_t *dst, int16x8_t vec) { vst1q_s16(dst, vec); } -static inline void vst1q(uint16_t *dst, uint16x8_t vec) { vst1q_u16(dst, vec); } -static inline void vst1q(int32_t *dst, int32x4_t vec) { vst1q_s32(dst, vec); } -static inline void vst1q(uint32_t *dst, uint32x4_t vec) { vst1q_u32(dst, vec); } -static inline void vst1q(int64_t *dst, int64x2_t vec) { vst1q_s64(dst, vec); } -static inline void vst1q(uint64_t *dst, uint64x2_t vec) { vst1q_u64(dst, vec); } - -static inline void vst2q(int8_t *dst, int8x16x2_t vec) { vst2q_s8(dst, vec); } -static inline void vst2q(uint8_t *dst, uint8x16x2_t vec) { vst2q_u8(dst, vec); } -static inline void vst2q(int16_t *dst, int16x8x2_t vec) { vst2q_s16(dst, vec); } -static inline void vst2q(uint16_t *dst, uint16x8x2_t vec) { vst2q_u16(dst, vec); } -static inline void vst2q(int32_t *dst, int32x4x2_t vec) { vst2q_s32(dst, vec); } -static inline void vst2q(uint32_t *dst, uint32x4x2_t vec) { vst2q_u32(dst, vec); } -static inline void vst2q(int64_t *dst, int64x2x2_t vec) { vst2q_s64(dst, vec); } -static inline void vst2q(uint64_t *dst, uint64x2x2_t vec) { vst2q_u64(dst, vec); } - -static inline void vst3q(int8_t *dst, int8x16x3_t vec) { vst3q_s8(dst, vec); } -static inline void vst3q(uint8_t *dst, uint8x16x3_t vec) { vst3q_u8(dst, vec); } -static inline void vst3q(int16_t *dst, int16x8x3_t vec) { vst3q_s16(dst, vec); } -static inline void vst3q(uint16_t *dst, uint16x8x3_t vec) { vst3q_u16(dst, vec); } -static inline void vst3q(int32_t *dst, int32x4x3_t vec) { vst3q_s32(dst, vec); } -static inline void vst3q(uint32_t *dst, uint32x4x3_t vec) { vst3q_u32(dst, vec); } -static inline void vst3q(int64_t *dst, int64x2x3_t vec) { vst3q_s64(dst, vec); } -static inline void vst3q(uint64_t *dst, uint64x2x3_t vec) { vst3q_u64(dst, vec); } - -static inline void vst4q(int8_t *dst, int8x16x4_t vec) { vst4q_s8(dst, vec); } -static inline void vst4q(uint8_t *dst, uint8x16x4_t vec) { vst4q_u8(dst, vec); } -static inline void vst4q(int16_t *dst, int16x8x4_t vec) { vst4q_s16(dst, vec); } -static inline void vst4q(uint16_t *dst, uint16x8x4_t vec) { vst4q_u16(dst, vec); } -static inline void vst4q(int32_t *dst, int32x4x4_t vec) { vst4q_s32(dst, vec); } -static inline void vst4q(uint32_t *dst, uint32x4x4_t vec) { vst4q_u32(dst, vec); } -static inline void vst4q(int64_t *dst, int64x2x4_t vec) { vst4q_s64(dst, vec); } -static inline void vst4q(uint64_t *dst, uint64x2x4_t vec) { vst4q_u64(dst, vec); } - -static inline void vst1q_x2(int8_t *dst, int8x16x2_t vec) { vst1q_s8_x2(dst, vec); } -static inline void vst1q_x2(uint8_t *dst, uint8x16x2_t vec) { vst1q_u8_x2(dst, vec); } -static inline void vst1q_x2(int16_t *dst, int16x8x2_t vec) { vst1q_s16_x2(dst, vec); } -static inline void vst1q_x2(uint16_t *dst, uint16x8x2_t vec) { vst1q_u16_x2(dst, vec); } -static inline void vst1q_x2(int32_t *dst, int32x4x2_t vec) { vst1q_s32_x2(dst, vec); } -static inline void vst1q_x2(uint32_t *dst, uint32x4x2_t vec) { vst1q_u32_x2(dst, vec); } -static inline void vst1q_x2(int64_t *dst, int64x2x2_t vec) { vst1q_s64_x2(dst, vec); } -static inline void vst1q_x2(uint64_t *dst, uint64x2x2_t vec) { vst1q_u64_x2(dst, vec); } - -static inline void vst1q_x4(int8_t *dst, int8x16x4_t vec) { vst1q_s8_x4(dst, vec); } -static inline void vst1q_x4(uint8_t *dst, uint8x16x4_t vec) { vst1q_u8_x4(dst, vec); } -static inline void vst1q_x4(int16_t *dst, int16x8x4_t vec) { vst1q_s16_x4(dst, vec); } -static inline void vst1q_x4(uint16_t *dst, uint16x8x4_t vec) { vst1q_u16_x4(dst, vec); } -static inline void vst1q_x4(int32_t *dst, int32x4x4_t vec) { vst1q_s32_x4(dst, vec); } -static inline void vst1q_x4(uint32_t *dst, uint32x4x4_t vec) { vst1q_u32_x4(dst, vec); } -static inline void vst1q_x4(int64_t *dst, int64x2x4_t vec) { vst1q_s64_x4(dst, vec); } -static inline void vst1q_x4(uint64_t *dst, uint64x2x4_t vec) { vst1q_u64_x4(dst, vec); } +static inline void vst1q(int8_t *dst, int8x16_t vec) { vst1q_s8(dst, vec); } +static inline void vst1q(uint8_t *dst, uint8x16_t vec) { vst1q_u8(dst, vec); } +static inline void vst1q(int16_t *dst, int16x8_t vec) { vst1q_s16(dst, vec); } +static inline void vst1q(uint16_t *dst, uint16x8_t vec) { vst1q_u16(dst, vec); } +static inline void vst1q(int32_t *dst, int32x4_t vec) { vst1q_s32(dst, vec); } +static inline void vst1q(uint32_t *dst, uint32x4_t vec) { vst1q_u32(dst, vec); } +static inline void vst1q(int64_t *dst, int64x2_t vec) { vst1q_s64(dst, vec); } +static inline void vst1q(uint64_t *dst, uint64x2_t vec) { vst1q_u64(dst, vec); } +static inline void vst1q(float32_t *dst, float32x4_t vec) { vst1q_f32(dst, vec); } + +static inline void vst2q(int8_t *dst, int8x16x2_t vec) { vst2q_s8(dst, vec); } +static inline void vst2q(uint8_t *dst, uint8x16x2_t vec) { vst2q_u8(dst, vec); } +static inline void vst2q(int16_t *dst, int16x8x2_t vec) { vst2q_s16(dst, vec); } +static inline void vst2q(uint16_t *dst, uint16x8x2_t vec) { vst2q_u16(dst, vec); } +static inline void vst2q(int32_t *dst, int32x4x2_t vec) { vst2q_s32(dst, vec); } +static inline void vst2q(uint32_t *dst, uint32x4x2_t vec) { vst2q_u32(dst, vec); } +static inline void vst2q(int64_t *dst, int64x2x2_t vec) { vst2q_s64(dst, vec); } +static inline void vst2q(uint64_t *dst, uint64x2x2_t vec) { vst2q_u64(dst, vec); } +static inline void vst2q(float32_t *dst, float32x4x2_t vec) { vst2q_f32(dst, vec); } + +static inline void vst3q(int8_t *dst, int8x16x3_t vec) { vst3q_s8(dst, vec); } +static inline void vst3q(uint8_t *dst, uint8x16x3_t vec) { vst3q_u8(dst, vec); } +static inline void vst3q(int16_t *dst, int16x8x3_t vec) { vst3q_s16(dst, vec); } +static inline void vst3q(uint16_t *dst, uint16x8x3_t vec) { vst3q_u16(dst, vec); } +static inline void vst3q(int32_t *dst, int32x4x3_t vec) { vst3q_s32(dst, vec); } +static inline void vst3q(uint32_t *dst, uint32x4x3_t vec) { vst3q_u32(dst, vec); } +static inline void vst3q(int64_t *dst, int64x2x3_t vec) { vst3q_s64(dst, vec); } +static inline void vst3q(uint64_t *dst, uint64x2x3_t vec) { vst3q_u64(dst, vec); } +static inline void vst3q(float32_t *dst, float32x4x3_t vec) { vst3q_f32(dst, vec); } + +static inline void vst4q(int8_t *dst, int8x16x4_t vec) { vst4q_s8(dst, vec); } +static inline void vst4q(uint8_t *dst, uint8x16x4_t vec) { vst4q_u8(dst, vec); } +static inline void vst4q(int16_t *dst, int16x8x4_t vec) { vst4q_s16(dst, vec); } +static inline void vst4q(uint16_t *dst, uint16x8x4_t vec) { vst4q_u16(dst, vec); } +static inline void vst4q(int32_t *dst, int32x4x4_t vec) { vst4q_s32(dst, vec); } +static inline void vst4q(uint32_t *dst, uint32x4x4_t vec) { vst4q_u32(dst, vec); } +static inline void vst4q(int64_t *dst, int64x2x4_t vec) { vst4q_s64(dst, vec); } +static inline void vst4q(uint64_t *dst, uint64x2x4_t vec) { vst4q_u64(dst, vec); } +static inline void vst4q(float32_t *dst, float32x4x4_t vec) { vst4q_f32(dst, vec); } + +static inline void vst1q_x2(int8_t *dst, int8x16x2_t vec) { vst1q_s8_x2(dst, vec); } +static inline void vst1q_x2(uint8_t *dst, uint8x16x2_t vec) { vst1q_u8_x2(dst, vec); } +static inline void vst1q_x2(int16_t *dst, int16x8x2_t vec) { vst1q_s16_x2(dst, vec); } +static inline void vst1q_x2(uint16_t *dst, uint16x8x2_t vec) { vst1q_u16_x2(dst, vec); } +static inline void vst1q_x2(int32_t *dst, int32x4x2_t vec) { vst1q_s32_x2(dst, vec); } +static inline void vst1q_x2(uint32_t *dst, uint32x4x2_t vec) { vst1q_u32_x2(dst, vec); } +static inline void vst1q_x2(int64_t *dst, int64x2x2_t vec) { vst1q_s64_x2(dst, vec); } +static inline void vst1q_x2(uint64_t *dst, uint64x2x2_t vec) { vst1q_u64_x2(dst, vec); } +static inline void vst1q_x2(float32_t *dst, float32x4x2_t vec) { vst1q_f32_x2(dst, vec); } + +static inline void vst1q_x4(int8_t *dst, int8x16x4_t vec) { vst1q_s8_x4(dst, vec); } +static inline void vst1q_x4(uint8_t *dst, uint8x16x4_t vec) { vst1q_u8_x4(dst, vec); } +static inline void vst1q_x4(int16_t *dst, int16x8x4_t vec) { vst1q_s16_x4(dst, vec); } +static inline void vst1q_x4(uint16_t *dst, uint16x8x4_t vec) { vst1q_u16_x4(dst, vec); } +static inline void vst1q_x4(int32_t *dst, int32x4x4_t vec) { vst1q_s32_x4(dst, vec); } +static inline void vst1q_x4(uint32_t *dst, uint32x4x4_t vec) { vst1q_u32_x4(dst, vec); } +static inline void vst1q_x4(int64_t *dst, int64x2x4_t vec) { vst1q_s64_x4(dst, vec); } +static inline void vst1q_x4(uint64_t *dst, uint64x2x4_t vec) { vst1q_u64_x4(dst, vec); } +static inline void vst1q_x4(float32_t *dst, float32x4x4_t vec) { vst1q_f32_x4(dst, vec); } // ----------------------------------------------------------------------------- // vreinterpret* diff --git a/kleidicv/src/arithmetics/exp_api.cpp b/kleidicv/src/arithmetics/exp_api.cpp new file mode 100644 index 0000000000000000000000000000000000000000..39c492719c72a09e4b958b5007f59e945c0821e2 --- /dev/null +++ b/kleidicv/src/arithmetics/exp_api.cpp @@ -0,0 +1,25 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "kleidicv/dispatch.h" +#include "kleidicv/kleidicv.h" +#include "kleidicv/types.h" + +namespace kleidicv { + +namespace neon { + +template +kleidicv_error_t exp(const T* src, size_t src_stride, T* dst, size_t dst_stride, + size_t width, size_t height); + +} // namespace neon + +} // namespace kleidicv + +#define KLEIDICV_DEFINE_C_API(name, type) \ + KLEIDICV_MULTIVERSION_C_API(name, &kleidicv::neon::exp, nullptr, \ + nullptr) + +KLEIDICV_DEFINE_C_API(kleidicv_exp_f32, float); diff --git a/kleidicv/src/arithmetics/exp_neon.cpp b/kleidicv/src/arithmetics/exp_neon.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5414247934e4de4318206d607aaef104c0048e74 --- /dev/null +++ b/kleidicv/src/arithmetics/exp_neon.cpp @@ -0,0 +1,101 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include "kleidicv/kleidicv.h" +#include "kleidicv/neon.h" + +namespace kleidicv::neon { + +template +class Exp; + +template <> +class Exp final : public UnrollOnce { + public: + using VecTraits = neon::VecTraits; + using VectorType = typename VecTraits::VectorType; + + VectorType vector_path(VectorType src) { + float32x4_t n, r, scale, poly, absn, z; + uint32x4_t cmp, e; + + /* exp(x) = 2^n * poly(r), with poly(r) in [1/sqrt(2),sqrt(2)] + x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ + z = vfmaq_f32(vdupq_n(kShift), src, vdupq_n(kInvLn2)); + n = z - vdupq_n(kShift); + r = vfmaq_f32(src, n, vdupq_n(-kLn2Hi)); + r = vfmaq_f32(r, n, vdupq_n(-kLn2Lo)); + e = vreinterpretq_u32_f32(z) << 23; + scale = vreinterpretq_f32_u32(e + vdupq_n(0x3f800000)); + absn = vabsq_f32(n); + cmp = absn > vdupq_n(126.0F); + poly = vfmaq_f32(vdupq_n(kPoly[1]), vdupq_n(kPoly[0]), r); + poly = vfmaq_f32(vdupq_n(kPoly[2]), poly, r); + poly = vfmaq_f32(vdupq_n(kPoly[3]), poly, r); + poly = vfmaq_f32(vdupq_n(kPoly[4]), poly, r); + poly = vfmaq_f32(vdupq_n(1.0F), poly, r); + poly = vfmaq_f32(vdupq_n(1.0F), poly, r); + if (KLEIDICV_UNLIKELY(v_any_u32(cmp))) { + return specialcase(poly, n, e, absn); + } + return scale * poly; + } + + float scalar_path(float src) { return expf(src); } + + private: + static int v_any_u32(uint32x4_t x) { + /* assume elements in x are either 0 or -1u. */ + return vpaddd_u64(vreinterpretq_u64_u32(x)) != 0; + } + + static float32x4_t specialcase(float32x4_t poly, float32x4_t n, uint32x4_t e, + float32x4_t absn) { + /* 2^n may overflow, break it up into s1*s2. */ + uint32x4_t b = (n <= vdupq_n(0.0F)) & vdupq_n(0x83000000); + float32x4_t s1 = vreinterpretq_f32_u32(vdupq_n(0x7f000000) + b); + float32x4_t s2 = vreinterpretq_f32_u32(e - b); + uint32x4_t cmp = absn > vdupq_n(192.0F); + float32x4_t r1 = s1 * s1; + float32x4_t r0 = poly * s1 * s2; + return vreinterpretq_f32_u32((cmp & vreinterpretq_u32_f32(r1)) | + (~cmp & vreinterpretq_u32_f32(r0))); + } + + static constexpr float kShift = 0x1.8p23F; + static constexpr float kInvLn2 = 0x1.715476p+0F; + static constexpr float kLn2Hi = 0x1.62e4p-1F; + static constexpr float kLn2Lo = 0x1.7f7d1cp-20F; + static constexpr float kPoly[] = { + /* maxerr: 0.36565 +0.5 ulp. */ + 0x1.6a6000p-10F, 0x1.12718ep-7F, 0x1.555af0p-5F, + 0x1.555430p-3F, 0x1.fffff4p-2F, + }; +}; // end of class Exp + +template +kleidicv_error_t exp(const T* src, size_t src_stride, T* dst, size_t dst_stride, + size_t width, size_t height) { + CHECK_POINTER_AND_STRIDE(src, src_stride); + CHECK_POINTER_AND_STRIDE(dst, dst_stride); + CHECK_IMAGE_SIZE(width, height); + + Exp operation; + Rectangle rect{width, height}; + Rows src_rows{src, src_stride}; + Rows dst_rows{dst, dst_stride}; + apply_operation_by_rows(operation, rect, src_rows, dst_rows); + return KLEIDICV_OK; +} + +#define KLEIDICV_INSTANTIATE_TEMPLATE(type) \ + template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t exp( \ + const type* src, size_t src_stride, type* dst, size_t dst_stride, \ + size_t width, size_t height) + +KLEIDICV_INSTANTIATE_TEMPLATE(float); + +} // namespace kleidicv::neon diff --git a/scripts/ci-opencv.sh b/scripts/ci-opencv.sh index a0982daffd444003b7924cef0c1009d1719f93f1..00ab6b07b78646ed827f49c1e0639bbdd868770a 100755 --- a/scripts/ci-opencv.sh +++ b/scripts/ci-opencv.sh @@ -54,6 +54,7 @@ CORE_TEST_PATTERNS=( '*Core_Transpose*' '*Core_MinMaxLoc*' '*Core_ConvertScale*' + '*Core_Exp*' ) CORE_TEST_PATTERNS_STR="$(join_strings_with_colon "${CORE_TEST_PATTERNS[*]}")" ../../../conformity/opencv_kleidicv/bin/opencv_test_core \ diff --git a/test/api/test_exp.cpp b/test/api/test_exp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9347b21c10716c7e8729fbbab66ac6f92b42cb60 --- /dev/null +++ b/test/api/test_exp.cpp @@ -0,0 +1,239 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include + +#include "framework/generator.h" +#include "framework/operation.h" +#include "framework/utils.h" +#include "kleidicv/kleidicv.h" + +#define KLEIDICV_EXP(type, suffix) \ + KLEIDICV_API(exp, kleidicv_exp_##suffix, type) + +KLEIDICV_EXP(float, f32); + +static void check_1ulp_error(test::Array2D& expected_array, + test::Array2D& actual_array) { + for (size_t i = 0; i < expected_array.height(); ++i) { + for (size_t j = 0; j < expected_array.width(); ++j) { + // Seems like clang-tidy does not understand what the fill member function + // of test::Array2D does, so these exceptions are required + // NOLINTBEGIN(clang-analyzer-core.uninitialized.Assign) + float expected = *(expected_array.at(i, j)); + // NOLINTEND(clang-analyzer-core.uninitialized.Assign) + + float actual = *(actual_array.at(i, j)); + // Error of 1 ULP means that actual is either same as expected, or + // the next float value in negative of positive direction + EXPECT_EQ(std::nextafterf(actual, expected), expected); + } + } +} + +template +class ExpTestSpecial; + +template <> +class ExpTestSpecial final : public UnaryOperationTest { + using ElementType = float; + using Elements = typename UnaryOperationTest::Elements; + + public: + ExpTestSpecial() : test_elements_(input_values().size()) { + auto inputs = input_values(); + + for (size_t i = 0; i < inputs.size(); ++i) { + test_elements_[i].values[0] = inputs[i]; + // Expected values calculated as doubles to have 'perfect' references. + // As the NEON implementation reuses the toolchain's expf implementation + // for the tail path, the test expects that the error for expf is also + // less than 1 ULP. + test_elements_[i].values[1] = + static_cast(exp(static_cast(inputs[i]))); + } + } + + private: + static const std::vector& input_values() { + static const std::vector kInputValues = { + -105.31, -100.07, -81.012, -47.66, -3.1088, -0.21, + 0.7, 6.2, 39.7201, 86.11, 88.947}; + + return kInputValues; + } + + kleidicv_error_t call_api() override { + return exp()( + this->inputs_[0].data(), this->inputs_[0].stride(), + this->actual_[0].data(), this->actual_[0].stride(), this->width(), + this->height()); + } + + void check(kleidicv_error_t err) override { + EXPECT_EQ(KLEIDICV_OK, err); + check_1ulp_error(this->expected_[0], this->actual_[0]); + } + + const std::vector& test_elements() override { + return test_elements_; + } + + void setup() override { + // Default input value is 0.0, so the default expected value needs to be set + // to 1.0 + ElementType expected = 1.0; + this->expected_[0].fill(expected); + UnaryOperationTest::setup(); + } + + std::vector test_elements_; +}; // end of class ExpTestSpecial + +template +class ExpTestCustomBase; + +template <> +class ExpTestCustomBase { + protected: + static void fill_expected(test::Array2D& input, + test::Array2D& expected) { + for (size_t i = 0; i < input.height(); ++i) { + for (size_t j = 0; j < input.width(); ++j) { + // Expected values calculated as doubles to have 'perfect' references. + // As the NEON implementation reuses the toolchain's expf implementation + // for the tail path, the test expects that the error for expf is also + // less than 1 ULP. + + // Seems like clang-tidy does not understand what the fill member + // function of test::Array2D does, so these exceptions are required + // NOLINTBEGIN(clang-analyzer-core.CallAndMessage) + *(expected.at(i, j)) = + static_cast(exp(static_cast(*(input.at(i, j))))); + // NOLINTEND(clang-analyzer-core.CallAndMessage) + } + } + } +}; // end of class ExpTestCustomBase + +template +class ExpTestRandom; + +template <> +class ExpTestRandom final : public ExpTestCustomBase { + public: + void test() { + const size_t kWidth = test::Options::vector_length() * 16; + constexpr size_t kHeight = 16; + test::PseudoRandomNumberGenerator generator; + test::Array2D input{kWidth, kHeight}; + test::Array2D expected{kWidth, kHeight}; + test::Array2D actual{kWidth, kHeight}; + input.fill(generator); + + fill_expected(input, expected); + + EXPECT_EQ(KLEIDICV_OK, + exp()(input.data(), input.stride(), actual.data(), + actual.stride(), input.width(), input.height())); + + check_1ulp_error(expected, actual); + } +}; // end of class ExpTestRandom + +template +class ExpTestAll; + +template <> +class ExpTestAll final : public ExpTestCustomBase { + public: + void test() { + constexpr size_t kWidth = 1024; + constexpr size_t kHeight = 1024; + // Sweeping through a meaningful input range. The start value results in + // 0.0, the last value in inf. + LinearFloatGenerator generator{-104, 89}; + test::Array2D input{kWidth, kHeight}; + test::Array2D expected{kWidth, kHeight}; + test::Array2D actual{kWidth, kHeight}; + + while (generator.last_value_not_reached()) { + input.fill(generator); + + fill_expected(input, expected); + + EXPECT_EQ(KLEIDICV_OK, + exp()(input.data(), input.stride(), actual.data(), + actual.stride(), input.width(), input.height())); + + check_1ulp_error(expected, actual); + } + } + + private: + class LinearFloatGenerator : public test::Generator { + public: + LinearFloatGenerator(float start_value, float last_value) + : start_value_{start_value}, + last_value_{last_value}, + value_{start_value} {} + + void reset() override { value_ = start_value_; } + + std::optional next() override { + float current_value = value_; + value_ = std::nextafterf(value_, last_value_); + return current_value; + } + + bool last_value_not_reached() const { + return std::nextafterf(value_, last_value_) != value_; + } + + private: + float start_value_; + float last_value_; + float value_; + }; // end of class LinearFloatGenerator +}; // end of class ExpTestAll + +template +class Exp : public testing::Test {}; +using ElementTypes = ::testing::Types; +TYPED_TEST_SUITE(Exp, ElementTypes); + +TYPED_TEST(Exp, SpecialValues) { + ExpTestSpecial{}.test(); + ExpTestSpecial{} + .with_padding(test::Options::vector_length()) + .test(); +} + +TYPED_TEST(Exp, RandomValues) { ExpTestRandom{}.test(); } + +TYPED_TEST(Exp, AllValues) { + if (test::Options::are_long_running_tests_skipped()) { + GTEST_SKIP() << "Long running exp test skipped"; + } + ExpTestAll{}.test(); +} + +TYPED_TEST(Exp, OversizeImage) { + TypeParam src[1] = {}, dst[1]; + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + exp()(src, sizeof(TypeParam), dst, sizeof(TypeParam), + KLEIDICV_MAX_IMAGE_PIXELS + 1, 1)); + EXPECT_EQ( + KLEIDICV_ERROR_RANGE, + exp()(src, sizeof(TypeParam), dst, sizeof(TypeParam), + KLEIDICV_MAX_IMAGE_PIXELS, KLEIDICV_MAX_IMAGE_PIXELS)); +} + +TYPED_TEST(Exp, NullPointers) { + TypeParam src[1] = {}, dst[1]; + test::test_null_args(exp(), src, sizeof(TypeParam), dst, + sizeof(TypeParam), 1, 1); +} diff --git a/test/framework/test_main.cpp b/test/framework/test_main.cpp index a4d962e0204713273e927aa302115b08010114c7..4c71daa450f8719342deb2314bb388b98ce118e3 100644 --- a/test/framework/test_main.cpp +++ b/test/framework/test_main.cpp @@ -22,6 +22,7 @@ static void parse_arguments(int argc, char **argv) { static struct option long_options[] = { {"vector-length", required_argument, nullptr, 'v'}, {"seed", required_argument, nullptr, 's'}, + {"long-running-tests", no_argument, nullptr, 'l'}, {nullptr, 0, nullptr, 0} }; // clang-format on @@ -48,6 +49,10 @@ static void parse_arguments(int argc, char **argv) { Options::set_seed(std::stoull(optarg)); is_seed_set = true; break; + + case 'l': + Options::turn_on_long_running_tests(); + break; } } diff --git a/test/framework/utils.cpp b/test/framework/utils.cpp index a88f42a11ed13c39acc7670c56a47cdf88a2ed1e..bd090d73a423309564dd129c3e220c1ab22b204e 100644 --- a/test/framework/utils.cpp +++ b/test/framework/utils.cpp @@ -16,6 +16,8 @@ bool MockMallocToFail::enabled = false; +bool test::Options::are_long_running_tests_skipped_ = true; + namespace test { template diff --git a/test/framework/utils.h b/test/framework/utils.h index 9ee2ea6210022b031f1ed55996d5537d5fe5526c..e5f0e46b06538be30653cf26c181e3babcb8393b 100644 --- a/test/framework/utils.h +++ b/test/framework/utils.h @@ -67,6 +67,11 @@ class Options { // Returns seed to use. static uint64_t seed() { return seed_; } + // Whether long running tests should be skipped. + static bool are_long_running_tests_skipped() { + return are_long_running_tests_skipped_; + } + // Returns the number of lanes in a vector for a given arithmetic type. template , bool> = true> @@ -89,11 +94,18 @@ class Options { // Sets the seed. static void set_seed(uint64_t value) { seed_ = value; } + // Turns on long running tests. + static void turn_on_long_running_tests() { + are_long_running_tests_skipped_ = false; + } + private: // Vector length being tested. static size_t vector_length_; // Seed to use. static uint64_t seed_; + // Whether long running tests should be skipped. + static bool are_long_running_tests_skipped_; }; // end of class Options // Prints all the elements in a two-dimensional space.