From 03af8ffb391397adf897ecc24bd0a3c06d1a0bff Mon Sep 17 00:00:00 2001 From: Ioana Ghiban Date: Wed, 12 Jun 2024 10:18:47 +0200 Subject: [PATCH 1/4] Add NEON intrinsics overloads required for comparing --- kleidicv/include/kleidicv/neon_intrinsics.h | 24 +++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/kleidicv/include/kleidicv/neon_intrinsics.h b/kleidicv/include/kleidicv/neon_intrinsics.h index 0add00459..96c1abba3 100644 --- a/kleidicv/include/kleidicv/neon_intrinsics.h +++ b/kleidicv/include/kleidicv/neon_intrinsics.h @@ -108,6 +108,18 @@ static inline uint32x4_t vaddl(uint16x4_t lhs, uint16x4_t rhs) { return vaddl_u1 static inline int64x2_t vaddl(int32x2_t lhs, int32x2_t rhs) { return vaddl_s32(lhs, rhs); } static inline uint64x2_t vaddl(uint32x2_t lhs, uint32x2_t rhs) { return vaddl_u32(lhs, rhs); } +// ----------------------------------------------------------------------------- +// vbslq* +// ----------------------------------------------------------------------------- + +static inline int8x16_t vbslq(int8x16_t a, int8x16_t b, int8x16_t c) { return vbslq_s8(a, b, c); } +static inline uint8x16_t vbslq(uint8x16_t a, uint8x16_t b, uint8x16_t c) { return vbslq_u8(a, b, c); } +static inline int16x8_t vbslq(int16x8_t a, int16x8_t b, int16x8_t c) { return vbslq_s16(a, b, c); } +static inline uint16x8_t vbslq(uint16x8_t a, uint16x8_t b, uint16x8_t c) { return vbslq_u16(a, b, c); } +static inline int32x4_t vbslq(int32x4_t a, int32x4_t b, int32x4_t c) { return vbslq_s32(a, b, c); } +static inline uint32x4_t vbslq(uint32x4_t a, uint32x4_t b, uint32x4_t c) { return vbslq_u32(a, b, c); } +static inline float32x4_t vbslq(uint32x4_t a, float32x4_t b, float32x4_t c) { return vbslq_f32(a, b, c); } + // ----------------------------------------------------------------------------- // vget_high* // ----------------------------------------------------------------------------- @@ -121,6 +133,12 @@ static inline uint32x2_t vget_high(uint32x4_t vec) { return vget_high_u32(vec); static inline int64x1_t vget_high(int64x2_t vec) { return vget_high_s64(vec); } static inline uint64x1_t vget_high(uint64x2_t vec) { return vget_high_u64(vec); } +// ----------------------------------------------------------------------------- +// vcgeq* +// ----------------------------------------------------------------------------- + +static inline uint32x4_t vcgeq(float32x4_t lhs, float32x4_t rhs) { return vcgeq_f32(lhs, rhs); } + // ----------------------------------------------------------------------------- // vget_low* // ----------------------------------------------------------------------------- @@ -182,6 +200,12 @@ static inline int32_t vmaxvq(int32x4_t src) { return vmaxvq_s32(src); } static inline uint32_t vmaxvq(uint32x4_t src) { return vmaxvq_u32(src); } static inline float32_t vmaxvq(float32x4_t src) { return vmaxvq_f32(src); } +// ----------------------------------------------------------------------------- +// vcleq* +// ----------------------------------------------------------------------------- + +static inline uint32x4_t vcleq(float32x4_t lhs, float32x4_t rhs) { return vcleq_f32(lhs, rhs); } + // ----------------------------------------------------------------------------- // vrshrn_n* // ----------------------------------------------------------------------------- -- GitLab From 4312b2d54c97a167c8fa71ff11cd94da8e3d3de3 Mon Sep 17 00:00:00 2001 From: Ioana Ghiban Date: Wed, 12 Jun 2024 10:20:27 +0200 Subject: [PATCH 2/4] Implement inRange API for uint8 and float --- CHANGELOG.md | 1 + .../opencv/extra_benchmarks/opencv-4.9.patch | 40 ++ adapters/opencv/kleidicv_hal.cpp | 24 + adapters/opencv/kleidicv_hal.h | 31 ++ adapters/opencv/opencv-4.9.patch | 48 +- benchmark/benchmark.cpp | 19 + conformity/opencv/common.h | 16 + conformity/opencv/test_in_range.cpp | 62 +++ conformity/opencv/tests.cpp | 1 + conformity/opencv/tests.h | 1 + doc/functionality.md | 1 + doc/opencv.md | 8 + kleidicv/include/kleidicv/kleidicv.h | 32 ++ kleidicv/src/arithmetics/in_range_api.cpp | 40 ++ kleidicv/src/arithmetics/in_range_neon.cpp | 136 +++++ kleidicv/src/arithmetics/in_range_sc.h | 180 +++++++ kleidicv/src/arithmetics/in_range_sme2.cpp | 25 + kleidicv/src/arithmetics/in_range_sve2.cpp | 25 + scripts/benchmark/run_benchmarks_4K.sh | 3 + scripts/benchmark/run_benchmarks_FHD.sh | 3 + scripts/ci-opencv.sh | 1 + test/api/test_in_range.cpp | 483 ++++++++++++++++++ 22 files changed, 1178 insertions(+), 2 deletions(-) create mode 100644 conformity/opencv/test_in_range.cpp create mode 100644 kleidicv/src/arithmetics/in_range_api.cpp create mode 100644 kleidicv/src/arithmetics/in_range_neon.cpp create mode 100644 kleidicv/src/arithmetics/in_range_sc.h create mode 100644 kleidicv/src/arithmetics/in_range_sme2.cpp create mode 100644 kleidicv/src/arithmetics/in_range_sve2.cpp create mode 100644 test/api/test_in_range.cpp diff --git a/CHANGELOG.md b/CHANGELOG.md index 867d936f2..4128afb3e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ This changelog aims to follow the guiding principles of - Conversion from float to (u)int8_t and vice versa. - KLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS configuration option. - Conversion from non-subsampled, interleaved YUV to RGB/BGR. +- InRange single channel, scalar bounds for uint8_t and float. ### Fixed diff --git a/adapters/opencv/extra_benchmarks/opencv-4.9.patch b/adapters/opencv/extra_benchmarks/opencv-4.9.patch index 3f6195ee3..00d88a24e 100644 --- a/adapters/opencv/extra_benchmarks/opencv-4.9.patch +++ b/adapters/opencv/extra_benchmarks/opencv-4.9.patch @@ -60,6 +60,46 @@ index 344d81cb8a..ef5a3aa7d2 100644 double eps = depthSrc <= CV_32S && (depthDst <= CV_32S || depthDst == CV_64F) ? 1e-12 : (FLT_EPSILON * maxValue); eps = eps * std::max(1.0, fabs(alpha)); +diff --git a/modules/core/perf/perf_inRangeScalar.cpp b/modules/core/perf/perf_inRangeScalar.cpp +new file mode 100644 +index 0000000000..9ecca30b6c +--- /dev/null ++++ b/modules/core/perf/perf_inRangeScalar.cpp +@@ -0,0 +1,34 @@ ++#include "perf_precomp.hpp" ++ ++namespace opencv_test ++{ ++using namespace perf; ++ ++typedef tuple Size_TypeSrc_lb_ub_t; ++typedef perf::TestBaseWithParam Size_TypeSrc_lb_ub; ++ ++PERF_TEST_P( Size_TypeSrc_lb_ub, inRangeScalar, ++ testing::Combine ++ ( ++ testing::Values(sz1080p, sz2160p), ++ testing::Values(CV_8UC1, CV_32FC1), ++ testing::Values(1), ++ testing::Values(2) ++ ) ++ ) ++{ ++ Size size = get<0>(GetParam()); ++ int type = get<1>(GetParam()); ++ Mat lb (1, 1, type, get<2>(GetParam())); ++ Mat ub (1, 1, type, get<3>(GetParam())); ++ Mat src(size, type); ++ Mat dst(size, CV_8UC1); ++ ++ declare.in(src, WARMUP_RNG).out(dst); ++ ++ TEST_CYCLE() inRange( src, lb, ub, dst ); ++ ++ SANITY_CHECK(dst); ++} ++ ++} // namespace diff --git a/modules/imgproc/perf/perf_blur.cpp b/modules/imgproc/perf/perf_blur.cpp index d1f5a6b1ca..c4a3a6cdc5 100644 --- a/modules/imgproc/perf/perf_blur.cpp diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp index b0b779254..1fad7dc0c 100644 --- a/adapters/opencv/kleidicv_hal.cpp +++ b/adapters/opencv/kleidicv_hal.cpp @@ -1033,4 +1033,28 @@ int compare_u8(const uchar *src1_data, size_t src1_step, const uchar *src2_data, } } +int inRange_u8(const uchar *src_data, size_t src_step, uchar *dst_data, + size_t dst_step, int dst_depth, int width, int height, int cn, + uchar lower_bound, uchar upper_bound) { + if (dst_depth != CV_8U || cn != 1) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + return convert_error(kleidicv_in_range_u8( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, width, height, + static_cast(lower_bound), static_cast(upper_bound))); +} + +int inRange_f32(const uchar *src_data, size_t src_step, uchar *dst_data, + size_t dst_step, int dst_depth, int width, int height, int cn, + double lower_bound, double upper_bound) { + if (dst_depth != CV_8U || cn != 1) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + return convert_error(kleidicv_in_range_f32( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, width, height, + static_cast(lower_bound), static_cast(upper_bound))); +} + } // namespace kleidicv::hal diff --git a/adapters/opencv/kleidicv_hal.h b/adapters/opencv/kleidicv_hal.h index 2505922cb..5b2e5ca86 100644 --- a/adapters/opencv/kleidicv_hal.h +++ b/adapters/opencv/kleidicv_hal.h @@ -129,6 +129,14 @@ int convertTo(const uchar *src_data, size_t src_step, int src_depth, int exp32f(const float *src, float *dst, int len); +int inRange_u8(const uchar *src_data, size_t src_step, uchar *dst_data, + size_t dst_step, int dst_depth, int width, int height, int cn, + uchar lower_bound, uchar upper_bound); + +int inRange_f32(const uchar *src_data, size_t src_step, uchar *dst_data, + size_t dst_step, int dst_depth, int width, int height, int cn, + double lower_bound, double upper_bound); + } // namespace hal } // namespace kleidicv @@ -497,6 +505,29 @@ KLEIDICV_HAL_MUL(mul16s, kleidicv_saturating_multiply_s16, int16_t); #undef cv_hal_mul16s #define cv_hal_mul16s kleidicv_mul16s_with_fallback +// inRange +static inline int kleidicv_in_range_u8_with_fallback( + const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, + int dst_depth, size_t width, size_t height, int cn, uchar lower_bound, + uchar upper_bound) { + return KLEIDICV_HAL_FALLBACK_FORWARD( + inRange_u8, cv_hal_inRange8u, src_data, src_step, dst_data, dst_step, + dst_depth, width, height, cn, lower_bound, upper_bound); +} +#undef cv_hal_inRange8u +#define cv_hal_inRange8u kleidicv_in_range_u8_with_fallback + +static inline int kleidicv_in_range_f32_with_fallback( + const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, + int dst_depth, size_t width, size_t height, int cn, double lower_bound, + double upper_bound) { + return KLEIDICV_HAL_FALLBACK_FORWARD( + inRange_f32, cv_hal_inRange32f, src_data, src_step, dst_data, dst_step, + dst_depth, width, height, cn, lower_bound, upper_bound); +} +#undef cv_hal_inRange32f +#define cv_hal_inRange32f kleidicv_in_range_f32_with_fallback + #endif // OPENCV_CORE_HAL_REPLACEMENT_HPP // Remove no longer needed macro definitions. diff --git a/adapters/opencv/opencv-4.9.patch b/adapters/opencv/opencv-4.9.patch index d731aa2fb..42897a636 100644 --- a/adapters/opencv/opencv-4.9.patch +++ b/adapters/opencv/opencv-4.9.patch @@ -74,6 +74,30 @@ index 6f0a83d359..4c294962ca 100644 #define CV_MAKETYPE(depth,cn) (CV_MAT_DEPTH(depth) + (((cn)-1) << CV_CN_SHIFT)) #define CV_MAKE_TYPE CV_MAKETYPE +diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp +index 9de474b402..4757861a34 100644 +--- a/modules/core/src/arithm.cpp ++++ b/modules/core/src/arithm.cpp +@@ -1803,6 +1803,19 @@ void cv::inRange(InputArray _src, InputArray _lowerb, + + convertAndUnrollScalar( lb, src.type(), lbuf, blocksize ); + convertAndUnrollScalar( ub, src.type(), ubuf, blocksize ); ++ ++ if (depth == CV_8U) { ++ uint8_t lb_scalar = lb.at(0, 0); ++ uint8_t ub_scalar = ub.at(0, 0); ++ CALL_HAL(inRange_u8, cv_hal_inRange8u, src.data, src.step, dst.data, dst.step, dst.depth(), src.cols, src.rows, src.channels(), ++ lb_scalar, ub_scalar); ++ ++ } else if (depth == CV_32F) { ++ double lb_scalar = lb.at(0, 0); ++ double ub_scalar = ub.at(0, 0); ++ CALL_HAL(inRange_f32, cv_hal_inRange32f, src.data, src.step, dst.data, dst.step, dst.depth(), src.cols, src.rows, src.channels(), ++ lb_scalar, ub_scalar); ++ } + } + + for( size_t i = 0; i < it.nplanes; i++, ++it ) diff --git a/modules/core/src/convert.dispatch.cpp b/modules/core/src/convert.dispatch.cpp index 345b4624cb..8698cc64bf 100644 --- a/modules/core/src/convert.dispatch.cpp @@ -96,10 +120,10 @@ index 345b4624cb..8698cc64bf 100644 if( dims <= 2 ) diff --git a/modules/core/src/hal_replacement.hpp b/modules/core/src/hal_replacement.hpp -index 1f2b259920..0e19b24f7f 100644 +index 1f2b259920..72a42d4beb 100644 --- a/modules/core/src/hal_replacement.hpp +++ b/modules/core/src/hal_replacement.hpp -@@ -818,6 +818,35 @@ inline int hal_ni_rotate90(int src_type, const uchar* src_data, size_t src_step, +@@ -818,6 +818,55 @@ inline int hal_ni_rotate90(int src_type, const uchar* src_data, size_t src_step, #define cv_hal_rotate90 hal_ni_rotate90 //! @endcond @@ -131,6 +155,26 @@ index 1f2b259920..0e19b24f7f 100644 +//! @cond IGNORED +#define cv_hal_convertTo hal_ni_convertTo +//! @endcond ++ ++/** ++ @brief inRange ++ @param src_data,src_step,src_depth Source image ++ @param dst_data,dst_step,dst_depth Destination image ++ @param width,height Source image dimensions ++ @param cn number of channels ++ @param lower_bound,upper_bound Dst values = (lower_bound <= src_value) && (src_value <= upper_bound) ? 255 : 0 ++*/ ++inline int hal_ni_inRange8u(const uchar *src_data, size_t src_step, ++ uchar *dst_data, size_t dst_step, int dst_depth, int width, ++ int height, int cn, uchar lower_bound, uchar upper_bound) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } ++inline int hal_ni_inRange32f(const uchar *src_data, size_t src_step, ++ uchar *dst_data, size_t dst_step, int dst_depth, int width, ++ int height, int cn, double lower_bound, double upper_bound) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } ++ ++//! @cond IGNORED ++#define cv_hal_inRange8u hal_ni_inRange8u ++#define cv_hal_inRange32f hal_ni_inRange32f ++//! @endcond + //! @} diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 3b881ccd3..3fb5a3c2b 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -412,3 +412,22 @@ BENCH_MORPHOLOGY(dilate, 17); BENCH_MORPHOLOGY(erode, 3); BENCH_MORPHOLOGY(erode, 5); BENCH_MORPHOLOGY(erode, 17); + +template +static void in_range(Function f, T lower_bound, T upper_bound, + benchmark::State& state) { + bench_functor(state, [f, lower_bound, upper_bound]() { + (void)f(get_source_buffer_a(), image_width * sizeof(T), + get_destination_buffer(), image_width * sizeof(uint8_t), + image_width, image_height, lower_bound, upper_bound); + }); +} + +#define BENCH_IN_RANGE(benchname, name, lower_bound, upper_bound, type) \ + static void benchname(benchmark::State& state) { \ + in_range(kleidicv_##name, lower_bound, upper_bound, state); \ + } \ + BENCHMARK(benchname) + +BENCH_IN_RANGE(in_range_u8, in_range_u8, 1, 2, uint8_t); +BENCH_IN_RANGE(in_range_f32, in_range_f32, 1.111, 1.112, float); diff --git a/conformity/opencv/common.h b/conformity/opencv/common.h index b34720445..002266df2 100644 --- a/conformity/opencv/common.h +++ b/conformity/opencv/common.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -31,6 +32,21 @@ #define KLEIDICV_CONFORMITY_MAX_MAT_DIMENSIONS 4 +template +static constexpr T min() { + return std::numeric_limits::min(); +} + +template +static constexpr T lowest() { + return std::numeric_limits::lowest(); +} + +template +static constexpr T max() { + return std::numeric_limits::max(); +} + class ExceptionWithErrno : public std::exception { public: explicit ExceptionWithErrno(const std::string& msg) diff --git a/conformity/opencv/test_in_range.cpp b/conformity/opencv/test_in_range.cpp new file mode 100644 index 000000000..e925c80b2 --- /dev/null +++ b/conformity/opencv/test_in_range.cpp @@ -0,0 +1,62 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "opencv2/core/hal/interface.h" +#include "opencv2/imgproc/hal/interface.h" +#include "tests.h" + +template +cv::Mat exec_in_range(cv::Mat& input) { + cv::Mat result; + cv::inRange(input, LowerBound / 1000.0, UpperBound / 1000.0, result); + return result; +} + +#if MANAGER +template +bool test_in_range(int index, RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue) { + cv::RNG rng(0); + + for (size_t x = 5; x <= 16; ++x) { + for (size_t y = 5; y <= 16; ++y) { + cv::Mat input(x, y, Format); + rng.fill(input, cv::RNG::UNIFORM, 0, 255); + cv::Mat actual = exec_in_range(input); + cv::Mat expected = get_expected_from_subordinate(index, request_queue, + reply_queue, input); + + if (are_matrices_different(0, actual, expected)) { + fail_print_matrices(x, y, input, actual, expected); + return true; + } + } + } + + return false; +} +#endif + +std::vector& in_range_tests_get() { + // clang-format off + static std::vector tests = { + TEST("InRange uint8, lower_bound = 1, upper_bound = 2", (test_in_range<1000, 2000, CV_8UC1>), (exec_in_range<1000, 2000>)), + TEST("InRange uint8, lower_bound = 10, upper_bound = 11", (test_in_range<10000, 11000, CV_8UC1>), (exec_in_range<10000, 11000>)), + TEST("InRange uint8, lower_bound = 3, upper_bound = 4", (test_in_range<3000, 4000, CV_8UC1>), (exec_in_range<3000, 4000>)), + TEST("InRange uint8, lower_bound = 254, upper_bound = 255", (test_in_range<254000, 255000, CV_8UC1>), (exec_in_range<254000, 255000>)), + TEST("InRange uint8, lower_bound = 13, upper_bound = 12", (test_in_range<13000, 12000, CV_8UC1>), (exec_in_range<13000, 12000>)), + TEST("InRange float, lower_bound = -999999.999, upper_bound = -999999.998", (test_in_range<(-999999999), (-999999998), CV_32FC1>), (exec_in_range<(-999999999), (-999999998)>)), + TEST("InRange float, lower_bound = -999999.999, upper_bound = -999989.999", (test_in_range<(-999999999), (-999989999), CV_32FC1>), (exec_in_range<(-999999999), (-999989999)>)), + TEST("InRange float, lower_bound = 1, upper_bound = 2", (test_in_range<1000, 2000, CV_32FC1>), (exec_in_range<1000, 2000>)), + TEST("InRange float, lower_bound = 1.111, upper_bound = 1.112", (test_in_range<1111, 1112, CV_32FC1>), (exec_in_range<1111, 1112>)), + TEST("InRange float, lower_bound = 3, upper_bound = 3", (test_in_range<3000, 3000, CV_32FC1>), (exec_in_range<3000, 3000>)), + TEST("InRange float, lower_bound = 10.001, upper_bound = 10.0011", (test_in_range<10001, 100011, CV_32FC1>), (exec_in_range<10001, 100011>)), + TEST("InRange float, lower_bound = 13.999, upper_bound = 13.998", (test_in_range<13999, 13998, CV_32FC1>), (exec_in_range<13999, 13998>)), + TEST("InRange float, lower_bound = 14.999, upper_bound = 20.998", (test_in_range<14999, 20998, CV_32FC1>), (exec_in_range<14999, 20998>)), + TEST("InRange float, lower_bound = 999999.998, upper_bound = 999999.999", (test_in_range<(999999998), (999999999), CV_32FC1>), (exec_in_range<(999999998), (999999999)>)), + TEST("InRange float, lower_bound = 999989.999, upper_bound = 999999.999", (test_in_range<(999989999), (999999999), CV_32FC1>), (exec_in_range<(999989999), (999999999)>)), + }; + // clang-format on + return tests; +} diff --git a/conformity/opencv/tests.cpp b/conformity/opencv/tests.cpp index c6cadbaf6..926de6874 100644 --- a/conformity/opencv/tests.cpp +++ b/conformity/opencv/tests.cpp @@ -35,6 +35,7 @@ std::vector all_tests = merge_tests({ resize_tests_get, scale_tests_get, min_max_tests_get, + in_range_tests_get, }); #if MANAGER diff --git a/conformity/opencv/tests.h b/conformity/opencv/tests.h index 64c0cc59d..da253cb66 100644 --- a/conformity/opencv/tests.h +++ b/conformity/opencv/tests.h @@ -21,5 +21,6 @@ std::vector& float_conversion_tests_get(); std::vector& resize_tests_get(); std::vector& scale_tests_get(); std::vector& min_max_tests_get(); +std::vector& in_range_tests_get(); #endif // KLEIDICV_OPENCV_CONFORMITY_TESTS_H_ diff --git a/doc/functionality.md b/doc/functionality.md index cf1bfe05b..91d1858a8 100644 --- a/doc/functionality.md +++ b/doc/functionality.md @@ -21,6 +21,7 @@ See `doc/opencv.md` for details of the functionality available in OpenCV. | Scale | | x | | | | | | | x | | | CompareEqual | | x | | | | | | | | | | CompareGreater | | x | | | | | | | | | +| InRange | | x | | | | | | | x | | # Logical operations | | u8 | diff --git a/doc/opencv.md b/doc/opencv.md index adf9d4dc6..20c629301 100644 --- a/doc/opencv.md +++ b/doc/opencv.md @@ -213,3 +213,11 @@ Notes on parameters: * `operation` - flag specifying correspondence between the arrays. Supported [OpenCV cmp types](https://docs.opencv.org/5.x/d2/de8/group__core__array.html#ga0cc47ff833d40b58ecbe1d609a53d784) are: + `cv::CMP_GT` + +### `inRange` +Checks whether array elements fall between user set lower and upper bounds. Currently only scalar bounds are supported. + +Notes on parameters: +* `src_depth` - only supports `CV_8U` and `CV_32F` depths. +* `dst_depth` - only supports `CV_8U` depth. +* `cn` - only supports 1 channel. diff --git a/kleidicv/include/kleidicv/kleidicv.h b/kleidicv/include/kleidicv/kleidicv.h index 03836ff2e..6a7b1b141 100644 --- a/kleidicv/include/kleidicv/kleidicv.h +++ b/kleidicv/include/kleidicv/kleidicv.h @@ -1607,6 +1607,38 @@ KLEIDICV_API_DECLARATION(kleidicv_float_conversion_u8_f32, const uint8_t *src, size_t src_stride, float *dst, size_t dst_stride, size_t width, size_t height); +/// Performs a per element comparison in `src` with respect to caller defined +/// lower and upper bounds. For the elements exceeding these bounds, the +/// corresponding elements in `dst` are set to 0 and elements within to 255. +/// +/// Width and height are the same for the source and for the destination. Number +/// of elements is limited to @ref KLEIDICV_MAX_IMAGE_PIXELS. +/// +/// @param src Pointer to the source data. Must be non-null. +/// @param src_stride Distance in bytes from the start of one row to the +/// start of the next row for the source data. Must +/// not be less than width * sizeof(type), except for +/// single-row images. +/// @param dst Pointer to the first destination data. Must be non-null. +/// @param dst_stride Distance in bytes from the start of one row to the +/// start of the next row for the destination data. Must +/// not be less than width * sizeof(type), except for +/// single-row images. +/// @param width Number of elements in a row. +/// @param height Number of rows in the data. +/// @param lower_bound The lower bound of the interval. +/// @param upper_bound The upper bound of the interval. +/// +KLEIDICV_API_DECLARATION(kleidicv_in_range_u8, const uint8_t *src, + size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, uint8_t lower_bound, + uint8_t upper_bound); +/// @copydoc kleidicv_in_range_u8 +KLEIDICV_API_DECLARATION(kleidicv_in_range_f32, const float *src, + size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, float lower_bound, + float upper_bound); + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/kleidicv/src/arithmetics/in_range_api.cpp b/kleidicv/src/arithmetics/in_range_api.cpp new file mode 100644 index 000000000..13c7fcded --- /dev/null +++ b/kleidicv/src/arithmetics/in_range_api.cpp @@ -0,0 +1,40 @@ +// SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "kleidicv/dispatch.h" +#include "kleidicv/kleidicv.h" + +namespace kleidicv { + +namespace neon { +template +kleidicv_error_t in_range(const T *src, size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, size_t height, + T lower_bound, T upper_bound); +} // namespace neon + +namespace sve2 { +template +kleidicv_error_t in_range(const T *src, size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, size_t height, + T lower_bound, T upper_bound); +} // namespace sve2 + +namespace sme2 { +template +kleidicv_error_t in_range(const T *src, size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, size_t height, + T lower_bound, T upper_bound); +} // namespace sme2 + +} // namespace kleidicv + +#define KLEIDICV_DEFINE_C_API(name, type) \ + KLEIDICV_MULTIVERSION_C_API( \ + name, &kleidicv::neon::in_range, \ + KLEIDICV_SVE2_IMPL_IF(&kleidicv::sve2::in_range), \ + KLEIDICV_SME2_IMPL_IF(&kleidicv::sme2::in_range)) + +KLEIDICV_DEFINE_C_API(kleidicv_in_range_u8, uint8_t); +KLEIDICV_DEFINE_C_API(kleidicv_in_range_f32, float); diff --git a/kleidicv/src/arithmetics/in_range_neon.cpp b/kleidicv/src/arithmetics/in_range_neon.cpp new file mode 100644 index 000000000..d0614d926 --- /dev/null +++ b/kleidicv/src/arithmetics/in_range_neon.cpp @@ -0,0 +1,136 @@ +// SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "kleidicv/kleidicv.h" +#include "kleidicv/neon.h" + +namespace kleidicv::neon { + +template +class InRange; + +template <> +class InRange : public UnrollTwice { + public: + using VecTraits = neon::VecTraits; + using VectorType = typename VecTraits::VectorType; + + InRange(uint8_t lower_bound, uint8_t upper_bound) + : lower_bound_vect_{vdupq_n(lower_bound)}, + upper_bound_vect_{vdupq_n(upper_bound)}, + lower_bound_{lower_bound}, + upper_bound_{upper_bound} {} + + VectorType vector_path(VectorType src) { + return vandq(vcgeq(src, lower_bound_vect_), vcleq(src, upper_bound_vect_)); + } + + // NOLINTBEGIN(readability-make-member-function-const) + uint8_t scalar_path(uint8_t src) { + return (src >= lower_bound_ && src <= upper_bound_) ? 0xFF : 0; + } + // NOLINTEND(readability-make-member-function-const) + + private: + VectorType lower_bound_vect_; + VectorType upper_bound_vect_; + uint8_t lower_bound_; + uint8_t upper_bound_; +}; // end of class InRange + +template <> +class InRange { + public: + using SrcVecTraits = neon::VecTraits; + using SrcVectorType = typename SrcVecTraits::VectorType; + using SrcVector4Type = typename SrcVecTraits::Vector4Type; + using DstVecTraits = neon::VecTraits; + using DstVectorType = typename DstVecTraits::VectorType; + + InRange(float lower_bound, float upper_bound) + : lower_bound_vect_{vdupq_n(lower_bound)}, + upper_bound_vect_{vdupq_n(upper_bound)}, + lower_bound_{lower_bound}, + upper_bound_{upper_bound} {} + + void process_row(size_t width, Columns src, + Columns dst) { + LoopUnroll{width, SrcVecTraits::num_lanes()} + .unroll_n_times<4>([&](size_t step) { + SrcVector4Type src_vector = vld1q_f32_x4(&src[0]); + DstVectorType result_vector = vector_path(src_vector); + vst1q(&dst[0], result_vector); + src += ptrdiff_t(step); + dst += ptrdiff_t(step); + }) + .remaining([&](size_t length, size_t) { + for (size_t index = 0; index < length; ++index) { + disable_loop_vectorization(); + float f = src[ptrdiff_t(index)]; + dst[ptrdiff_t(index)] = + (f >= lower_bound_ && f <= upper_bound_) ? 0xFF : 0; + } + }); + } + + private: + DstVectorType vector_path(SrcVector4Type src) { + SrcVectorType src0 = src.val[0]; + SrcVectorType src1 = src.val[1]; + SrcVectorType src2 = src.val[2]; + SrcVectorType src3 = src.val[3]; + uint32x4_t res00 = + vandq(vcgeq(src0, lower_bound_vect_), vcleq(src0, upper_bound_vect_)); + uint32x4_t res01 = + vandq(vcgeq(src1, lower_bound_vect_), vcleq(src1, upper_bound_vect_)); + uint32x4_t res11 = + vandq(vcgeq(src2, lower_bound_vect_), vcleq(src2, upper_bound_vect_)); + uint32x4_t res12 = + vandq(vcgeq(src3, lower_bound_vect_), vcleq(src3, upper_bound_vect_)); + // AND-ing the results of the compare ops sets all 32 bits to all 0's or all + // 1's. Unzipping them twice chooses 8 bits from those 32. + uint16x8_t res0 = + vuzp1q_u16(vreinterpretq_u16_u32(res00), vreinterpretq_u16_u32(res01)); + uint16x8_t res1 = + vuzp1q_u16(vreinterpretq_u16_u32(res11), vreinterpretq_u16_u32(res12)); + return vuzp1q_u8(vreinterpretq_u8_u16(res0), vreinterpretq_u8_u16(res1)); + } + + SrcVectorType lower_bound_vect_; + SrcVectorType upper_bound_vect_; + float lower_bound_; + float upper_bound_; +}; // end of class InRange + +template +kleidicv_error_t in_range(const T *src, size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, size_t height, + T lower_bound, T upper_bound) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); + CHECK_IMAGE_SIZE(width, height); + + InRange operation{lower_bound, upper_bound}; + Rectangle rect{width, height}; + Rows src_rows{src, src_stride}; + Rows dst_rows{dst, dst_stride}; + + if constexpr (std::is_same_v) { + apply_operation_by_rows(operation, rect, src_rows, dst_rows); + } else { + zip_rows(operation, rect, src_rows, dst_rows); + } + + return KLEIDICV_OK; +} + +#define KLEIDICV_INSTANTIATE_TEMPLATE(type) \ + template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t in_range( \ + const type *src, size_t src_stride, uint8_t *dst, size_t dst_stride, \ + size_t width, size_t height, type lower_bound, type upper_bound) + +KLEIDICV_INSTANTIATE_TEMPLATE(uint8_t); +KLEIDICV_INSTANTIATE_TEMPLATE(float); + +} // namespace kleidicv::neon diff --git a/kleidicv/src/arithmetics/in_range_sc.h b/kleidicv/src/arithmetics/in_range_sc.h new file mode 100644 index 000000000..9b6710e08 --- /dev/null +++ b/kleidicv/src/arithmetics/in_range_sc.h @@ -0,0 +1,180 @@ +// SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_IN_RANGE_SC_H +#define KLEIDICV_IN_RANGE_SC_H + +#include "kleidicv/kleidicv.h" +#include "kleidicv/sve2.h" + +namespace KLEIDICV_TARGET_NAMESPACE { + +template +class InRange; + +template <> +class InRange : public UnrollTwice { + public: + using ContextType = Context; + using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits; + using VectorType = typename VecTraits::VectorType; + using SignedScalarType = typename std::make_signed::type; + using SignedVecTraits = + KLEIDICV_TARGET_NAMESPACE::VecTraits; + using SignedVectorType = typename SignedVecTraits::VectorType; + + InRange(VectorType &vec_lower_bound, + VectorType &vec_upper_bound) KLEIDICV_STREAMING_COMPATIBLE + : vec_lower_bound_(vec_lower_bound), + vec_upper_bound_(vec_upper_bound) {} + + // NOLINTBEGIN(readability-make-member-function-const) + VectorType vector_path(ContextType ctx, + VectorType src) KLEIDICV_STREAMING_COMPATIBLE { + svbool_t pg = ctx.predicate(); + + VectorType diff_low = svsub_x(pg, src, vec_lower_bound_); + // Shift subtraction result 7 bits to the right, i.e. divide by 2^7 to keep + // sign bit only. + VectorType result_within_low = + VecTraits::svreinterpret(SignedVecTraits::svasr_n( + pg, SignedVecTraits::svreinterpret(diff_low), 7)); + + VectorType diff_up = svsub_x(pg, vec_upper_bound_, src); + VectorType result_within_up = + VecTraits::svreinterpret(SignedVecTraits::svasr_n( + pg, SignedVecTraits::svreinterpret(diff_up), 7)); + + // src[i] < lower_bound OR src[i] > upper_bound + VectorType out_of_range = svorr_x(pg, result_within_low, result_within_up); + // NOT(out_of_range) to set within elements to 1 and the rest to 0. + VectorType within_range = svcnot_x(pg, out_of_range); + // Negate to set elements within to 0xFF (all 1s). + return VecTraits::svreinterpret( + svqneg_x(pg, SignedVecTraits::svreinterpret(within_range))); + } + // NOLINTEND(readability-make-member-function-const) + + private: + VectorType &vec_lower_bound_; + VectorType &vec_upper_bound_; +}; // end of class InRange + +template <> +class InRange { + public: + using SrcVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits; + using SrcVectorType = typename SrcVecTraits::VectorType; + using DstVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits; + using DstVectorType = typename DstVecTraits::VectorType; + + InRange(float lower_bound, float upper_bound) KLEIDICV_STREAMING_COMPATIBLE + : lower_bound_(lower_bound), + upper_bound_(upper_bound) {} + + void process_row(size_t width, Columns src, + Columns dst) KLEIDICV_STREAMING_COMPATIBLE { + LoopUnroll{width, SrcVecTraits::num_lanes()} + .unroll_n_times<4>([&](size_t step) KLEIDICV_STREAMING_COMPATIBLE { + svbool_t pg_src = SrcVecTraits::svptrue(); + SrcVectorType src_v0 = svld1(pg_src, &src[0]); + SrcVectorType src_v1 = svld1_vnum(pg_src, &src[0], 1); + SrcVectorType src_v2 = svld1_vnum(pg_src, &src[0], 2); + SrcVectorType src_v3 = svld1_vnum(pg_src, &src[0], 3); + DstVectorType res0 = + vector_path(pg_src, src_v0, src_v1, src_v2, src_v3); + svbool_t pg_dst = DstVecTraits::svptrue(); + svst1(pg_dst, &dst[0], res0); + src += ptrdiff_t(step); + dst += ptrdiff_t(step); + }) + .remaining([&](size_t length, size_t) KLEIDICV_STREAMING_COMPATIBLE { + size_t index = 0; + svbool_t pg = SrcVecTraits::svwhilelt(index, length); + while (svptest_first(SrcVecTraits::svptrue(), pg)) { + SrcVectorType src_vector = svld1(pg, &src[ptrdiff_t(index)]); + DstVectorType result_vector = remaining_path(pg, src_vector); + svst1b(pg, &dst[ptrdiff_t(index)], + svreinterpret_u32(result_vector)); + // Update loop counter and calculate the next governing predicate. + index += SrcVecTraits::num_lanes(); + pg = SrcVecTraits::svwhilelt(index, length); + } + }); + } + + private: + // NOLINTBEGIN(readability-make-member-function-const) + DstVectorType vector_path(svbool_t full_pg, SrcVectorType fsrc0, + SrcVectorType fsrc1, SrcVectorType fsrc2, + SrcVectorType fsrc3) KLEIDICV_STREAMING_COMPATIBLE { + svbool_t pred0 = svand_z(full_pg, svcmpge(full_pg, fsrc0, lower_bound_), + svcmple(full_pg, fsrc0, upper_bound_)); + auto res00 = svsel(pred0, svdup_u32(0xFF), svdup_u32(0)); + + svbool_t pred1 = svand_z(full_pg, svcmpge(full_pg, fsrc1, lower_bound_), + svcmple(full_pg, fsrc1, upper_bound_)); + auto res01 = svsel(pred1, svdup_u32(0xFF), svdup_u32(0)); + + svbool_t pred2 = svand_z(full_pg, svcmpge(full_pg, fsrc2, lower_bound_), + svcmple(full_pg, fsrc2, upper_bound_)); + auto res10 = svsel(pred2, svdup_u32(0xFF), svdup_u32(0)); + + svbool_t pred3 = svand_z(full_pg, svcmpge(full_pg, fsrc3, lower_bound_), + svcmple(full_pg, fsrc3, upper_bound_)); + auto res11 = svsel(pred3, svdup_u32(0xFF), svdup_u32(0)); + + auto res0 = + svuzp1(svreinterpret_u16_u32(res00), svreinterpret_u16_u32(res01)); + auto res1 = + svuzp1(svreinterpret_u16_u32(res10), svreinterpret_u16_u32(res11)); + return svuzp1(svreinterpret_u8_u16(res0), svreinterpret_u8_u16(res1)); + } + // NOLINTEND(readability-make-member-function-const) + + // NOLINTBEGIN(readability-make-member-function-const) + DstVectorType remaining_path(svbool_t &pg, SrcVectorType src) + KLEIDICV_STREAMING_COMPATIBLE { + svbool_t predicate = svand_z(pg, svcmpge(pg, src, lower_bound_), + svcmple(pg, src, upper_bound_)); + return svsel(predicate, DstVecTraits::svdup(0xFF), DstVecTraits::svdup(0)); + } + // NOLINTEND(readability-make-member-function-const) + + float lower_bound_; + float upper_bound_; +}; // end of class InRange + +template +kleidicv_error_t in_range_sc(const T *src, size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, size_t height, + T lower_bound, + T upper_bound) KLEIDICV_STREAMING_COMPATIBLE { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, height); + CHECK_IMAGE_SIZE(width, height); + + Rectangle rect{width, height}; + Rows src_rows{src, src_stride}; + Rows dst_rows{dst, dst_stride}; + + using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits; + using VectorType = typename VecTraits::VectorType; + + if constexpr (std::is_same_v) { + VectorType vec_lower_bound = VecTraits::svdup(lower_bound); + VectorType vec_upper_bound = VecTraits::svdup(upper_bound); + InRange operation{vec_lower_bound, vec_upper_bound}; + apply_operation_by_rows(operation, rect, src_rows, dst_rows); + } else { + InRange operation{lower_bound, upper_bound}; + zip_rows(operation, rect, src_rows, dst_rows); + } + + return KLEIDICV_OK; +} + +} // namespace KLEIDICV_TARGET_NAMESPACE + +#endif // KLEIDICV_IN_RANGE_SC_H diff --git a/kleidicv/src/arithmetics/in_range_sme2.cpp b/kleidicv/src/arithmetics/in_range_sme2.cpp new file mode 100644 index 000000000..bbcf9e87a --- /dev/null +++ b/kleidicv/src/arithmetics/in_range_sme2.cpp @@ -0,0 +1,25 @@ +// SPDX-FileCopyrightText: 2023 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "in_range_sc.h" + +namespace kleidicv::sme2 { + +template +KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t +in_range(const T *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, T lower_bound, T upper_bound) { + return in_range_sc(src, src_stride, dst, dst_stride, width, height, + lower_bound, upper_bound); +} + +#define KLEIDICV_INSTANTIATE_TEMPLATE(type) \ + template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t in_range( \ + const type *src, size_t src_stride, uint8_t *dst, size_t dst_stride, \ + size_t width, size_t height, type lower_bound, type upper_bound) + +KLEIDICV_INSTANTIATE_TEMPLATE(uint8_t); +KLEIDICV_INSTANTIATE_TEMPLATE(float); + +} // namespace kleidicv::sme2 diff --git a/kleidicv/src/arithmetics/in_range_sve2.cpp b/kleidicv/src/arithmetics/in_range_sve2.cpp new file mode 100644 index 000000000..1b241ba57 --- /dev/null +++ b/kleidicv/src/arithmetics/in_range_sve2.cpp @@ -0,0 +1,25 @@ +// SPDX-FileCopyrightText: 2023 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "in_range_sc.h" + +namespace kleidicv::sve2 { + +template +KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t +in_range(const T *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, T lower_bound, T upper_bound) { + return in_range_sc(src, src_stride, dst, dst_stride, width, height, + lower_bound, upper_bound); +} + +#define KLEIDICV_INSTANTIATE_TEMPLATE(type) \ + template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t in_range( \ + const type *src, size_t src_stride, uint8_t *dst, size_t dst_stride, \ + size_t width, size_t height, type lower_bound, type upper_bound) + +KLEIDICV_INSTANTIATE_TEMPLATE(uint8_t); +KLEIDICV_INSTANTIATE_TEMPLATE(float); + +} // namespace kleidicv::sve2 diff --git a/scripts/benchmark/run_benchmarks_4K.sh b/scripts/benchmark/run_benchmarks_4K.sh index 8f16dc0af..6d7030843 100755 --- a/scripts/benchmark/run_benchmarks_4K.sh +++ b/scripts/benchmark/run_benchmarks_4K.sh @@ -87,6 +87,9 @@ benchmarks=( "UintToFloat: opencv_perf_core '*convertTo/*' '(3840x2160, 8UC1, 32FC1, 1, 1, 0)'" "CompareGt: opencv_perf_core '*compare/*' '(3840x2160, 8UC1, CMP_GT)'" + + "InRange_U8: opencv_perf_core '*inRangeScalar/*' '(3840x2160, 8UC1, 1, 2)'" + "InRange_F32: opencv_perf_core '*inRangeScalar/*' '(3840x2160, 32FC1, 1, 2)'" ) for idx in "${!benchmarks[@]}"; do diff --git a/scripts/benchmark/run_benchmarks_FHD.sh b/scripts/benchmark/run_benchmarks_FHD.sh index b1941b87b..8a183409d 100755 --- a/scripts/benchmark/run_benchmarks_FHD.sh +++ b/scripts/benchmark/run_benchmarks_FHD.sh @@ -87,6 +87,9 @@ benchmarks=( "UintToFloat: opencv_perf_core '*convertTo/*' '(1920x1080, 8UC1, 32FC1, 1, 1, 0)'" "CompareGt: opencv_perf_core '*compare/*' '(1920x1080, 8UC1, CMP_GT)'" + + "InRange_U8: opencv_perf_core '*inRangeScalar/*' '(1920x1080, 8UC1, 1, 2)'" + "InRange_F32: opencv_perf_core '*inRangeScalar/*' '(1920x1080, 32FC1, 1, 2)'" ) for idx in "${!benchmarks[@]}"; do diff --git a/scripts/ci-opencv.sh b/scripts/ci-opencv.sh index 5f30336b0..4aac91f32 100755 --- a/scripts/ci-opencv.sh +++ b/scripts/ci-opencv.sh @@ -67,6 +67,7 @@ CORE_TEST_PATTERNS=( '*Core_minMaxIdx*' '*Core_Array*' '*Compare*' + '*Core_InRange/*' ) CORE_TEST_PATTERNS_STR="$(join_strings_with_colon "${CORE_TEST_PATTERNS[*]}")" ../../../conformity/opencv_kleidicv/bin/opencv_test_core \ diff --git a/test/api/test_in_range.cpp b/test/api/test_in_range.cpp new file mode 100644 index 000000000..b22ee5cd9 --- /dev/null +++ b/test/api/test_in_range.cpp @@ -0,0 +1,483 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include "framework/array.h" +#include "framework/generator.h" +#include "framework/utils.h" +#include "kleidicv/kleidicv.h" + +#define KLEIDICV_IN_RANGE(type, suffix) \ + KLEIDICV_API(in_range, kleidicv_in_range_##suffix, type) + +KLEIDICV_IN_RANGE(uint8_t, u8); +KLEIDICV_IN_RANGE(float, f32); + +template +class InRangeTest final { + private: + template + static constexpr T min() { + return std::numeric_limits::min(); + } + + template + static constexpr T max() { + return std::numeric_limits::max(); + } + struct Elements { + size_t width; + size_t height; + ElementType lower_bound; + ElementType upper_bound; + + std::vector> source_rows; + std::vector> expected_rows; + + Elements(size_t _width, size_t _height, ElementType _lower_bound, + ElementType _upper_bound, + std::vector>&& _source_rows, + std::vector>&& _expected_rows) + : width(_width), + height(_height), + lower_bound(_lower_bound), + upper_bound(_upper_bound), + source_rows(std::move(_source_rows)), + expected_rows(std::move(_expected_rows)) {} + }; + + static float floatval(uint32_t v) { + float result; // Avoid cppcoreguidelines-init-variables. NOLINT + static_assert(sizeof(result) == sizeof(v)); + memcpy(&result, &v, sizeof(result)); + return result; + } + + const float quietNaN = std::numeric_limits::quiet_NaN(); + const float signalingNaN = std::numeric_limits::signaling_NaN(); + const float posInfinity = std::numeric_limits::infinity(); + const float negInfinity = -std::numeric_limits::infinity(); + + const float minusNaN = floatval(0xFF800001); + const float plusNaN = floatval(0x7F800001); + const float plusZero = 0.0F; + const float minusZero = -0.0F; + + const float oneNaN = floatval(0x7FC00001); + const float zeroDivZero = -std::numeric_limits::quiet_NaN(); + const float floatMin = std::numeric_limits::min(); + const float floatMax = std::numeric_limits::max(); + + const float posSubnormalMin = std::numeric_limits::denorm_min(); + const float posSubnormalMax = floatval(0x007FFFFF); + const float negSubnormalMin = -std::numeric_limits::denorm_min(); + const float negSubnormalMax = floatval(0x807FFFFF); + + void calculate_expected(const test::Array2D& source, + test::Array2D& expected, + ElementType lower_bound = 50, + ElementType upper_bound = 100) { + for (size_t hindex = 0; hindex < source.height(); ++hindex) { + for (size_t vindex = 0; vindex < source.width(); ++vindex) { + uint8_t calculated = 0; + // NOLINTBEGIN(clang-analyzer-core.uninitialized.Assign) + ElementType current_element = *source.at(hindex, vindex); + // NOLINTEND(clang-analyzer-core.uninitialized.Assign) + if ((current_element >= lower_bound) && + (current_element <= upper_bound)) { + calculated = 255; + } else { + calculated = 0; + } + *expected.at(hindex, vindex) = calculated; + } + } + } + + template + size_t get_linear_height(size_t width, size_t minimum_size) { + size_t image_size = + std::max(minimum_size, static_cast(max() - min())); + size_t height = image_size / width + 1; + + return height; + } + + template + std::tuple, test::Array2D, test::Array2D> + get_linear_arrays(size_t width, size_t height) { + test::Array2D source(width, height, 1, 1); + test::Array2D expected(width, height, 1, 1); + test::Array2D actual(width, height, 1, 1); + + if constexpr (std::is_same_v) { + test::GenerateLinearSeries generator(min()); + source.fill(generator); + } else if constexpr (std::is_same_v) { + test::GenerateLinearSeries generator(min()); + source.fill(generator); + } else { + static_assert(sizeof(T), "should never happen"); + } + + calculate_expected(source, expected); + + return {source, expected, actual}; + } + + std::array inputs_padding_{}; + std::array outputs_padding_{}; + + public: + // Tests special float values. + template , bool> = true> + const Elements& get_custom_test_elements() { + static const Elements kTestElements = { + // clang-format off + 4, 6, + -11.4, 1111.10, + {{ + { quietNaN, signalingNaN, posInfinity, negInfinity}, + { minusNaN, plusNaN, plusZero, minusZero}, + { oneNaN, zeroDivZero, floatMin, floatMax}, + {posSubnormalMin, posSubnormalMax, negSubnormalMin, negSubnormalMax}, + { 1111.11, -1112.22, 113.33, 114.44}, + { 11.5, 12.5, -11.5, -12.5}, + }}, + {{ + { 0, 0, 0, 0}, + { 0, 0, 255, 255}, + { 0, 0, 255, 0}, + {255, 255, 255, 255}, + { 0, 0, 255, 255}, + {255, 255, 0, 0}, + }} + // clang-format on + }; + return kTestElements; + } + + // float and uint8_t behave differently around their max values. + template , bool> = true> + const Elements& get_max_test_elements() { + static const Elements kTestElements = { + // clang-format off + 2, 3, + max() - 100, max() - 1, + {{ + { 10, max() - 105}, + {max() - 100, max() - 2}, + { max() - 1, max()}, + }}, + {{ + { 0, 255}, + {255, 255}, + {255, 255}, + }} + // clang-format on + }; + return kTestElements; + } + + // More extensive test for uint8_t values. + template , bool> = true> + const Elements& get_custom_test_elements() { + static const Elements kTestElements = { + // clang-format off + 16, 16, + 1, 10, + {{ + {177, 48, 1, 58, 223, 153, 36, 183, 113, 234, 218, 216, 129, 243, 230, 185}, + { 45, 187, 58, 65, 247, 68, 122, 158, 239, 84, 171, 15, 180, 156, 227, 21}, + { 53, 95, 117, 138, 197, 251, 23, 120, 201, 223, 219, 50, 37, 4, 112, 129}, + {240, 245, 130, 3, 65, 182, 19, 254, 241, 216, 198, 22, 101, 189, 151, 60}, + {234, 147, 201, 67, 121, 42, 173, 151, 108, 220, 190, 230, 23, 135, 139, 66}, + { 62, 180, 191, 65, 74, 190, 237, 102, 6, 196, 14, 76, 192, 12, 223, 232}, + { 66, 167, 77, 86, 85, 228, 104, 127, 251, 126, 56, 107, 93, 153, 222, 2}, + {160, 74, 225, 116, 243, 193, 166, 157, 193, 233, 109, 141, 86, 83, 156, 203}, + { 5, 225, 87, 53, 5, 104, 122, 157, 68, 45, 39, 229, 133, 138, 47, 231}, + {225, 4, 246, 0, 89, 227, 150, 114, 45, 144, 98, 192, 245, 232, 214, 39}, + {124, 119, 166, 74, 34, 237, 107, 200, 243, 139, 241, 147, 99, 34, 101, 73}, + {234, 157, 175, 116, 51, 73, 93, 125, 125, 110, 96, 252, 209, 60, 169, 138}, + {248, 224, 173, 23, 55, 103, 90, 71, 125, 224, 46, 212, 11, 25, 102, 151}, + {183, 199, 239, 93, 17, 125, 4, 109, 55, 89, 196, 136, 164, 76, 159, 223}, + {172, 12, 50, 36, 123, 102, 127, 119, 251, 46, 85, 25, 64, 140, 111, 163}, + {120, 19, 56, 222, 99, 198, 180, 115, 150, 168, 222, 198, 91, 154, 35, 133}, + }}, + {{ + { 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0}, + { 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {255, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + }} + // clang-format on + }; + return kTestElements; + } + + // float and uint8_t behave differently around their max values. + template , bool> = true> + const Elements& get_max_test_elements() { + static const Elements kTestElements = { + // clang-format off + 2, 3, + max() - 100, max() - 1, + {{ + {max() - 105, 10}, + {max() - 100, max() - 2}, + { max() - 1, max()}, + }}, + {{ + { 0, 0}, + {255, 255}, + {255, 0}, + }} + // clang-format on + }; + return kTestElements; + } + + // lower_bound > upperbound -> no values within bounds + const Elements& get_empty_interval_test_elements() { + static const Elements kTestElements = { + // clang-format off + 3, 1, + 13, 0, + {{ + {12, 13, 14} + }}, + {{ + { 0, 0, 0} + }} + // clang-format on + }; + return kTestElements; + } + + const Elements& get_same_bounds_test_elements() { + static const Elements kTestElements = { + // clang-format off + 3, 1, + 13, 13, + {{ + {12, 13, 14} + }}, + {{ + { 0, 255, 0} + }} + // clang-format on + }; + return kTestElements; + } + + const Elements& get_min_test_elements() { + static const Elements kTestElements = { + // clang-format off + 3, 1, + min(), min() + 1, + {{ + {min(), min() + 1, min() + 2} + }}, + {{ + { 255, 255, 0} + }} + // clang-format on + }; + return kTestElements; + } + + const Elements& get_zero_one_test_elements() { + static const Elements kTestElements = { + // clang-format off + 3, 1, + 0, 1, + {{ + {0, 1, 2} + }}, + {{ + { 255, 255, 0} + }} + // clang-format on + }; + return kTestElements; + } + + // minimum_size set by caller to trigger the 'big' conversion path. + template + void test_linear(size_t width, size_t minimum_size = 1, T lower_bound = 50, + T upper_bound = 100) { + size_t height = 0; + height = get_linear_height(width, minimum_size); + + auto arrays = get_linear_arrays(width, height); + + test::Array2D& source = std::get<0>(arrays); + test::Array2D& expected = std::get<1>(arrays); + test::Array2D& actual = std::get<2>(arrays); + + ASSERT_EQ(KLEIDICV_OK, (in_range()(source.data(), source.stride(), + actual.data(), actual.stride(), width, + height, lower_bound, upper_bound))); + + EXPECT_EQ_ARRAY2D(expected, actual); + } + + void test(const Elements& elements_list) { + const size_t width = elements_list.width; + const size_t height = elements_list.height; + const ElementType lower_bound = elements_list.lower_bound; + const ElementType upper_bound = elements_list.upper_bound; + + test::Array2D source(width, height); + test::Array2D expected(width, height); + test::Array2D actual(width, height); + + for (size_t i = 0; i < height; i++) { + source.set(i, 0, elements_list.source_rows[i]); + expected.set(i, 0, elements_list.expected_rows[i]); + } + + ASSERT_EQ(KLEIDICV_OK, + (in_range()(source.data(), source.stride(), + actual.data(), actual.stride(), width, + height, lower_bound, upper_bound))); + + EXPECT_EQ_ARRAY2D(expected, actual); + } + + InRangeTest& with_paddings( + std::initializer_list inputs_padding, + std::initializer_list outputs_padding) { + size_t i = 0; + for (size_t p : inputs_padding) { + inputs_padding_[i++] = p; + } + size_t j = 0; + for (size_t q : outputs_padding) { + outputs_padding_[j++] = q; + } + return *this; + } + + InRangeTest() { + inputs_padding_.fill(0); + outputs_padding_.fill(0); + } + + virtual ~InRangeTest() = default; +}; + +template +class InRange : public testing::Test {}; + +using ElementTypes = ::testing::Types; + +TYPED_TEST_SUITE(InRange, ElementTypes); + +// Tests various padding combinations. +TYPED_TEST(InRange, Padding) { + auto elements_list = InRangeTest{}.get_same_bounds_test_elements(); + InRangeTest{}.with_paddings({0}, {0}).test(elements_list); + InRangeTest{}.with_paddings({0}, {1}).test(elements_list); + InRangeTest{}.with_paddings({1}, {0}).test(elements_list); + InRangeTest{}.with_paddings({1}, {1}).test(elements_list); +} + +TYPED_TEST(InRange, Scalar) { + InRangeTest{}.template test_linear( + test::Options::vector_length() - 1); +} + +TYPED_TEST(InRange, Vector) { + InRangeTest{}.template test_linear( + test::Options::vector_length() * 2); +} + +TYPED_TEST(InRange, TestEmptyInterval) { + auto elements_list = + InRangeTest{}.get_empty_interval_test_elements(); + InRangeTest{}.test(elements_list); +} + +TYPED_TEST(InRange, TestSameBounds) { + auto elements_list = InRangeTest{}.get_same_bounds_test_elements(); + InRangeTest{}.test(elements_list); +} + +TYPED_TEST(InRange, TestCustom) { + auto elements_list = + InRangeTest{}.template get_custom_test_elements(); + InRangeTest{}.test(elements_list); +} + +TYPED_TEST(InRange, TestMax) { + auto elements_list = + InRangeTest{}.template get_max_test_elements(); + InRangeTest{}.test(elements_list); +} + +TYPED_TEST(InRange, TestMin) { + auto elements_list = InRangeTest{}.get_min_test_elements(); + InRangeTest{}.test(elements_list); +} + +TYPED_TEST(InRange, TestZeroOne) { + auto elements_list = InRangeTest{}.get_zero_one_test_elements(); + InRangeTest{}.test(elements_list); +} + +TYPED_TEST(InRange, NullPointer) { + const TypeParam src[1] = {}; + uint8_t dst[1]; + test::test_null_args(in_range(), src, sizeof(uint8_t), dst, + sizeof(TypeParam), 1, 1, 1, 1); +} + +TYPED_TEST(InRange, Misalignment) { + if (sizeof(TypeParam) == 1) { + // misalignment impossible + return; + } + TypeParam src[2] = {}; + uint8_t dst[2]; + EXPECT_EQ(KLEIDICV_ERROR_ALIGNMENT, + in_range()(src, sizeof(TypeParam) + 1, dst, + sizeof(uint8_t), 2, 2, 1, 1)); +} + +TYPED_TEST(InRange, ZeroImageSize) { + TypeParam src[1] = {}; + uint8_t dst[1]; + EXPECT_EQ(KLEIDICV_OK, in_range()(src, sizeof(TypeParam), dst, + sizeof(uint8_t), 0, 1, 1, 1)); + EXPECT_EQ(KLEIDICV_OK, in_range()(src, sizeof(TypeParam), dst, + sizeof(uint8_t), 1, 0, 1, 1)); +} + +TYPED_TEST(InRange, OversizeImage) { + TypeParam src[1] = {}; + uint8_t dst[1]; + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + in_range()(src, sizeof(TypeParam), dst, sizeof(uint8_t), + KLEIDICV_MAX_IMAGE_PIXELS + 1, 1, 1, 1)); + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + in_range()(src, sizeof(TypeParam), dst, sizeof(uint8_t), + KLEIDICV_MAX_IMAGE_PIXELS, + KLEIDICV_MAX_IMAGE_PIXELS, 1, 1)); +} -- GitLab From 70789beb773545c0547c8b42b2f28204ebbe5efd Mon Sep 17 00:00:00 2001 From: Ioana Ghiban Date: Fri, 9 Aug 2024 10:59:11 +0200 Subject: [PATCH 3/4] Allow testing special float values in all conformity tests --- conformity/opencv/common.h | 44 +++++++++++++++++++++++++++ conformity/opencv/test_float_conv.cpp | 44 --------------------------- conformity/opencv/test_in_range.cpp | 22 ++++++++++++++ 3 files changed, 66 insertions(+), 44 deletions(-) diff --git a/conformity/opencv/common.h b/conformity/opencv/common.h index 002266df2..351ba4ddf 100644 --- a/conformity/opencv/common.h +++ b/conformity/opencv/common.h @@ -32,6 +32,50 @@ #define KLEIDICV_CONFORMITY_MAX_MAT_DIMENSIONS 4 +static float floatval(uint32_t v) { + float result; // Avoid cppcoreguidelines-init-variables. NOLINT + static_assert(sizeof(result) == sizeof(v)); + memcpy(&result, &v, sizeof(result)); + return result; +} + +const float quietNaN = std::numeric_limits::quiet_NaN(); +const float signalingNaN = std::numeric_limits::signaling_NaN(); +const float posInfinity = std::numeric_limits::infinity(); +const float negInfinity = -std::numeric_limits::infinity(); + +const float minusNaN = floatval(0xFF800001); +const float plusNaN = floatval(0x7F800001); +const float plusZero = 0.0F; +const float minusZero = -0.0F; + +const float oneNaN = floatval(0x7FC00001); +const float zeroDivZero = -std::numeric_limits::quiet_NaN(); +const float floatMin = std::numeric_limits::min(); +const float floatMax = std::numeric_limits::max(); + +const float posSubnormalMin = std::numeric_limits::denorm_min(); +const float posSubnormalMax = floatval(0x007FFFFF); +const float negSubnormalMin = -std::numeric_limits::denorm_min(); +const float negSubnormalMax = floatval(0x807FFFFF); + +static constexpr int custom_data_float_height = 8; +static constexpr int custom_data_float_width = 4; + +static float + custom_data_float[custom_data_float_height * custom_data_float_width] = { + // clang-format off + quietNaN, signalingNaN, posInfinity, negInfinity, + minusNaN, plusNaN, plusZero, minusZero, + oneNaN, zeroDivZero, floatMin, floatMax, + posSubnormalMin, posSubnormalMax, negSubnormalMin, negSubnormalMax, + 1111.11, -1112.22, 113.33, 114.44, + 111.51, 112.62, 113.73, 114.84, + 126.66, 127.11, 128.66, 129.11, + 11.5, 12.5, -11.5, -12.5, + // clang-format on +}; + template static constexpr T min() { return std::numeric_limits::min(); diff --git a/conformity/opencv/test_float_conv.cpp b/conformity/opencv/test_float_conv.cpp index 26b6bb189..152301617 100644 --- a/conformity/opencv/test_float_conv.cpp +++ b/conformity/opencv/test_float_conv.cpp @@ -7,33 +7,6 @@ #include "tests.h" -float floatval(uint32_t v) { - float result; - static_assert(sizeof(result) == sizeof(v)); - memcpy(&result, &v, sizeof(result)); - return result; -} - -float quietNaN = std::numeric_limits::quiet_NaN(); -float signalingNaN = std::numeric_limits::signaling_NaN(); -float posInfinity = std::numeric_limits::infinity(); -float negInfinity = -std::numeric_limits::infinity(); - -float minusNaN = floatval(0xFF800001); -float plusNaN = floatval(0x7F800001); -float plusZero = 0.0F; -float minusZero = -0.0F; - -float oneNaN = floatval(0x7FC00001); -float zeroDivZero = -std::numeric_limits::quiet_NaN(); -float floatMin = std::numeric_limits::min(); -float floatMax = std::numeric_limits::max(); - -float posSubnormalMin = std::numeric_limits::denorm_min(); -float posSubnormalMax = floatval(0x007FFFFF); -float negSubnormalMin = -std::numeric_limits::denorm_min(); -float negSubnormalMax = floatval(0x807FFFFF); - template cv::Mat exec_float32_to_int8(cv::Mat& input) { cv::Mat result; @@ -98,23 +71,6 @@ bool test_int8_to_float32_random(int index, return false; } -static constexpr int custom_data_float_height = 8; -static constexpr int custom_data_float_width = 4; - -static float - custom_data_float[custom_data_float_height * custom_data_float_width] = { - // clang-format off - quietNaN, signalingNaN, posInfinity, negInfinity, - minusNaN, plusNaN, plusZero, minusZero, - oneNaN, zeroDivZero, floatMin, floatMax, - posSubnormalMin, posSubnormalMax, negSubnormalMin, negSubnormalMax, - 1111.11, -1112.22, 113.33, 114.44, - 111.51, 112.62, 113.73, 114.84, - 126.66, 127.11, 128.66, 129.11, - 11.5, 12.5, -11.5, -12.5, - // clang-format on -}; - static constexpr int custom_data_int8_height = 1; static constexpr int custom_data_int8_width = 7; diff --git a/conformity/opencv/test_in_range.cpp b/conformity/opencv/test_in_range.cpp index e925c80b2..315474e05 100644 --- a/conformity/opencv/test_in_range.cpp +++ b/conformity/opencv/test_in_range.cpp @@ -36,6 +36,26 @@ bool test_in_range(int index, RecreatedMessageQueue& request_queue, return false; } + +template +bool test_in_range_custom(int index, RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue) { + cv::Mat input(custom_data_float_height, custom_data_float_width, CV_32FC1, + custom_data_float); + + cv::Mat actual = exec_in_range(input); + cv::Mat expected = + get_expected_from_subordinate(index, request_queue, reply_queue, input); + + if (are_matrices_different(0, actual, expected)) { + fail_print_matrices(custom_data_float_height, custom_data_float_width, + input, actual, expected); + return true; + } + + return false; +} + #endif std::vector& in_range_tests_get() { @@ -56,6 +76,8 @@ std::vector& in_range_tests_get() { TEST("InRange float, lower_bound = 14.999, upper_bound = 20.998", (test_in_range<14999, 20998, CV_32FC1>), (exec_in_range<14999, 20998>)), TEST("InRange float, lower_bound = 999999.998, upper_bound = 999999.999", (test_in_range<(999999998), (999999999), CV_32FC1>), (exec_in_range<(999999998), (999999999)>)), TEST("InRange float, lower_bound = 999989.999, upper_bound = 999999.999", (test_in_range<(999989999), (999999999), CV_32FC1>), (exec_in_range<(999989999), (999999999)>)), + TEST("InRange float, lower_bound = 0, upper_bound = 1", (test_in_range_custom<(0), (1), CV_32FC1>), (exec_in_range<(0), (1)>)), + TEST("InRange float, lower_bound = -11.4, upper_bound = 1111.1", (test_in_range_custom<(-11400), (1111100), CV_32FC1>), (exec_in_range<(-11400), (1111100)>)), }; // clang-format on return tests; -- GitLab From 1c342af3faf0d50253a596b512e76c7051fdb322 Mon Sep 17 00:00:00 2001 From: Ioana Ghiban Date: Fri, 9 Aug 2024 11:01:56 +0200 Subject: [PATCH 4/4] Add SVE intrinsics overloads required for inRange --- kleidicv/include/kleidicv/sve2.h | 40 ++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/kleidicv/include/kleidicv/sve2.h b/kleidicv/include/kleidicv/sve2.h index ade877891..214cda636 100644 --- a/kleidicv/include/kleidicv/sve2.h +++ b/kleidicv/include/kleidicv/sve2.h @@ -331,9 +331,9 @@ class VecTraits : public VecTraitsBase { static inline svint8_t svdup(int8_t v) KLEIDICV_STREAMING_COMPATIBLE { return svdup_s8(v); } - static inline svuint8_t svreinterpret(svint8_t v) + static inline svint8_t svreinterpret(svuint8_t v) KLEIDICV_STREAMING_COMPATIBLE { - return svreinterpret_u8(v); + return svreinterpret_s8(v); } static inline svint8_t svasr_n(svbool_t pg, svint8_t v, uint8_t s) KLEIDICV_STREAMING_COMPATIBLE { @@ -347,9 +347,13 @@ class VecTraits : public VecTraitsBase { static inline svuint8_t svdup(uint8_t v) KLEIDICV_STREAMING_COMPATIBLE { return svdup_u8(v); } - static inline svint8_t svreinterpret(svuint8_t v) + static inline svuint8_t svreinterpret(svint8_t v) KLEIDICV_STREAMING_COMPATIBLE { - return svreinterpret_s8(v); + return svreinterpret_u8(v); + } + static inline svuint8_t svsub(svbool_t pg, svuint8_t v, + svuint8_t u) KLEIDICV_STREAMING_COMPATIBLE { + return svsub_u8_x(pg, v, u); } static inline svuint8_t svhsub(svbool_t pg, svuint8_t v, svuint8_t u) KLEIDICV_STREAMING_COMPATIBLE { @@ -363,6 +367,10 @@ class VecTraits : public VecTraitsBase { static inline svint16_t svdup(int16_t v) KLEIDICV_STREAMING_COMPATIBLE { return svdup_s16(v); } + static inline svint16_t svreinterpret(svuint16_t v) + KLEIDICV_STREAMING_COMPATIBLE { + return svreinterpret_s16(v); + } }; // end of class VecTraits template <> @@ -371,6 +379,10 @@ class VecTraits : public VecTraitsBase { static inline svuint16_t svdup(uint16_t v) KLEIDICV_STREAMING_COMPATIBLE { return svdup_u16(v); } + static inline svuint16_t svreinterpret(svint16_t v) + KLEIDICV_STREAMING_COMPATIBLE { + return svreinterpret_u16(v); + } }; // end of class VecTraits template <> @@ -379,6 +391,10 @@ class VecTraits : public VecTraitsBase { static inline svint32_t svdup(int32_t v) KLEIDICV_STREAMING_COMPATIBLE { return svdup_s32(v); } + static inline svint32_t svreinterpret(svuint32_t v) + KLEIDICV_STREAMING_COMPATIBLE { + return svreinterpret_s32(v); + } }; // end of class VecTraits template <> @@ -387,6 +403,10 @@ class VecTraits : public VecTraitsBase { static inline svuint32_t svdup(uint32_t v) KLEIDICV_STREAMING_COMPATIBLE { return svdup_u32(v); } + static inline svuint32_t svreinterpret(svint32_t v) + KLEIDICV_STREAMING_COMPATIBLE { + return svreinterpret_u32(v); + } }; // end of class VecTraits template <> @@ -395,6 +415,10 @@ class VecTraits : public VecTraitsBase { static inline svint64_t svdup(int64_t v) KLEIDICV_STREAMING_COMPATIBLE { return svdup_s64(v); } + static inline svint64_t svreinterpret(svuint64_t v) + KLEIDICV_STREAMING_COMPATIBLE { + return svreinterpret_s64(v); + } }; // end of class VecTraits template <> @@ -403,6 +427,10 @@ class VecTraits : public VecTraitsBase { static inline svuint64_t svdup(uint64_t v) KLEIDICV_STREAMING_COMPATIBLE { return svdup_u64(v); } + static inline svuint64_t svreinterpret(svint64_t v) + KLEIDICV_STREAMING_COMPATIBLE { + return svreinterpret_u64(v); + } }; // end of class VecTraits template <> @@ -411,6 +439,10 @@ class VecTraits : public VecTraitsBase { static inline svfloat32_t svdup(float v) KLEIDICV_STREAMING_COMPATIBLE { return svdup_f32(v); } + static inline svfloat32_t svsub(svbool_t pg, svfloat32_t v, + svfloat32_t u) KLEIDICV_STREAMING_COMPATIBLE { + return svsub_f32_x(pg, v, u); + } }; // end of class VecTraits template <> -- GitLab