From 03af8ffb391397adf897ecc24bd0a3c06d1a0bff Mon Sep 17 00:00:00 2001
From: Ioana Ghiban <ioana.ghiban@arm.com>
Date: Wed, 12 Jun 2024 10:18:47 +0200
Subject: [PATCH 1/4] Add NEON intrinsics overloads required for comparing

---
 kleidicv/include/kleidicv/neon_intrinsics.h | 24 +++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/kleidicv/include/kleidicv/neon_intrinsics.h b/kleidicv/include/kleidicv/neon_intrinsics.h
index 0add00459..96c1abba3 100644
--- a/kleidicv/include/kleidicv/neon_intrinsics.h
+++ b/kleidicv/include/kleidicv/neon_intrinsics.h
@@ -108,6 +108,18 @@ static inline uint32x4_t vaddl(uint16x4_t lhs, uint16x4_t rhs) { return vaddl_u1
 static inline int64x2_t  vaddl(int32x2_t  lhs, int32x2_t  rhs) { return vaddl_s32(lhs, rhs); }
 static inline uint64x2_t vaddl(uint32x2_t lhs, uint32x2_t rhs) { return vaddl_u32(lhs, rhs); }
 
+// -----------------------------------------------------------------------------
+// vbslq*
+// -----------------------------------------------------------------------------
+
+static inline int8x16_t vbslq(int8x16_t a, int8x16_t b, int8x16_t c) { return vbslq_s8(a, b, c); }
+static inline uint8x16_t vbslq(uint8x16_t a, uint8x16_t b, uint8x16_t c) { return vbslq_u8(a, b, c); }
+static inline int16x8_t vbslq(int16x8_t a, int16x8_t b, int16x8_t c) { return vbslq_s16(a, b, c); }
+static inline uint16x8_t vbslq(uint16x8_t a, uint16x8_t b, uint16x8_t c) { return vbslq_u16(a, b, c); }
+static inline int32x4_t vbslq(int32x4_t a, int32x4_t b, int32x4_t c) { return vbslq_s32(a, b, c); }
+static inline uint32x4_t vbslq(uint32x4_t a, uint32x4_t b, uint32x4_t c) { return vbslq_u32(a, b, c); }
+static inline float32x4_t vbslq(uint32x4_t a, float32x4_t b, float32x4_t c) { return vbslq_f32(a, b, c); }
+
 // -----------------------------------------------------------------------------
 // vget_high*
 // -----------------------------------------------------------------------------
@@ -121,6 +133,12 @@ static inline uint32x2_t vget_high(uint32x4_t vec) { return vget_high_u32(vec);
 static inline int64x1_t  vget_high(int64x2_t  vec) { return vget_high_s64(vec); }
 static inline uint64x1_t vget_high(uint64x2_t vec) { return vget_high_u64(vec); }
 
+// -----------------------------------------------------------------------------
+// vcgeq*
+// -----------------------------------------------------------------------------
+
+static inline uint32x4_t vcgeq(float32x4_t lhs, float32x4_t rhs) { return vcgeq_f32(lhs, rhs); }
+
 // -----------------------------------------------------------------------------
 // vget_low*
 // -----------------------------------------------------------------------------
@@ -182,6 +200,12 @@ static inline int32_t   vmaxvq(int32x4_t   src) { return vmaxvq_s32(src); }
 static inline uint32_t  vmaxvq(uint32x4_t  src) { return vmaxvq_u32(src); }
 static inline float32_t vmaxvq(float32x4_t src) { return vmaxvq_f32(src); }
 
+// -----------------------------------------------------------------------------
+// vcleq*
+// -----------------------------------------------------------------------------
+
+static inline uint32x4_t vcleq(float32x4_t lhs, float32x4_t rhs) { return vcleq_f32(lhs, rhs); }
+
 // -----------------------------------------------------------------------------
 // vrshrn_n*
 // -----------------------------------------------------------------------------
-- 
GitLab


From 4312b2d54c97a167c8fa71ff11cd94da8e3d3de3 Mon Sep 17 00:00:00 2001
From: Ioana Ghiban <ioana.ghiban@arm.com>
Date: Wed, 12 Jun 2024 10:20:27 +0200
Subject: [PATCH 2/4] Implement inRange API for uint8 and float

---
 CHANGELOG.md                                  |   1 +
 .../opencv/extra_benchmarks/opencv-4.9.patch  |  40 ++
 adapters/opencv/kleidicv_hal.cpp              |  24 +
 adapters/opencv/kleidicv_hal.h                |  31 ++
 adapters/opencv/opencv-4.9.patch              |  48 +-
 benchmark/benchmark.cpp                       |  19 +
 conformity/opencv/common.h                    |  16 +
 conformity/opencv/test_in_range.cpp           |  62 +++
 conformity/opencv/tests.cpp                   |   1 +
 conformity/opencv/tests.h                     |   1 +
 doc/functionality.md                          |   1 +
 doc/opencv.md                                 |   8 +
 kleidicv/include/kleidicv/kleidicv.h          |  32 ++
 kleidicv/src/arithmetics/in_range_api.cpp     |  40 ++
 kleidicv/src/arithmetics/in_range_neon.cpp    | 136 +++++
 kleidicv/src/arithmetics/in_range_sc.h        | 180 +++++++
 kleidicv/src/arithmetics/in_range_sme2.cpp    |  25 +
 kleidicv/src/arithmetics/in_range_sve2.cpp    |  25 +
 scripts/benchmark/run_benchmarks_4K.sh        |   3 +
 scripts/benchmark/run_benchmarks_FHD.sh       |   3 +
 scripts/ci-opencv.sh                          |   1 +
 test/api/test_in_range.cpp                    | 483 ++++++++++++++++++
 22 files changed, 1178 insertions(+), 2 deletions(-)
 create mode 100644 conformity/opencv/test_in_range.cpp
 create mode 100644 kleidicv/src/arithmetics/in_range_api.cpp
 create mode 100644 kleidicv/src/arithmetics/in_range_neon.cpp
 create mode 100644 kleidicv/src/arithmetics/in_range_sc.h
 create mode 100644 kleidicv/src/arithmetics/in_range_sme2.cpp
 create mode 100644 kleidicv/src/arithmetics/in_range_sve2.cpp
 create mode 100644 test/api/test_in_range.cpp

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 867d936f2..4128afb3e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -31,6 +31,7 @@ This changelog aims to follow the guiding principles of
 - Conversion from float to (u)int8_t and vice versa.
 - KLEIDICV_LIMIT_SME2_TO_SELECTED_ALGORITHMS configuration option.
 - Conversion from non-subsampled, interleaved YUV to RGB/BGR.
+- InRange single channel, scalar bounds for uint8_t and float.
 
 ### Fixed
 
diff --git a/adapters/opencv/extra_benchmarks/opencv-4.9.patch b/adapters/opencv/extra_benchmarks/opencv-4.9.patch
index 3f6195ee3..00d88a24e 100644
--- a/adapters/opencv/extra_benchmarks/opencv-4.9.patch
+++ b/adapters/opencv/extra_benchmarks/opencv-4.9.patch
@@ -60,6 +60,46 @@ index 344d81cb8a..ef5a3aa7d2 100644
  
      double eps = depthSrc <= CV_32S && (depthDst <= CV_32S || depthDst == CV_64F) ? 1e-12 : (FLT_EPSILON * maxValue);
      eps = eps * std::max(1.0, fabs(alpha));
+diff --git a/modules/core/perf/perf_inRangeScalar.cpp b/modules/core/perf/perf_inRangeScalar.cpp
+new file mode 100644
+index 0000000000..9ecca30b6c
+--- /dev/null
++++ b/modules/core/perf/perf_inRangeScalar.cpp
+@@ -0,0 +1,34 @@
++#include "perf_precomp.hpp"
++
++namespace opencv_test
++{
++using namespace perf;
++
++typedef tuple<Size, MatType, double, double> Size_TypeSrc_lb_ub_t;
++typedef perf::TestBaseWithParam<Size_TypeSrc_lb_ub_t> Size_TypeSrc_lb_ub;
++
++PERF_TEST_P( Size_TypeSrc_lb_ub, inRangeScalar,
++             testing::Combine
++             (
++                 testing::Values(sz1080p, sz2160p),
++                 testing::Values(CV_8UC1, CV_32FC1),
++                 testing::Values(1),
++                 testing::Values(2)
++             )
++           )
++{
++    Size size = get<0>(GetParam());
++    int type = get<1>(GetParam());
++    Mat lb (1, 1, type, get<2>(GetParam()));
++    Mat ub (1, 1, type, get<3>(GetParam()));
++    Mat src(size, type);
++    Mat dst(size, CV_8UC1);
++
++    declare.in(src, WARMUP_RNG).out(dst);
++
++    TEST_CYCLE() inRange( src, lb, ub, dst );
++
++    SANITY_CHECK(dst);
++}
++
++} // namespace
 diff --git a/modules/imgproc/perf/perf_blur.cpp b/modules/imgproc/perf/perf_blur.cpp
 index d1f5a6b1ca..c4a3a6cdc5 100644
 --- a/modules/imgproc/perf/perf_blur.cpp
diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp
index b0b779254..1fad7dc0c 100644
--- a/adapters/opencv/kleidicv_hal.cpp
+++ b/adapters/opencv/kleidicv_hal.cpp
@@ -1033,4 +1033,28 @@ int compare_u8(const uchar *src1_data, size_t src1_step, const uchar *src2_data,
   }
 }
 
+int inRange_u8(const uchar *src_data, size_t src_step, uchar *dst_data,
+               size_t dst_step, int dst_depth, int width, int height, int cn,
+               uchar lower_bound, uchar upper_bound) {
+  if (dst_depth != CV_8U || cn != 1) {
+    return CV_HAL_ERROR_NOT_IMPLEMENTED;
+  }
+  return convert_error(kleidicv_in_range_u8(
+      reinterpret_cast<const uint8_t *>(src_data), src_step,
+      reinterpret_cast<uint8_t *>(dst_data), dst_step, width, height,
+      static_cast<uint8_t>(lower_bound), static_cast<uint8_t>(upper_bound)));
+}
+
+int inRange_f32(const uchar *src_data, size_t src_step, uchar *dst_data,
+                size_t dst_step, int dst_depth, int width, int height, int cn,
+                double lower_bound, double upper_bound) {
+  if (dst_depth != CV_8U || cn != 1) {
+    return CV_HAL_ERROR_NOT_IMPLEMENTED;
+  }
+  return convert_error(kleidicv_in_range_f32(
+      reinterpret_cast<const float *>(src_data), src_step,
+      reinterpret_cast<uint8_t *>(dst_data), dst_step, width, height,
+      static_cast<float>(lower_bound), static_cast<float>(upper_bound)));
+}
+
 }  // namespace kleidicv::hal
diff --git a/adapters/opencv/kleidicv_hal.h b/adapters/opencv/kleidicv_hal.h
index 2505922cb..5b2e5ca86 100644
--- a/adapters/opencv/kleidicv_hal.h
+++ b/adapters/opencv/kleidicv_hal.h
@@ -129,6 +129,14 @@ int convertTo(const uchar *src_data, size_t src_step, int src_depth,
 
 int exp32f(const float *src, float *dst, int len);
 
+int inRange_u8(const uchar *src_data, size_t src_step, uchar *dst_data,
+               size_t dst_step, int dst_depth, int width, int height, int cn,
+               uchar lower_bound, uchar upper_bound);
+
+int inRange_f32(const uchar *src_data, size_t src_step, uchar *dst_data,
+                size_t dst_step, int dst_depth, int width, int height, int cn,
+                double lower_bound, double upper_bound);
+
 }  // namespace hal
 }  // namespace kleidicv
 
@@ -497,6 +505,29 @@ KLEIDICV_HAL_MUL(mul16s, kleidicv_saturating_multiply_s16, int16_t);
 #undef cv_hal_mul16s
 #define cv_hal_mul16s kleidicv_mul16s_with_fallback
 
+// inRange
+static inline int kleidicv_in_range_u8_with_fallback(
+    const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step,
+    int dst_depth, size_t width, size_t height, int cn, uchar lower_bound,
+    uchar upper_bound) {
+  return KLEIDICV_HAL_FALLBACK_FORWARD(
+      inRange_u8, cv_hal_inRange8u, src_data, src_step, dst_data, dst_step,
+      dst_depth, width, height, cn, lower_bound, upper_bound);
+}
+#undef cv_hal_inRange8u
+#define cv_hal_inRange8u kleidicv_in_range_u8_with_fallback
+
+static inline int kleidicv_in_range_f32_with_fallback(
+    const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step,
+    int dst_depth, size_t width, size_t height, int cn, double lower_bound,
+    double upper_bound) {
+  return KLEIDICV_HAL_FALLBACK_FORWARD(
+      inRange_f32, cv_hal_inRange32f, src_data, src_step, dst_data, dst_step,
+      dst_depth, width, height, cn, lower_bound, upper_bound);
+}
+#undef cv_hal_inRange32f
+#define cv_hal_inRange32f kleidicv_in_range_f32_with_fallback
+
 #endif  // OPENCV_CORE_HAL_REPLACEMENT_HPP
 
 // Remove no longer needed macro definitions.
diff --git a/adapters/opencv/opencv-4.9.patch b/adapters/opencv/opencv-4.9.patch
index d731aa2fb..42897a636 100644
--- a/adapters/opencv/opencv-4.9.patch
+++ b/adapters/opencv/opencv-4.9.patch
@@ -74,6 +74,30 @@ index 6f0a83d359..4c294962ca 100644
  
  #define CV_MAKETYPE(depth,cn) (CV_MAT_DEPTH(depth) + (((cn)-1) << CV_CN_SHIFT))
  #define CV_MAKE_TYPE CV_MAKETYPE
+diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp
+index 9de474b402..4757861a34 100644
+--- a/modules/core/src/arithm.cpp
++++ b/modules/core/src/arithm.cpp
+@@ -1803,6 +1803,19 @@ void cv::inRange(InputArray _src, InputArray _lowerb,
+ 
+         convertAndUnrollScalar( lb, src.type(), lbuf, blocksize );
+         convertAndUnrollScalar( ub, src.type(), ubuf, blocksize );
++
++        if (depth == CV_8U) {
++            uint8_t lb_scalar = lb.at<uint8_t>(0, 0);
++            uint8_t ub_scalar = ub.at<uint8_t>(0, 0);
++            CALL_HAL(inRange_u8, cv_hal_inRange8u, src.data, src.step, dst.data, dst.step, dst.depth(), src.cols, src.rows, src.channels(),
++                lb_scalar, ub_scalar);
++            
++        } else if (depth == CV_32F) {
++            double lb_scalar = lb.at<double>(0, 0);
++            double ub_scalar = ub.at<double>(0, 0);
++            CALL_HAL(inRange_f32, cv_hal_inRange32f, src.data, src.step, dst.data, dst.step, dst.depth(), src.cols, src.rows, src.channels(),
++                lb_scalar, ub_scalar);
++        }
+     }
+ 
+     for( size_t i = 0; i < it.nplanes; i++, ++it )
 diff --git a/modules/core/src/convert.dispatch.cpp b/modules/core/src/convert.dispatch.cpp
 index 345b4624cb..8698cc64bf 100644
 --- a/modules/core/src/convert.dispatch.cpp
@@ -96,10 +120,10 @@ index 345b4624cb..8698cc64bf 100644
  
      if( dims <= 2 )
 diff --git a/modules/core/src/hal_replacement.hpp b/modules/core/src/hal_replacement.hpp
-index 1f2b259920..0e19b24f7f 100644
+index 1f2b259920..72a42d4beb 100644
 --- a/modules/core/src/hal_replacement.hpp
 +++ b/modules/core/src/hal_replacement.hpp
-@@ -818,6 +818,35 @@ inline int hal_ni_rotate90(int src_type, const uchar* src_data, size_t src_step,
+@@ -818,6 +818,55 @@ inline int hal_ni_rotate90(int src_type, const uchar* src_data, size_t src_step,
  #define cv_hal_rotate90 hal_ni_rotate90
  //! @endcond
  
@@ -131,6 +155,26 @@ index 1f2b259920..0e19b24f7f 100644
 +//! @cond IGNORED
 +#define cv_hal_convertTo hal_ni_convertTo
 +//! @endcond
++
++/**
++   @brief inRange
++   @param src_data,src_step,src_depth Source image
++   @param dst_data,dst_step,dst_depth Destination image
++   @param width,height Source image dimensions
++   @param cn number of channels
++   @param lower_bound,upper_bound Dst values = (lower_bound <= src_value) && (src_value <= upper_bound) ? 255 : 0
++*/
++inline int hal_ni_inRange8u(const uchar *src_data, size_t src_step,
++              uchar *dst_data, size_t dst_step, int dst_depth, int width,
++              int height, int cn, uchar lower_bound, uchar upper_bound) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
++inline int hal_ni_inRange32f(const uchar *src_data, size_t src_step,
++              uchar *dst_data, size_t dst_step, int dst_depth, int width,
++              int height, int cn, double lower_bound, double upper_bound) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
++
++//! @cond IGNORED
++#define cv_hal_inRange8u hal_ni_inRange8u
++#define cv_hal_inRange32f hal_ni_inRange32f
++//! @endcond
 +
  //! @}
  
diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp
index 3b881ccd3..3fb5a3c2b 100644
--- a/benchmark/benchmark.cpp
+++ b/benchmark/benchmark.cpp
@@ -412,3 +412,22 @@ BENCH_MORPHOLOGY(dilate, 17);
 BENCH_MORPHOLOGY(erode, 3);
 BENCH_MORPHOLOGY(erode, 5);
 BENCH_MORPHOLOGY(erode, 17);
+
+template <typename T, typename Function>
+static void in_range(Function f, T lower_bound, T upper_bound,
+                     benchmark::State& state) {
+  bench_functor(state, [f, lower_bound, upper_bound]() {
+    (void)f(get_source_buffer_a<T>(), image_width * sizeof(T),
+            get_destination_buffer<uint8_t>(), image_width * sizeof(uint8_t),
+            image_width, image_height, lower_bound, upper_bound);
+  });
+}
+
+#define BENCH_IN_RANGE(benchname, name, lower_bound, upper_bound, type) \
+  static void benchname(benchmark::State& state) {                      \
+    in_range<type>(kleidicv_##name, lower_bound, upper_bound, state);   \
+  }                                                                     \
+  BENCHMARK(benchname)
+
+BENCH_IN_RANGE(in_range_u8, in_range_u8, 1, 2, uint8_t);
+BENCH_IN_RANGE(in_range_f32, in_range_f32, 1.111, 1.112, float);
diff --git a/conformity/opencv/common.h b/conformity/opencv/common.h
index b34720445..002266df2 100644
--- a/conformity/opencv/common.h
+++ b/conformity/opencv/common.h
@@ -15,6 +15,7 @@
 #include <cstring>
 #include <ctime>
 #include <exception>
+#include <limits>
 #include <string>
 #include <type_traits>
 
@@ -31,6 +32,21 @@
 
 #define KLEIDICV_CONFORMITY_MAX_MAT_DIMENSIONS 4
 
+template <typename T>
+static constexpr T min() {
+  return std::numeric_limits<T>::min();
+}
+
+template <typename T>
+static constexpr T lowest() {
+  return std::numeric_limits<T>::lowest();
+}
+
+template <typename T>
+static constexpr T max() {
+  return std::numeric_limits<T>::max();
+}
+
 class ExceptionWithErrno : public std::exception {
  public:
   explicit ExceptionWithErrno(const std::string& msg)
diff --git a/conformity/opencv/test_in_range.cpp b/conformity/opencv/test_in_range.cpp
new file mode 100644
index 000000000..e925c80b2
--- /dev/null
+++ b/conformity/opencv/test_in_range.cpp
@@ -0,0 +1,62 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "opencv2/core/hal/interface.h"
+#include "opencv2/imgproc/hal/interface.h"
+#include "tests.h"
+
+template <int LowerBound, int UpperBound>
+cv::Mat exec_in_range(cv::Mat& input) {
+  cv::Mat result;
+  cv::inRange(input, LowerBound / 1000.0, UpperBound / 1000.0, result);
+  return result;
+}
+
+#if MANAGER
+template <int LowerBound, int UpperBound, size_t Format>
+bool test_in_range(int index, RecreatedMessageQueue& request_queue,
+                   RecreatedMessageQueue& reply_queue) {
+  cv::RNG rng(0);
+
+  for (size_t x = 5; x <= 16; ++x) {
+    for (size_t y = 5; y <= 16; ++y) {
+      cv::Mat input(x, y, Format);
+      rng.fill(input, cv::RNG::UNIFORM, 0, 255);
+      cv::Mat actual = exec_in_range<LowerBound, UpperBound>(input);
+      cv::Mat expected = get_expected_from_subordinate(index, request_queue,
+                                                       reply_queue, input);
+
+      if (are_matrices_different<uint8_t>(0, actual, expected)) {
+        fail_print_matrices(x, y, input, actual, expected);
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+#endif
+
+std::vector<test>& in_range_tests_get() {
+  // clang-format off
+  static std::vector<test> tests = {
+    TEST("InRange uint8,  lower_bound =           1, upper_bound =           2", (test_in_range<1000, 2000, CV_8UC1>), (exec_in_range<1000, 2000>)),
+    TEST("InRange uint8,  lower_bound =          10, upper_bound =          11", (test_in_range<10000, 11000, CV_8UC1>), (exec_in_range<10000, 11000>)),
+    TEST("InRange uint8,  lower_bound =           3, upper_bound =           4", (test_in_range<3000, 4000, CV_8UC1>), (exec_in_range<3000, 4000>)),
+    TEST("InRange uint8,  lower_bound =         254, upper_bound =         255", (test_in_range<254000, 255000, CV_8UC1>), (exec_in_range<254000, 255000>)),
+    TEST("InRange uint8,  lower_bound =          13, upper_bound =          12", (test_in_range<13000, 12000, CV_8UC1>), (exec_in_range<13000, 12000>)),
+    TEST("InRange float,  lower_bound = -999999.999, upper_bound = -999999.998", (test_in_range<(-999999999), (-999999998), CV_32FC1>), (exec_in_range<(-999999999), (-999999998)>)),
+    TEST("InRange float,  lower_bound = -999999.999, upper_bound = -999989.999", (test_in_range<(-999999999), (-999989999), CV_32FC1>), (exec_in_range<(-999999999), (-999989999)>)),
+    TEST("InRange float,  lower_bound =           1, upper_bound =           2", (test_in_range<1000, 2000, CV_32FC1>), (exec_in_range<1000, 2000>)),
+    TEST("InRange float,  lower_bound =       1.111, upper_bound =       1.112", (test_in_range<1111, 1112, CV_32FC1>), (exec_in_range<1111, 1112>)),
+    TEST("InRange float,  lower_bound =           3, upper_bound =           3", (test_in_range<3000, 3000, CV_32FC1>), (exec_in_range<3000, 3000>)),
+    TEST("InRange float,  lower_bound =      10.001, upper_bound =     10.0011", (test_in_range<10001, 100011, CV_32FC1>), (exec_in_range<10001, 100011>)),
+    TEST("InRange float,  lower_bound =      13.999, upper_bound =      13.998", (test_in_range<13999, 13998, CV_32FC1>), (exec_in_range<13999, 13998>)),
+    TEST("InRange float,  lower_bound =      14.999, upper_bound =      20.998", (test_in_range<14999, 20998, CV_32FC1>), (exec_in_range<14999, 20998>)),
+    TEST("InRange float,  lower_bound =  999999.998, upper_bound =  999999.999", (test_in_range<(999999998), (999999999), CV_32FC1>), (exec_in_range<(999999998), (999999999)>)),
+    TEST("InRange float,  lower_bound =  999989.999, upper_bound =  999999.999", (test_in_range<(999989999), (999999999), CV_32FC1>), (exec_in_range<(999989999), (999999999)>)),
+  };
+  // clang-format on
+  return tests;
+}
diff --git a/conformity/opencv/tests.cpp b/conformity/opencv/tests.cpp
index c6cadbaf6..926de6874 100644
--- a/conformity/opencv/tests.cpp
+++ b/conformity/opencv/tests.cpp
@@ -35,6 +35,7 @@ std::vector<test> all_tests = merge_tests({
     resize_tests_get,
     scale_tests_get,
     min_max_tests_get,
+    in_range_tests_get,
 });
 
 #if MANAGER
diff --git a/conformity/opencv/tests.h b/conformity/opencv/tests.h
index 64c0cc59d..da253cb66 100644
--- a/conformity/opencv/tests.h
+++ b/conformity/opencv/tests.h
@@ -21,5 +21,6 @@ std::vector<test>& float_conversion_tests_get();
 std::vector<test>& resize_tests_get();
 std::vector<test>& scale_tests_get();
 std::vector<test>& min_max_tests_get();
+std::vector<test>& in_range_tests_get();
 
 #endif  // KLEIDICV_OPENCV_CONFORMITY_TESTS_H_
diff --git a/doc/functionality.md b/doc/functionality.md
index cf1bfe05b..91d1858a8 100644
--- a/doc/functionality.md
+++ b/doc/functionality.md
@@ -21,6 +21,7 @@ See `doc/opencv.md` for details of the functionality available in OpenCV.
 | Scale                        |     |  x  |     |     |     |     |     |     |  x  |     |
 | CompareEqual                 |     |  x  |     |     |     |     |     |     |     |     |
 | CompareGreater               |     |  x  |     |     |     |     |     |     |     |     |
+| InRange                      |     |  x  |     |     |     |     |     |     |  x  |     |
 
 # Logical operations
 |                              | u8  |
diff --git a/doc/opencv.md b/doc/opencv.md
index adf9d4dc6..20c629301 100644
--- a/doc/opencv.md
+++ b/doc/opencv.md
@@ -213,3 +213,11 @@ Notes on parameters:
 * `operation` - flag specifying correspondence between the arrays.
 Supported [OpenCV cmp types](https://docs.opencv.org/5.x/d2/de8/group__core__array.html#ga0cc47ff833d40b58ecbe1d609a53d784) are:
   + `cv::CMP_GT`
+
+### `inRange`
+Checks whether array elements fall between user set lower and upper bounds. Currently only scalar bounds are supported.
+
+Notes on parameters:
+* `src_depth` - only supports `CV_8U` and `CV_32F` depths.
+* `dst_depth` - only supports `CV_8U` depth.
+* `cn` - only supports 1 channel.
diff --git a/kleidicv/include/kleidicv/kleidicv.h b/kleidicv/include/kleidicv/kleidicv.h
index 03836ff2e..6a7b1b141 100644
--- a/kleidicv/include/kleidicv/kleidicv.h
+++ b/kleidicv/include/kleidicv/kleidicv.h
@@ -1607,6 +1607,38 @@ KLEIDICV_API_DECLARATION(kleidicv_float_conversion_u8_f32, const uint8_t *src,
                          size_t src_stride, float *dst, size_t dst_stride,
                          size_t width, size_t height);
 
+/// Performs a per element comparison in `src` with respect to caller defined
+/// lower and upper bounds. For the elements exceeding these bounds, the
+/// corresponding elements in `dst` are set to 0 and elements within to 255.
+///
+/// Width and height are the same for the source and for the destination. Number
+/// of elements is limited to @ref KLEIDICV_MAX_IMAGE_PIXELS.
+///
+/// @param src          Pointer to the source data. Must be non-null.
+/// @param src_stride   Distance in bytes from the start of one row to the
+///                     start of the next row for the source data. Must
+///                     not be less than width * sizeof(type), except for
+///                     single-row images.
+/// @param dst          Pointer to the first destination data. Must be non-null.
+/// @param dst_stride   Distance in bytes from the start of one row to the
+///                     start of the next row for the destination data. Must
+///                     not be less than width * sizeof(type), except for
+///                     single-row images.
+/// @param width        Number of elements in a row.
+/// @param height       Number of rows in the data.
+/// @param lower_bound  The lower bound of the interval.
+/// @param upper_bound  The upper bound of the interval.
+///
+KLEIDICV_API_DECLARATION(kleidicv_in_range_u8, const uint8_t *src,
+                         size_t src_stride, uint8_t *dst, size_t dst_stride,
+                         size_t width, size_t height, uint8_t lower_bound,
+                         uint8_t upper_bound);
+/// @copydoc kleidicv_in_range_u8
+KLEIDICV_API_DECLARATION(kleidicv_in_range_f32, const float *src,
+                         size_t src_stride, uint8_t *dst, size_t dst_stride,
+                         size_t width, size_t height, float lower_bound,
+                         float upper_bound);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif  // __cplusplus
diff --git a/kleidicv/src/arithmetics/in_range_api.cpp b/kleidicv/src/arithmetics/in_range_api.cpp
new file mode 100644
index 000000000..13c7fcded
--- /dev/null
+++ b/kleidicv/src/arithmetics/in_range_api.cpp
@@ -0,0 +1,40 @@
+// SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "kleidicv/dispatch.h"
+#include "kleidicv/kleidicv.h"
+
+namespace kleidicv {
+
+namespace neon {
+template <typename T>
+kleidicv_error_t in_range(const T *src, size_t src_stride, uint8_t *dst,
+                          size_t dst_stride, size_t width, size_t height,
+                          T lower_bound, T upper_bound);
+}  // namespace neon
+
+namespace sve2 {
+template <typename T>
+kleidicv_error_t in_range(const T *src, size_t src_stride, uint8_t *dst,
+                          size_t dst_stride, size_t width, size_t height,
+                          T lower_bound, T upper_bound);
+}  // namespace sve2
+
+namespace sme2 {
+template <typename T>
+kleidicv_error_t in_range(const T *src, size_t src_stride, uint8_t *dst,
+                          size_t dst_stride, size_t width, size_t height,
+                          T lower_bound, T upper_bound);
+}  // namespace sme2
+
+}  // namespace kleidicv
+
+#define KLEIDICV_DEFINE_C_API(name, type)                     \
+  KLEIDICV_MULTIVERSION_C_API(                                \
+      name, &kleidicv::neon::in_range<type>,                  \
+      KLEIDICV_SVE2_IMPL_IF(&kleidicv::sve2::in_range<type>), \
+      KLEIDICV_SME2_IMPL_IF(&kleidicv::sme2::in_range<type>))
+
+KLEIDICV_DEFINE_C_API(kleidicv_in_range_u8, uint8_t);
+KLEIDICV_DEFINE_C_API(kleidicv_in_range_f32, float);
diff --git a/kleidicv/src/arithmetics/in_range_neon.cpp b/kleidicv/src/arithmetics/in_range_neon.cpp
new file mode 100644
index 000000000..d0614d926
--- /dev/null
+++ b/kleidicv/src/arithmetics/in_range_neon.cpp
@@ -0,0 +1,136 @@
+// SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "kleidicv/kleidicv.h"
+#include "kleidicv/neon.h"
+
+namespace kleidicv::neon {
+
+template <typename ScalarType>
+class InRange;
+
+template <>
+class InRange<uint8_t> : public UnrollTwice {
+ public:
+  using VecTraits = neon::VecTraits<uint8_t>;
+  using VectorType = typename VecTraits::VectorType;
+
+  InRange(uint8_t lower_bound, uint8_t upper_bound)
+      : lower_bound_vect_{vdupq_n(lower_bound)},
+        upper_bound_vect_{vdupq_n(upper_bound)},
+        lower_bound_{lower_bound},
+        upper_bound_{upper_bound} {}
+
+  VectorType vector_path(VectorType src) {
+    return vandq(vcgeq(src, lower_bound_vect_), vcleq(src, upper_bound_vect_));
+  }
+
+  // NOLINTBEGIN(readability-make-member-function-const)
+  uint8_t scalar_path(uint8_t src) {
+    return (src >= lower_bound_ && src <= upper_bound_) ? 0xFF : 0;
+  }
+  // NOLINTEND(readability-make-member-function-const)
+
+ private:
+  VectorType lower_bound_vect_;
+  VectorType upper_bound_vect_;
+  uint8_t lower_bound_;
+  uint8_t upper_bound_;
+};  // end of class InRange<uint8_t>
+
+template <>
+class InRange<float> {
+ public:
+  using SrcVecTraits = neon::VecTraits<float>;
+  using SrcVectorType = typename SrcVecTraits::VectorType;
+  using SrcVector4Type = typename SrcVecTraits::Vector4Type;
+  using DstVecTraits = neon::VecTraits<uint8_t>;
+  using DstVectorType = typename DstVecTraits::VectorType;
+
+  InRange(float lower_bound, float upper_bound)
+      : lower_bound_vect_{vdupq_n(lower_bound)},
+        upper_bound_vect_{vdupq_n(upper_bound)},
+        lower_bound_{lower_bound},
+        upper_bound_{upper_bound} {}
+
+  void process_row(size_t width, Columns<const float> src,
+                   Columns<uint8_t> dst) {
+    LoopUnroll{width, SrcVecTraits::num_lanes()}
+        .unroll_n_times<4>([&](size_t step) {
+          SrcVector4Type src_vector = vld1q_f32_x4(&src[0]);
+          DstVectorType result_vector = vector_path(src_vector);
+          vst1q(&dst[0], result_vector);
+          src += ptrdiff_t(step);
+          dst += ptrdiff_t(step);
+        })
+        .remaining([&](size_t length, size_t) {
+          for (size_t index = 0; index < length; ++index) {
+            disable_loop_vectorization();
+            float f = src[ptrdiff_t(index)];
+            dst[ptrdiff_t(index)] =
+                (f >= lower_bound_ && f <= upper_bound_) ? 0xFF : 0;
+          }
+        });
+  }
+
+ private:
+  DstVectorType vector_path(SrcVector4Type src) {
+    SrcVectorType src0 = src.val[0];
+    SrcVectorType src1 = src.val[1];
+    SrcVectorType src2 = src.val[2];
+    SrcVectorType src3 = src.val[3];
+    uint32x4_t res00 =
+        vandq(vcgeq(src0, lower_bound_vect_), vcleq(src0, upper_bound_vect_));
+    uint32x4_t res01 =
+        vandq(vcgeq(src1, lower_bound_vect_), vcleq(src1, upper_bound_vect_));
+    uint32x4_t res11 =
+        vandq(vcgeq(src2, lower_bound_vect_), vcleq(src2, upper_bound_vect_));
+    uint32x4_t res12 =
+        vandq(vcgeq(src3, lower_bound_vect_), vcleq(src3, upper_bound_vect_));
+    // AND-ing the results of the compare ops sets all 32 bits to all 0's or all
+    // 1's. Unzipping them twice chooses 8 bits from those 32.
+    uint16x8_t res0 =
+        vuzp1q_u16(vreinterpretq_u16_u32(res00), vreinterpretq_u16_u32(res01));
+    uint16x8_t res1 =
+        vuzp1q_u16(vreinterpretq_u16_u32(res11), vreinterpretq_u16_u32(res12));
+    return vuzp1q_u8(vreinterpretq_u8_u16(res0), vreinterpretq_u8_u16(res1));
+  }
+
+  SrcVectorType lower_bound_vect_;
+  SrcVectorType upper_bound_vect_;
+  float lower_bound_;
+  float upper_bound_;
+};  // end of class InRange<float>
+
+template <typename T>
+kleidicv_error_t in_range(const T *src, size_t src_stride, uint8_t *dst,
+                          size_t dst_stride, size_t width, size_t height,
+                          T lower_bound, T upper_bound) {
+  CHECK_POINTER_AND_STRIDE(src, src_stride, height);
+  CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
+  CHECK_IMAGE_SIZE(width, height);
+
+  InRange<T> operation{lower_bound, upper_bound};
+  Rectangle rect{width, height};
+  Rows<const T> src_rows{src, src_stride};
+  Rows<uint8_t> dst_rows{dst, dst_stride};
+
+  if constexpr (std::is_same_v<T, uint8_t>) {
+    apply_operation_by_rows(operation, rect, src_rows, dst_rows);
+  } else {
+    zip_rows(operation, rect, src_rows, dst_rows);
+  }
+
+  return KLEIDICV_OK;
+}
+
+#define KLEIDICV_INSTANTIATE_TEMPLATE(type)                                \
+  template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t in_range<type>(       \
+      const type *src, size_t src_stride, uint8_t *dst, size_t dst_stride, \
+      size_t width, size_t height, type lower_bound, type upper_bound)
+
+KLEIDICV_INSTANTIATE_TEMPLATE(uint8_t);
+KLEIDICV_INSTANTIATE_TEMPLATE(float);
+
+}  // namespace kleidicv::neon
diff --git a/kleidicv/src/arithmetics/in_range_sc.h b/kleidicv/src/arithmetics/in_range_sc.h
new file mode 100644
index 000000000..9b6710e08
--- /dev/null
+++ b/kleidicv/src/arithmetics/in_range_sc.h
@@ -0,0 +1,180 @@
+// SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef KLEIDICV_IN_RANGE_SC_H
+#define KLEIDICV_IN_RANGE_SC_H
+
+#include "kleidicv/kleidicv.h"
+#include "kleidicv/sve2.h"
+
+namespace KLEIDICV_TARGET_NAMESPACE {
+
+template <typename ScalarType>
+class InRange;
+
+template <>
+class InRange<uint8_t> : public UnrollTwice {
+ public:
+  using ContextType = Context;
+  using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<uint8_t>;
+  using VectorType = typename VecTraits::VectorType;
+  using SignedScalarType = typename std::make_signed<uint8_t>::type;
+  using SignedVecTraits =
+      KLEIDICV_TARGET_NAMESPACE::VecTraits<SignedScalarType>;
+  using SignedVectorType = typename SignedVecTraits::VectorType;
+
+  InRange(VectorType &vec_lower_bound,
+          VectorType &vec_upper_bound) KLEIDICV_STREAMING_COMPATIBLE
+      : vec_lower_bound_(vec_lower_bound),
+        vec_upper_bound_(vec_upper_bound) {}
+
+  // NOLINTBEGIN(readability-make-member-function-const)
+  VectorType vector_path(ContextType ctx,
+                         VectorType src) KLEIDICV_STREAMING_COMPATIBLE {
+    svbool_t pg = ctx.predicate();
+
+    VectorType diff_low = svsub_x(pg, src, vec_lower_bound_);
+    // Shift subtraction result 7 bits to the right, i.e. divide by 2^7 to keep
+    // sign bit only.
+    VectorType result_within_low =
+        VecTraits::svreinterpret(SignedVecTraits::svasr_n(
+            pg, SignedVecTraits::svreinterpret(diff_low), 7));
+
+    VectorType diff_up = svsub_x(pg, vec_upper_bound_, src);
+    VectorType result_within_up =
+        VecTraits::svreinterpret(SignedVecTraits::svasr_n(
+            pg, SignedVecTraits::svreinterpret(diff_up), 7));
+
+    // src[i] < lower_bound OR src[i] > upper_bound
+    VectorType out_of_range = svorr_x(pg, result_within_low, result_within_up);
+    // NOT(out_of_range) to set within elements to 1 and the rest to 0.
+    VectorType within_range = svcnot_x(pg, out_of_range);
+    // Negate to set elements within to 0xFF (all 1s).
+    return VecTraits::svreinterpret(
+        svqneg_x(pg, SignedVecTraits::svreinterpret(within_range)));
+  }
+  // NOLINTEND(readability-make-member-function-const)
+
+ private:
+  VectorType &vec_lower_bound_;
+  VectorType &vec_upper_bound_;
+};  // end of class InRange<uint8_t>
+
+template <>
+class InRange<float> {
+ public:
+  using SrcVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<float>;
+  using SrcVectorType = typename SrcVecTraits::VectorType;
+  using DstVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<uint8_t>;
+  using DstVectorType = typename DstVecTraits::VectorType;
+
+  InRange(float lower_bound, float upper_bound) KLEIDICV_STREAMING_COMPATIBLE
+      : lower_bound_(lower_bound),
+        upper_bound_(upper_bound) {}
+
+  void process_row(size_t width, Columns<const float> src,
+                   Columns<uint8_t> dst) KLEIDICV_STREAMING_COMPATIBLE {
+    LoopUnroll{width, SrcVecTraits::num_lanes()}
+        .unroll_n_times<4>([&](size_t step) KLEIDICV_STREAMING_COMPATIBLE {
+          svbool_t pg_src = SrcVecTraits::svptrue();
+          SrcVectorType src_v0 = svld1(pg_src, &src[0]);
+          SrcVectorType src_v1 = svld1_vnum(pg_src, &src[0], 1);
+          SrcVectorType src_v2 = svld1_vnum(pg_src, &src[0], 2);
+          SrcVectorType src_v3 = svld1_vnum(pg_src, &src[0], 3);
+          DstVectorType res0 =
+              vector_path(pg_src, src_v0, src_v1, src_v2, src_v3);
+          svbool_t pg_dst = DstVecTraits::svptrue();
+          svst1(pg_dst, &dst[0], res0);
+          src += ptrdiff_t(step);
+          dst += ptrdiff_t(step);
+        })
+        .remaining([&](size_t length, size_t) KLEIDICV_STREAMING_COMPATIBLE {
+          size_t index = 0;
+          svbool_t pg = SrcVecTraits::svwhilelt(index, length);
+          while (svptest_first(SrcVecTraits::svptrue(), pg)) {
+            SrcVectorType src_vector = svld1(pg, &src[ptrdiff_t(index)]);
+            DstVectorType result_vector = remaining_path(pg, src_vector);
+            svst1b(pg, &dst[ptrdiff_t(index)],
+                   svreinterpret_u32(result_vector));
+            // Update loop counter and calculate the next governing predicate.
+            index += SrcVecTraits::num_lanes();
+            pg = SrcVecTraits::svwhilelt(index, length);
+          }
+        });
+  }
+
+ private:
+  // NOLINTBEGIN(readability-make-member-function-const)
+  DstVectorType vector_path(svbool_t full_pg, SrcVectorType fsrc0,
+                            SrcVectorType fsrc1, SrcVectorType fsrc2,
+                            SrcVectorType fsrc3) KLEIDICV_STREAMING_COMPATIBLE {
+    svbool_t pred0 = svand_z(full_pg, svcmpge(full_pg, fsrc0, lower_bound_),
+                             svcmple(full_pg, fsrc0, upper_bound_));
+    auto res00 = svsel(pred0, svdup_u32(0xFF), svdup_u32(0));
+
+    svbool_t pred1 = svand_z(full_pg, svcmpge(full_pg, fsrc1, lower_bound_),
+                             svcmple(full_pg, fsrc1, upper_bound_));
+    auto res01 = svsel(pred1, svdup_u32(0xFF), svdup_u32(0));
+
+    svbool_t pred2 = svand_z(full_pg, svcmpge(full_pg, fsrc2, lower_bound_),
+                             svcmple(full_pg, fsrc2, upper_bound_));
+    auto res10 = svsel(pred2, svdup_u32(0xFF), svdup_u32(0));
+
+    svbool_t pred3 = svand_z(full_pg, svcmpge(full_pg, fsrc3, lower_bound_),
+                             svcmple(full_pg, fsrc3, upper_bound_));
+    auto res11 = svsel(pred3, svdup_u32(0xFF), svdup_u32(0));
+
+    auto res0 =
+        svuzp1(svreinterpret_u16_u32(res00), svreinterpret_u16_u32(res01));
+    auto res1 =
+        svuzp1(svreinterpret_u16_u32(res10), svreinterpret_u16_u32(res11));
+    return svuzp1(svreinterpret_u8_u16(res0), svreinterpret_u8_u16(res1));
+  }
+  // NOLINTEND(readability-make-member-function-const)
+
+  // NOLINTBEGIN(readability-make-member-function-const)
+  DstVectorType remaining_path(svbool_t &pg, SrcVectorType src)
+      KLEIDICV_STREAMING_COMPATIBLE {
+    svbool_t predicate = svand_z(pg, svcmpge(pg, src, lower_bound_),
+                                 svcmple(pg, src, upper_bound_));
+    return svsel(predicate, DstVecTraits::svdup(0xFF), DstVecTraits::svdup(0));
+  }
+  // NOLINTEND(readability-make-member-function-const)
+
+  float lower_bound_;
+  float upper_bound_;
+};  // end of class InRange<float>
+
+template <typename T>
+kleidicv_error_t in_range_sc(const T *src, size_t src_stride, uint8_t *dst,
+                             size_t dst_stride, size_t width, size_t height,
+                             T lower_bound,
+                             T upper_bound) KLEIDICV_STREAMING_COMPATIBLE {
+  CHECK_POINTER_AND_STRIDE(src, src_stride, height);
+  CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
+  CHECK_IMAGE_SIZE(width, height);
+
+  Rectangle rect{width, height};
+  Rows<const T> src_rows{src, src_stride};
+  Rows<uint8_t> dst_rows{dst, dst_stride};
+
+  using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<T>;
+  using VectorType = typename VecTraits::VectorType;
+
+  if constexpr (std::is_same_v<T, uint8_t>) {
+    VectorType vec_lower_bound = VecTraits::svdup(lower_bound);
+    VectorType vec_upper_bound = VecTraits::svdup(upper_bound);
+    InRange<T> operation{vec_lower_bound, vec_upper_bound};
+    apply_operation_by_rows(operation, rect, src_rows, dst_rows);
+  } else {
+    InRange<T> operation{lower_bound, upper_bound};
+    zip_rows(operation, rect, src_rows, dst_rows);
+  }
+
+  return KLEIDICV_OK;
+}
+
+}  // namespace KLEIDICV_TARGET_NAMESPACE
+
+#endif  // KLEIDICV_IN_RANGE_SC_H
diff --git a/kleidicv/src/arithmetics/in_range_sme2.cpp b/kleidicv/src/arithmetics/in_range_sme2.cpp
new file mode 100644
index 000000000..bbcf9e87a
--- /dev/null
+++ b/kleidicv/src/arithmetics/in_range_sme2.cpp
@@ -0,0 +1,25 @@
+// SPDX-FileCopyrightText: 2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "in_range_sc.h"
+
+namespace kleidicv::sme2 {
+
+template <typename T>
+KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t
+in_range(const T *src, size_t src_stride, uint8_t *dst, size_t dst_stride,
+         size_t width, size_t height, T lower_bound, T upper_bound) {
+  return in_range_sc<T>(src, src_stride, dst, dst_stride, width, height,
+                        lower_bound, upper_bound);
+}
+
+#define KLEIDICV_INSTANTIATE_TEMPLATE(type)                                \
+  template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t in_range<type>(       \
+      const type *src, size_t src_stride, uint8_t *dst, size_t dst_stride, \
+      size_t width, size_t height, type lower_bound, type upper_bound)
+
+KLEIDICV_INSTANTIATE_TEMPLATE(uint8_t);
+KLEIDICV_INSTANTIATE_TEMPLATE(float);
+
+}  // namespace kleidicv::sme2
diff --git a/kleidicv/src/arithmetics/in_range_sve2.cpp b/kleidicv/src/arithmetics/in_range_sve2.cpp
new file mode 100644
index 000000000..1b241ba57
--- /dev/null
+++ b/kleidicv/src/arithmetics/in_range_sve2.cpp
@@ -0,0 +1,25 @@
+// SPDX-FileCopyrightText: 2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "in_range_sc.h"
+
+namespace kleidicv::sve2 {
+
+template <typename T>
+KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t
+in_range(const T *src, size_t src_stride, uint8_t *dst, size_t dst_stride,
+         size_t width, size_t height, T lower_bound, T upper_bound) {
+  return in_range_sc<T>(src, src_stride, dst, dst_stride, width, height,
+                        lower_bound, upper_bound);
+}
+
+#define KLEIDICV_INSTANTIATE_TEMPLATE(type)                                \
+  template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t in_range<type>(       \
+      const type *src, size_t src_stride, uint8_t *dst, size_t dst_stride, \
+      size_t width, size_t height, type lower_bound, type upper_bound)
+
+KLEIDICV_INSTANTIATE_TEMPLATE(uint8_t);
+KLEIDICV_INSTANTIATE_TEMPLATE(float);
+
+}  // namespace kleidicv::sve2
diff --git a/scripts/benchmark/run_benchmarks_4K.sh b/scripts/benchmark/run_benchmarks_4K.sh
index 8f16dc0af..6d7030843 100755
--- a/scripts/benchmark/run_benchmarks_4K.sh
+++ b/scripts/benchmark/run_benchmarks_4K.sh
@@ -87,6 +87,9 @@ benchmarks=(
     "UintToFloat: opencv_perf_core '*convertTo/*' '(3840x2160, 8UC1,  32FC1, 1, 1, 0)'"
 
     "CompareGt: opencv_perf_core '*compare/*' '(3840x2160, 8UC1, CMP_GT)'"
+
+    "InRange_U8:  opencv_perf_core '*inRangeScalar/*' '(3840x2160,  8UC1, 1, 2)'"
+    "InRange_F32: opencv_perf_core '*inRangeScalar/*' '(3840x2160, 32FC1, 1, 2)'"
 )
 
 for idx in "${!benchmarks[@]}"; do
diff --git a/scripts/benchmark/run_benchmarks_FHD.sh b/scripts/benchmark/run_benchmarks_FHD.sh
index b1941b87b..8a183409d 100755
--- a/scripts/benchmark/run_benchmarks_FHD.sh
+++ b/scripts/benchmark/run_benchmarks_FHD.sh
@@ -87,6 +87,9 @@ benchmarks=(
     "UintToFloat: opencv_perf_core '*convertTo/*' '(1920x1080, 8UC1,  32FC1, 1, 1, 0)'"
 
     "CompareGt: opencv_perf_core '*compare/*' '(1920x1080, 8UC1, CMP_GT)'"
+
+    "InRange_U8:  opencv_perf_core '*inRangeScalar/*' '(1920x1080,  8UC1, 1, 2)'"
+    "InRange_F32: opencv_perf_core '*inRangeScalar/*' '(1920x1080, 32FC1, 1, 2)'"
 )
 
 for idx in "${!benchmarks[@]}"; do
diff --git a/scripts/ci-opencv.sh b/scripts/ci-opencv.sh
index 5f30336b0..4aac91f32 100755
--- a/scripts/ci-opencv.sh
+++ b/scripts/ci-opencv.sh
@@ -67,6 +67,7 @@ CORE_TEST_PATTERNS=(
   '*Core_minMaxIdx*'
   '*Core_Array*'
   '*Compare*'
+  '*Core_InRange/*'
 )
 CORE_TEST_PATTERNS_STR="$(join_strings_with_colon "${CORE_TEST_PATTERNS[*]}")"
 ../../../conformity/opencv_kleidicv/bin/opencv_test_core \
diff --git a/test/api/test_in_range.cpp b/test/api/test_in_range.cpp
new file mode 100644
index 000000000..b22ee5cd9
--- /dev/null
+++ b/test/api/test_in_range.cpp
@@ -0,0 +1,483 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <gtest/gtest.h>
+
+#include "framework/array.h"
+#include "framework/generator.h"
+#include "framework/utils.h"
+#include "kleidicv/kleidicv.h"
+
+#define KLEIDICV_IN_RANGE(type, suffix) \
+  KLEIDICV_API(in_range, kleidicv_in_range_##suffix, type)
+
+KLEIDICV_IN_RANGE(uint8_t, u8);
+KLEIDICV_IN_RANGE(float, f32);
+
+template <typename ElementType>
+class InRangeTest final {
+ private:
+  template <typename T>
+  static constexpr T min() {
+    return std::numeric_limits<T>::min();
+  }
+
+  template <typename T>
+  static constexpr T max() {
+    return std::numeric_limits<T>::max();
+  }
+  struct Elements {
+    size_t width;
+    size_t height;
+    ElementType lower_bound;
+    ElementType upper_bound;
+
+    std::vector<std::vector<ElementType>> source_rows;
+    std::vector<std::vector<uint8_t>> expected_rows;
+
+    Elements(size_t _width, size_t _height, ElementType _lower_bound,
+             ElementType _upper_bound,
+             std::vector<std::vector<ElementType>>&& _source_rows,
+             std::vector<std::vector<uint8_t>>&& _expected_rows)
+        : width(_width),
+          height(_height),
+          lower_bound(_lower_bound),
+          upper_bound(_upper_bound),
+          source_rows(std::move(_source_rows)),
+          expected_rows(std::move(_expected_rows)) {}
+  };
+
+  static float floatval(uint32_t v) {
+    float result;  // Avoid cppcoreguidelines-init-variables. NOLINT
+    static_assert(sizeof(result) == sizeof(v));
+    memcpy(&result, &v, sizeof(result));
+    return result;
+  }
+
+  const float quietNaN = std::numeric_limits<float>::quiet_NaN();
+  const float signalingNaN = std::numeric_limits<float>::signaling_NaN();
+  const float posInfinity = std::numeric_limits<float>::infinity();
+  const float negInfinity = -std::numeric_limits<float>::infinity();
+
+  const float minusNaN = floatval(0xFF800001);
+  const float plusNaN = floatval(0x7F800001);
+  const float plusZero = 0.0F;
+  const float minusZero = -0.0F;
+
+  const float oneNaN = floatval(0x7FC00001);
+  const float zeroDivZero = -std::numeric_limits<float>::quiet_NaN();
+  const float floatMin = std::numeric_limits<float>::min();
+  const float floatMax = std::numeric_limits<float>::max();
+
+  const float posSubnormalMin = std::numeric_limits<float>::denorm_min();
+  const float posSubnormalMax = floatval(0x007FFFFF);
+  const float negSubnormalMin = -std::numeric_limits<float>::denorm_min();
+  const float negSubnormalMax = floatval(0x807FFFFF);
+
+  void calculate_expected(const test::Array2D<ElementType>& source,
+                          test::Array2D<uint8_t>& expected,
+                          ElementType lower_bound = 50,
+                          ElementType upper_bound = 100) {
+    for (size_t hindex = 0; hindex < source.height(); ++hindex) {
+      for (size_t vindex = 0; vindex < source.width(); ++vindex) {
+        uint8_t calculated = 0;
+        // NOLINTBEGIN(clang-analyzer-core.uninitialized.Assign)
+        ElementType current_element = *source.at(hindex, vindex);
+        // NOLINTEND(clang-analyzer-core.uninitialized.Assign)
+        if ((current_element >= lower_bound) &&
+            (current_element <= upper_bound)) {
+          calculated = 255;
+        } else {
+          calculated = 0;
+        }
+        *expected.at(hindex, vindex) = calculated;
+      }
+    }
+  }
+
+  template <typename T>
+  size_t get_linear_height(size_t width, size_t minimum_size) {
+    size_t image_size =
+        std::max(minimum_size, static_cast<size_t>(max<T>() - min<T>()));
+    size_t height = image_size / width + 1;
+
+    return height;
+  }
+
+  template <typename T>
+  std::tuple<test::Array2D<T>, test::Array2D<uint8_t>, test::Array2D<uint8_t>>
+  get_linear_arrays(size_t width, size_t height) {
+    test::Array2D<T> source(width, height, 1, 1);
+    test::Array2D<uint8_t> expected(width, height, 1, 1);
+    test::Array2D<uint8_t> actual(width, height, 1, 1);
+
+    if constexpr (std::is_same_v<float, T>) {
+      test::GenerateLinearSeries<T> generator(min<uint8_t>());
+      source.fill(generator);
+    } else if constexpr (std::is_same_v<uint8_t, T>) {
+      test::GenerateLinearSeries<T> generator(min<T>());
+      source.fill(generator);
+    } else {
+      static_assert(sizeof(T), "should never happen");
+    }
+
+    calculate_expected(source, expected);
+
+    return {source, expected, actual};
+  }
+
+  std::array<size_t, 1> inputs_padding_{};
+  std::array<size_t, 1> outputs_padding_{};
+
+ public:
+  // Tests special float values.
+  template <typename T, std::enable_if_t<std::is_same_v<float, T>, bool> = true>
+  const Elements& get_custom_test_elements() {
+    static const Elements kTestElements = {
+        // clang-format off
+        4, 6,
+        -11.4, 1111.10,
+        {{
+          {       quietNaN,    signalingNaN,     posInfinity,     negInfinity},
+          {       minusNaN,         plusNaN,        plusZero,       minusZero},
+          {         oneNaN,     zeroDivZero,        floatMin,        floatMax},
+          {posSubnormalMin, posSubnormalMax, negSubnormalMin, negSubnormalMax},
+          {        1111.11,        -1112.22,          113.33,          114.44},
+          {           11.5,            12.5,           -11.5,           -12.5},
+        }},
+        {{
+          { 0,    0,   0,   0},
+          { 0,    0, 255, 255},
+          {  0,   0, 255,   0},
+          {255, 255, 255, 255},
+          {  0,   0, 255, 255},
+          {255, 255,   0,   0},
+        }}
+        // clang-format on
+    };
+    return kTestElements;
+  }
+
+  // float and uint8_t behave differently around their max values.
+  template <typename T, std::enable_if_t<std::is_same_v<float, T>, bool> = true>
+  const Elements& get_max_test_elements() {
+    static const Elements kTestElements = {
+        // clang-format off
+        2, 3,
+        max<T>() - 100, max<T>() - 1,
+        {{
+          {            10, max<T>() - 105},
+          {max<T>() - 100,   max<T>() - 2},
+          {  max<T>() - 1,       max<T>()},
+        }},
+        {{
+          {  0, 255},
+          {255, 255},
+          {255, 255},
+        }}
+        // clang-format on
+    };
+    return kTestElements;
+  }
+
+  // More extensive test for uint8_t values.
+  template <typename T,
+            std::enable_if_t<std::is_same_v<uint8_t, T>, bool> = true>
+  const Elements& get_custom_test_elements() {
+    static const Elements kTestElements = {
+        // clang-format off
+        16, 16,
+        1, 10,
+        {{
+          {177,  48,   1,  58, 223, 153,  36, 183, 113, 234, 218, 216, 129, 243, 230, 185},
+          { 45, 187,  58,  65, 247,  68, 122, 158, 239,  84, 171,  15, 180, 156, 227,  21},
+          { 53,  95, 117, 138, 197, 251,  23, 120, 201, 223, 219,  50,  37,   4, 112, 129},
+          {240, 245, 130,   3,  65, 182,  19, 254, 241, 216, 198,  22, 101, 189, 151,  60},
+          {234, 147, 201,  67, 121,  42, 173, 151, 108, 220, 190, 230,  23, 135, 139,  66},
+          { 62, 180, 191,  65,  74, 190, 237, 102,   6, 196,  14,  76, 192,  12, 223, 232},
+          { 66, 167,  77,  86,  85, 228, 104, 127, 251, 126,  56, 107,  93, 153, 222,   2},
+          {160,  74, 225, 116, 243, 193, 166, 157, 193, 233, 109, 141,  86,  83, 156, 203},
+          {  5, 225,  87,  53,   5, 104, 122, 157,  68,  45,  39, 229, 133, 138,  47, 231},
+          {225,   4, 246,   0,  89, 227, 150, 114,  45, 144,  98, 192, 245, 232, 214,  39},
+          {124, 119, 166,  74,  34, 237, 107, 200, 243, 139, 241, 147,  99,  34, 101,  73},
+          {234, 157, 175, 116,  51,  73,  93, 125, 125, 110,  96, 252, 209,  60, 169, 138},
+          {248, 224, 173,  23,  55, 103,  90,  71, 125, 224,  46, 212,  11,  25, 102, 151},
+          {183, 199, 239,  93,  17, 125,   4, 109,  55,  89, 196, 136, 164,  76, 159, 223},
+          {172,  12,  50,  36, 123, 102, 127, 119, 251,  46,  85,  25,  64, 140, 111, 163},
+          {120,  19,  56, 222,  99, 198, 180, 115, 150, 168, 222, 198,  91, 154,  35, 133},
+        }},
+        {{
+          {  0,   0, 255,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
+          {  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
+          {  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 255,   0,   0},
+          {  0,   0,   0, 255,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
+          {  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
+          {  0,   0,   0,   0,   0,   0,   0,   0, 255,   0,   0,   0,   0,   0,   0,   0},
+          {  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 255},
+          {  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
+          {255,   0,   0,   0, 255,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
+          {  0, 255,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
+          {  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
+          {  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
+          {  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
+          {  0,   0,   0,   0,   0,   0, 255,   0,   0,   0,   0,   0,   0,   0,   0,   0},
+          {  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
+          {  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0},
+        }}
+        // clang-format on
+    };
+    return kTestElements;
+  }
+
+  // float and uint8_t behave differently around their max values.
+  template <typename T,
+            std::enable_if_t<std::is_same_v<uint8_t, T>, bool> = true>
+  const Elements& get_max_test_elements() {
+    static const Elements kTestElements = {
+        // clang-format off
+        2, 3,
+        max<T>() - 100, max<T>() - 1,
+        {{
+          {max<T>() - 105,           10},
+          {max<T>() - 100, max<T>() - 2},
+          {  max<T>() - 1,     max<T>()},
+        }},
+        {{
+          {  0,   0},
+          {255, 255},
+          {255,   0},
+        }}
+        // clang-format on
+    };
+    return kTestElements;
+  }
+
+  // lower_bound > upperbound -> no values within bounds
+  const Elements& get_empty_interval_test_elements() {
+    static const Elements kTestElements = {
+        // clang-format off
+        3, 1,
+        13, 0,
+        {{
+          {12, 13, 14}
+        }},
+        {{
+          { 0, 0, 0}
+        }}
+        // clang-format on
+    };
+    return kTestElements;
+  }
+
+  const Elements& get_same_bounds_test_elements() {
+    static const Elements kTestElements = {
+        // clang-format off
+        3, 1,
+        13, 13,
+        {{
+          {12, 13, 14}
+        }},
+        {{
+          { 0, 255, 0}
+        }}
+        // clang-format on
+    };
+    return kTestElements;
+  }
+
+  const Elements& get_min_test_elements() {
+    static const Elements kTestElements = {
+        // clang-format off
+        3, 1,
+        min<ElementType>(), min<ElementType>() + 1,
+        {{
+          {min<ElementType>(), min<ElementType>() + 1, min<ElementType>() + 2}
+        }},
+        {{
+          { 255, 255, 0}
+        }}
+        // clang-format on
+    };
+    return kTestElements;
+  }
+
+  const Elements& get_zero_one_test_elements() {
+    static const Elements kTestElements = {
+        // clang-format off
+        3, 1,
+        0, 1,
+        {{
+          {0, 1, 2}
+        }},
+        {{
+          { 255, 255, 0}
+        }}
+        // clang-format on
+    };
+    return kTestElements;
+  }
+
+  // minimum_size set by caller to trigger the 'big' conversion path.
+  template <typename T>
+  void test_linear(size_t width, size_t minimum_size = 1, T lower_bound = 50,
+                   T upper_bound = 100) {
+    size_t height = 0;
+    height = get_linear_height<uint8_t>(width, minimum_size);
+
+    auto arrays = get_linear_arrays<T>(width, height);
+
+    test::Array2D<T>& source = std::get<0>(arrays);
+    test::Array2D<uint8_t>& expected = std::get<1>(arrays);
+    test::Array2D<uint8_t>& actual = std::get<2>(arrays);
+
+    ASSERT_EQ(KLEIDICV_OK, (in_range<T>()(source.data(), source.stride(),
+                                          actual.data(), actual.stride(), width,
+                                          height, lower_bound, upper_bound)));
+
+    EXPECT_EQ_ARRAY2D(expected, actual);
+  }
+
+  void test(const Elements& elements_list) {
+    const size_t width = elements_list.width;
+    const size_t height = elements_list.height;
+    const ElementType lower_bound = elements_list.lower_bound;
+    const ElementType upper_bound = elements_list.upper_bound;
+
+    test::Array2D<ElementType> source(width, height);
+    test::Array2D<uint8_t> expected(width, height);
+    test::Array2D<uint8_t> actual(width, height);
+
+    for (size_t i = 0; i < height; i++) {
+      source.set(i, 0, elements_list.source_rows[i]);
+      expected.set(i, 0, elements_list.expected_rows[i]);
+    }
+
+    ASSERT_EQ(KLEIDICV_OK,
+              (in_range<ElementType>()(source.data(), source.stride(),
+                                       actual.data(), actual.stride(), width,
+                                       height, lower_bound, upper_bound)));
+
+    EXPECT_EQ_ARRAY2D(expected, actual);
+  }
+
+  InRangeTest<ElementType>& with_paddings(
+      std::initializer_list<size_t> inputs_padding,
+      std::initializer_list<size_t> outputs_padding) {
+    size_t i = 0;
+    for (size_t p : inputs_padding) {
+      inputs_padding_[i++] = p;
+    }
+    size_t j = 0;
+    for (size_t q : outputs_padding) {
+      outputs_padding_[j++] = q;
+    }
+    return *this;
+  }
+
+  InRangeTest() {
+    inputs_padding_.fill(0);
+    outputs_padding_.fill(0);
+  }
+
+  virtual ~InRangeTest() = default;
+};
+
+template <typename ElementType>
+class InRange : public testing::Test {};
+
+using ElementTypes = ::testing::Types<uint8_t, float>;
+
+TYPED_TEST_SUITE(InRange, ElementTypes);
+
+// Tests various padding combinations.
+TYPED_TEST(InRange, Padding) {
+  auto elements_list = InRangeTest<TypeParam>{}.get_same_bounds_test_elements();
+  InRangeTest<TypeParam>{}.with_paddings({0}, {0}).test(elements_list);
+  InRangeTest<TypeParam>{}.with_paddings({0}, {1}).test(elements_list);
+  InRangeTest<TypeParam>{}.with_paddings({1}, {0}).test(elements_list);
+  InRangeTest<TypeParam>{}.with_paddings({1}, {1}).test(elements_list);
+}
+
+TYPED_TEST(InRange, Scalar) {
+  InRangeTest<TypeParam>{}.template test_linear<TypeParam>(
+      test::Options::vector_length() - 1);
+}
+
+TYPED_TEST(InRange, Vector) {
+  InRangeTest<TypeParam>{}.template test_linear<TypeParam>(
+      test::Options::vector_length() * 2);
+}
+
+TYPED_TEST(InRange, TestEmptyInterval) {
+  auto elements_list =
+      InRangeTest<TypeParam>{}.get_empty_interval_test_elements();
+  InRangeTest<TypeParam>{}.test(elements_list);
+}
+
+TYPED_TEST(InRange, TestSameBounds) {
+  auto elements_list = InRangeTest<TypeParam>{}.get_same_bounds_test_elements();
+  InRangeTest<TypeParam>{}.test(elements_list);
+}
+
+TYPED_TEST(InRange, TestCustom) {
+  auto elements_list =
+      InRangeTest<TypeParam>{}.template get_custom_test_elements<TypeParam>();
+  InRangeTest<TypeParam>{}.test(elements_list);
+}
+
+TYPED_TEST(InRange, TestMax) {
+  auto elements_list =
+      InRangeTest<TypeParam>{}.template get_max_test_elements<TypeParam>();
+  InRangeTest<TypeParam>{}.test(elements_list);
+}
+
+TYPED_TEST(InRange, TestMin) {
+  auto elements_list = InRangeTest<TypeParam>{}.get_min_test_elements();
+  InRangeTest<TypeParam>{}.test(elements_list);
+}
+
+TYPED_TEST(InRange, TestZeroOne) {
+  auto elements_list = InRangeTest<TypeParam>{}.get_zero_one_test_elements();
+  InRangeTest<TypeParam>{}.test(elements_list);
+}
+
+TYPED_TEST(InRange, NullPointer) {
+  const TypeParam src[1] = {};
+  uint8_t dst[1];
+  test::test_null_args(in_range<TypeParam>(), src, sizeof(uint8_t), dst,
+                       sizeof(TypeParam), 1, 1, 1, 1);
+}
+
+TYPED_TEST(InRange, Misalignment) {
+  if (sizeof(TypeParam) == 1) {
+    // misalignment impossible
+    return;
+  }
+  TypeParam src[2] = {};
+  uint8_t dst[2];
+  EXPECT_EQ(KLEIDICV_ERROR_ALIGNMENT,
+            in_range<TypeParam>()(src, sizeof(TypeParam) + 1, dst,
+                                  sizeof(uint8_t), 2, 2, 1, 1));
+}
+
+TYPED_TEST(InRange, ZeroImageSize) {
+  TypeParam src[1] = {};
+  uint8_t dst[1];
+  EXPECT_EQ(KLEIDICV_OK, in_range<TypeParam>()(src, sizeof(TypeParam), dst,
+                                               sizeof(uint8_t), 0, 1, 1, 1));
+  EXPECT_EQ(KLEIDICV_OK, in_range<TypeParam>()(src, sizeof(TypeParam), dst,
+                                               sizeof(uint8_t), 1, 0, 1, 1));
+}
+
+TYPED_TEST(InRange, OversizeImage) {
+  TypeParam src[1] = {};
+  uint8_t dst[1];
+  EXPECT_EQ(KLEIDICV_ERROR_RANGE,
+            in_range<TypeParam>()(src, sizeof(TypeParam), dst, sizeof(uint8_t),
+                                  KLEIDICV_MAX_IMAGE_PIXELS + 1, 1, 1, 1));
+  EXPECT_EQ(KLEIDICV_ERROR_RANGE,
+            in_range<TypeParam>()(src, sizeof(TypeParam), dst, sizeof(uint8_t),
+                                  KLEIDICV_MAX_IMAGE_PIXELS,
+                                  KLEIDICV_MAX_IMAGE_PIXELS, 1, 1));
+}
-- 
GitLab


From 70789beb773545c0547c8b42b2f28204ebbe5efd Mon Sep 17 00:00:00 2001
From: Ioana Ghiban <ioana.ghiban@arm.com>
Date: Fri, 9 Aug 2024 10:59:11 +0200
Subject: [PATCH 3/4] Allow testing special float values in all conformity
 tests

---
 conformity/opencv/common.h            | 44 +++++++++++++++++++++++++++
 conformity/opencv/test_float_conv.cpp | 44 ---------------------------
 conformity/opencv/test_in_range.cpp   | 22 ++++++++++++++
 3 files changed, 66 insertions(+), 44 deletions(-)

diff --git a/conformity/opencv/common.h b/conformity/opencv/common.h
index 002266df2..351ba4ddf 100644
--- a/conformity/opencv/common.h
+++ b/conformity/opencv/common.h
@@ -32,6 +32,50 @@
 
 #define KLEIDICV_CONFORMITY_MAX_MAT_DIMENSIONS 4
 
+static float floatval(uint32_t v) {
+  float result;  // Avoid cppcoreguidelines-init-variables. NOLINT
+  static_assert(sizeof(result) == sizeof(v));
+  memcpy(&result, &v, sizeof(result));
+  return result;
+}
+
+const float quietNaN = std::numeric_limits<float>::quiet_NaN();
+const float signalingNaN = std::numeric_limits<float>::signaling_NaN();
+const float posInfinity = std::numeric_limits<float>::infinity();
+const float negInfinity = -std::numeric_limits<float>::infinity();
+
+const float minusNaN = floatval(0xFF800001);
+const float plusNaN = floatval(0x7F800001);
+const float plusZero = 0.0F;
+const float minusZero = -0.0F;
+
+const float oneNaN = floatval(0x7FC00001);
+const float zeroDivZero = -std::numeric_limits<float>::quiet_NaN();
+const float floatMin = std::numeric_limits<float>::min();
+const float floatMax = std::numeric_limits<float>::max();
+
+const float posSubnormalMin = std::numeric_limits<float>::denorm_min();
+const float posSubnormalMax = floatval(0x007FFFFF);
+const float negSubnormalMin = -std::numeric_limits<float>::denorm_min();
+const float negSubnormalMax = floatval(0x807FFFFF);
+
+static constexpr int custom_data_float_height = 8;
+static constexpr int custom_data_float_width = 4;
+
+static float
+    custom_data_float[custom_data_float_height * custom_data_float_width] = {
+        // clang-format off
+        quietNaN, signalingNaN, posInfinity, negInfinity,
+        minusNaN, plusNaN, plusZero, minusZero,
+        oneNaN, zeroDivZero, floatMin, floatMax,
+        posSubnormalMin, posSubnormalMax, negSubnormalMin, negSubnormalMax,
+        1111.11, -1112.22, 113.33, 114.44,
+        111.51, 112.62, 113.73, 114.84,
+        126.66, 127.11, 128.66, 129.11,
+        11.5, 12.5, -11.5, -12.5,
+        // clang-format on
+};
+
 template <typename T>
 static constexpr T min() {
   return std::numeric_limits<T>::min();
diff --git a/conformity/opencv/test_float_conv.cpp b/conformity/opencv/test_float_conv.cpp
index 26b6bb189..152301617 100644
--- a/conformity/opencv/test_float_conv.cpp
+++ b/conformity/opencv/test_float_conv.cpp
@@ -7,33 +7,6 @@
 
 #include "tests.h"
 
-float floatval(uint32_t v) {
-  float result;
-  static_assert(sizeof(result) == sizeof(v));
-  memcpy(&result, &v, sizeof(result));
-  return result;
-}
-
-float quietNaN = std::numeric_limits<float>::quiet_NaN();
-float signalingNaN = std::numeric_limits<float>::signaling_NaN();
-float posInfinity = std::numeric_limits<float>::infinity();
-float negInfinity = -std::numeric_limits<float>::infinity();
-
-float minusNaN = floatval(0xFF800001);
-float plusNaN = floatval(0x7F800001);
-float plusZero = 0.0F;
-float minusZero = -0.0F;
-
-float oneNaN = floatval(0x7FC00001);
-float zeroDivZero = -std::numeric_limits<float>::quiet_NaN();
-float floatMin = std::numeric_limits<float>::min();
-float floatMax = std::numeric_limits<float>::max();
-
-float posSubnormalMin = std::numeric_limits<float>::denorm_min();
-float posSubnormalMax = floatval(0x007FFFFF);
-float negSubnormalMin = -std::numeric_limits<float>::denorm_min();
-float negSubnormalMax = floatval(0x807FFFFF);
-
 template <bool Signed>
 cv::Mat exec_float32_to_int8(cv::Mat& input) {
   cv::Mat result;
@@ -98,23 +71,6 @@ bool test_int8_to_float32_random(int index,
   return false;
 }
 
-static constexpr int custom_data_float_height = 8;
-static constexpr int custom_data_float_width = 4;
-
-static float
-    custom_data_float[custom_data_float_height * custom_data_float_width] = {
-        // clang-format off
-        quietNaN, signalingNaN, posInfinity, negInfinity,
-        minusNaN, plusNaN, plusZero, minusZero,
-        oneNaN, zeroDivZero, floatMin, floatMax,
-        posSubnormalMin, posSubnormalMax, negSubnormalMin, negSubnormalMax,
-        1111.11, -1112.22, 113.33, 114.44,
-        111.51, 112.62, 113.73, 114.84,
-        126.66, 127.11, 128.66, 129.11,
-        11.5, 12.5, -11.5, -12.5,
-        // clang-format on
-};
-
 static constexpr int custom_data_int8_height = 1;
 static constexpr int custom_data_int8_width = 7;
 
diff --git a/conformity/opencv/test_in_range.cpp b/conformity/opencv/test_in_range.cpp
index e925c80b2..315474e05 100644
--- a/conformity/opencv/test_in_range.cpp
+++ b/conformity/opencv/test_in_range.cpp
@@ -36,6 +36,26 @@ bool test_in_range(int index, RecreatedMessageQueue& request_queue,
 
   return false;
 }
+
+template <int LowerBound, int UpperBound, size_t Format>
+bool test_in_range_custom(int index, RecreatedMessageQueue& request_queue,
+                          RecreatedMessageQueue& reply_queue) {
+  cv::Mat input(custom_data_float_height, custom_data_float_width, CV_32FC1,
+                custom_data_float);
+
+  cv::Mat actual = exec_in_range<LowerBound, UpperBound>(input);
+  cv::Mat expected =
+      get_expected_from_subordinate(index, request_queue, reply_queue, input);
+
+  if (are_matrices_different<uint8_t>(0, actual, expected)) {
+    fail_print_matrices(custom_data_float_height, custom_data_float_width,
+                        input, actual, expected);
+    return true;
+  }
+
+  return false;
+}
+
 #endif
 
 std::vector<test>& in_range_tests_get() {
@@ -56,6 +76,8 @@ std::vector<test>& in_range_tests_get() {
     TEST("InRange float,  lower_bound =      14.999, upper_bound =      20.998", (test_in_range<14999, 20998, CV_32FC1>), (exec_in_range<14999, 20998>)),
     TEST("InRange float,  lower_bound =  999999.998, upper_bound =  999999.999", (test_in_range<(999999998), (999999999), CV_32FC1>), (exec_in_range<(999999998), (999999999)>)),
     TEST("InRange float,  lower_bound =  999989.999, upper_bound =  999999.999", (test_in_range<(999989999), (999999999), CV_32FC1>), (exec_in_range<(999989999), (999999999)>)),
+    TEST("InRange float,  lower_bound =           0, upper_bound =           1", (test_in_range_custom<(0), (1), CV_32FC1>), (exec_in_range<(0), (1)>)),
+    TEST("InRange float,  lower_bound =       -11.4, upper_bound =      1111.1", (test_in_range_custom<(-11400), (1111100), CV_32FC1>), (exec_in_range<(-11400), (1111100)>)),
   };
   // clang-format on
   return tests;
-- 
GitLab


From 1c342af3faf0d50253a596b512e76c7051fdb322 Mon Sep 17 00:00:00 2001
From: Ioana Ghiban <ioana.ghiban@arm.com>
Date: Fri, 9 Aug 2024 11:01:56 +0200
Subject: [PATCH 4/4] Add SVE intrinsics overloads required for inRange

---
 kleidicv/include/kleidicv/sve2.h | 40 ++++++++++++++++++++++++++++----
 1 file changed, 36 insertions(+), 4 deletions(-)

diff --git a/kleidicv/include/kleidicv/sve2.h b/kleidicv/include/kleidicv/sve2.h
index ade877891..214cda636 100644
--- a/kleidicv/include/kleidicv/sve2.h
+++ b/kleidicv/include/kleidicv/sve2.h
@@ -331,9 +331,9 @@ class VecTraits<int8_t> : public VecTraitsBase<int8_t> {
   static inline svint8_t svdup(int8_t v) KLEIDICV_STREAMING_COMPATIBLE {
     return svdup_s8(v);
   }
-  static inline svuint8_t svreinterpret(svint8_t v)
+  static inline svint8_t svreinterpret(svuint8_t v)
       KLEIDICV_STREAMING_COMPATIBLE {
-    return svreinterpret_u8(v);
+    return svreinterpret_s8(v);
   }
   static inline svint8_t svasr_n(svbool_t pg, svint8_t v,
                                  uint8_t s) KLEIDICV_STREAMING_COMPATIBLE {
@@ -347,9 +347,13 @@ class VecTraits<uint8_t> : public VecTraitsBase<uint8_t> {
   static inline svuint8_t svdup(uint8_t v) KLEIDICV_STREAMING_COMPATIBLE {
     return svdup_u8(v);
   }
-  static inline svint8_t svreinterpret(svuint8_t v)
+  static inline svuint8_t svreinterpret(svint8_t v)
       KLEIDICV_STREAMING_COMPATIBLE {
-    return svreinterpret_s8(v);
+    return svreinterpret_u8(v);
+  }
+  static inline svuint8_t svsub(svbool_t pg, svuint8_t v,
+                                svuint8_t u) KLEIDICV_STREAMING_COMPATIBLE {
+    return svsub_u8_x(pg, v, u);
   }
   static inline svuint8_t svhsub(svbool_t pg, svuint8_t v,
                                  svuint8_t u) KLEIDICV_STREAMING_COMPATIBLE {
@@ -363,6 +367,10 @@ class VecTraits<int16_t> : public VecTraitsBase<int16_t> {
   static inline svint16_t svdup(int16_t v) KLEIDICV_STREAMING_COMPATIBLE {
     return svdup_s16(v);
   }
+  static inline svint16_t svreinterpret(svuint16_t v)
+      KLEIDICV_STREAMING_COMPATIBLE {
+    return svreinterpret_s16(v);
+  }
 };  // end of class VecTraits<int16_t>
 
 template <>
@@ -371,6 +379,10 @@ class VecTraits<uint16_t> : public VecTraitsBase<uint16_t> {
   static inline svuint16_t svdup(uint16_t v) KLEIDICV_STREAMING_COMPATIBLE {
     return svdup_u16(v);
   }
+  static inline svuint16_t svreinterpret(svint16_t v)
+      KLEIDICV_STREAMING_COMPATIBLE {
+    return svreinterpret_u16(v);
+  }
 };  // end of class VecTraits<uint16_t>
 
 template <>
@@ -379,6 +391,10 @@ class VecTraits<int32_t> : public VecTraitsBase<int32_t> {
   static inline svint32_t svdup(int32_t v) KLEIDICV_STREAMING_COMPATIBLE {
     return svdup_s32(v);
   }
+  static inline svint32_t svreinterpret(svuint32_t v)
+      KLEIDICV_STREAMING_COMPATIBLE {
+    return svreinterpret_s32(v);
+  }
 };  // end of class VecTraits<int32_t>
 
 template <>
@@ -387,6 +403,10 @@ class VecTraits<uint32_t> : public VecTraitsBase<uint32_t> {
   static inline svuint32_t svdup(uint32_t v) KLEIDICV_STREAMING_COMPATIBLE {
     return svdup_u32(v);
   }
+  static inline svuint32_t svreinterpret(svint32_t v)
+      KLEIDICV_STREAMING_COMPATIBLE {
+    return svreinterpret_u32(v);
+  }
 };  // end of class VecTraits<uint32_t>
 
 template <>
@@ -395,6 +415,10 @@ class VecTraits<int64_t> : public VecTraitsBase<int64_t> {
   static inline svint64_t svdup(int64_t v) KLEIDICV_STREAMING_COMPATIBLE {
     return svdup_s64(v);
   }
+  static inline svint64_t svreinterpret(svuint64_t v)
+      KLEIDICV_STREAMING_COMPATIBLE {
+    return svreinterpret_s64(v);
+  }
 };  // end of class VecTraits<int64_t>
 
 template <>
@@ -403,6 +427,10 @@ class VecTraits<uint64_t> : public VecTraitsBase<uint64_t> {
   static inline svuint64_t svdup(uint64_t v) KLEIDICV_STREAMING_COMPATIBLE {
     return svdup_u64(v);
   }
+  static inline svuint64_t svreinterpret(svint64_t v)
+      KLEIDICV_STREAMING_COMPATIBLE {
+    return svreinterpret_u64(v);
+  }
 };  // end of class VecTraits<uint64_t>
 
 template <>
@@ -411,6 +439,10 @@ class VecTraits<float> : public VecTraitsBase<float> {
   static inline svfloat32_t svdup(float v) KLEIDICV_STREAMING_COMPATIBLE {
     return svdup_f32(v);
   }
+  static inline svfloat32_t svsub(svbool_t pg, svfloat32_t v,
+                                  svfloat32_t u) KLEIDICV_STREAMING_COMPATIBLE {
+    return svsub_f32_x(pg, v, u);
+  }
 };  // end of class VecTraits<float>
 
 template <>
-- 
GitLab