From a03ff21dbc2380e742dc6c04dbb562f041a17464 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= <igor.podgainoi@arm.com>
Date: Fri, 22 Mar 2024 16:52:19 +0100
Subject: [PATCH 1/8] Fix typo in sve2.h

---
 intrinsiccv/include/intrinsiccv/sve2.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/intrinsiccv/include/intrinsiccv/sve2.h b/intrinsiccv/include/intrinsiccv/sve2.h
index cd326d0c7..df7b4f3e9 100644
--- a/intrinsiccv/include/intrinsiccv/sve2.h
+++ b/intrinsiccv/include/intrinsiccv/sve2.h
@@ -491,7 +491,7 @@ class RemainingPathAdapter : public OperationBase<OperationType> {
   }
 };  // end of class RemainingPathAdapter<OperationType>
 
-// Shorthand for applying a generic unrolled NEON operation.
+// Shorthand for applying a generic unrolled SVE2 operation.
 template <typename OperationType, typename... ArgTypes>
 void apply_operation_by_rows(OperationType &operation, ArgTypes &&...args)
     INTRINSICCV_STREAMING_COMPATIBLE {
-- 
GitLab


From 5b91a0d2e51d89c18b6cb8454f092a47d1a2829d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= <igor.podgainoi@arm.com>
Date: Fri, 22 Mar 2024 17:12:28 +0100
Subject: [PATCH 2/8] Better fix for error "non-constant-expression cannot be
 narrowed"

---
 test/api/test_resize_linear.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/api/test_resize_linear.cpp b/test/api/test_resize_linear.cpp
index a011a259b..18dcd6f3f 100644
--- a/test/api/test_resize_linear.cpp
+++ b/test/api/test_resize_linear.cpp
@@ -239,7 +239,7 @@ static void do_large_dimensions_test(size_t x_scale, size_t y_scale) {
   src.resize(src_stride * src_height);
   dst.resize(dst_stride * dst_height);
   expected_data.resize(dst_stride * dst_height);
-  std::mt19937 generator{static_cast<unsigned>(test::Options::seed())};
+  std::mt19937 generator(test::Options::seed());
   std::generate(src.begin(), src.end(), generator);
   resize_linear_unaccelerated_u8(src.data(), src_stride, src_width, src_height,
                                  expected_data.data(), dst_stride, dst_width,
-- 
GitLab


From 64c7392a65eb5e21da2a15ee9783cc53596f1bd9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= <igor.podgainoi@arm.com>
Date: Thu, 4 Apr 2024 11:28:09 +0200
Subject: [PATCH 3/8] Add *.h.in files checking to formatting script

---
 scripts/format.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/format.sh b/scripts/format.sh
index fba356fed..c50dfeda6 100755
--- a/scripts/format.sh
+++ b/scripts/format.sh
@@ -33,7 +33,7 @@ SOURCES="$(find \
     "${INTRINSICCV_ROOT_PATH}"/benchmark \
     "${INTRINSICCV_ROOT_PATH}"/intrinsiccv \
     "${INTRINSICCV_ROOT_PATH}"/test \
-    \( -name \*.cpp -o -name \*.h \) \
+    \( -name \*.cpp -o -name \*.h -o -name \*.h.in \) \
     -print)"
 
 if [[ "${CHECK_ONLY}" == "ON" ]]; then
-- 
GitLab


From ab25b050265ce0f0cd658beac485414d984de65d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= <igor.podgainoi@arm.com>
Date: Fri, 22 Mar 2024 17:34:23 +0100
Subject: [PATCH 4/8] Implement float32 to int8 type conversion

---
 adapters/opencv/intrinsiccv_hal.cpp           |  9 ++
 intrinsiccv/include/intrinsiccv/intrinsiccv.h | 23 +++++
 .../src/conversions/float_to_int_api.cpp      | 44 +++++++++
 .../src/conversions/float_to_int_neon.cpp     | 20 +++++
 intrinsiccv/src/conversions/float_to_int_sc.h | 89 +++++++++++++++++++
 .../src/conversions/float_to_int_sme2.cpp     | 21 +++++
 .../src/conversions/float_to_int_sve2.cpp     | 19 ++++
 7 files changed, 225 insertions(+)
 create mode 100644 intrinsiccv/src/conversions/float_to_int_api.cpp
 create mode 100644 intrinsiccv/src/conversions/float_to_int_neon.cpp
 create mode 100644 intrinsiccv/src/conversions/float_to_int_sc.h
 create mode 100644 intrinsiccv/src/conversions/float_to_int_sme2.cpp
 create mode 100644 intrinsiccv/src/conversions/float_to_int_sve2.cpp

diff --git a/adapters/opencv/intrinsiccv_hal.cpp b/adapters/opencv/intrinsiccv_hal.cpp
index 3076ea6d5..d22318ddc 100644
--- a/adapters/opencv/intrinsiccv_hal.cpp
+++ b/adapters/opencv/intrinsiccv_hal.cpp
@@ -670,6 +670,15 @@ int convertTo(const uchar *src_data, size_t src_step, int src_depth,
               uchar *dst_data, size_t dst_step, int dst_depth, int width,
               int height, double scale, double shift) {
   if (src_depth != dst_depth) {
+    // type conversion
+    if (scale == 1.0 && shift == 0.0) {
+      // float32 to int8
+      if (src_depth == CV_32F && dst_depth == CV_8S) {
+        return convert_error(intrinsiccv_type_conversion_f32_s8(
+            reinterpret_cast<const float *>(src_data), src_step,
+            reinterpret_cast<int8_t *>(dst_data), dst_step, width, height));
+      }
+    }
     return CV_HAL_ERROR_NOT_IMPLEMENTED;
   }
 
diff --git a/intrinsiccv/include/intrinsiccv/intrinsiccv.h b/intrinsiccv/include/intrinsiccv/intrinsiccv.h
index c58397274..3e5d94a2c 100644
--- a/intrinsiccv/include/intrinsiccv/intrinsiccv.h
+++ b/intrinsiccv/include/intrinsiccv/intrinsiccv.h
@@ -1268,6 +1268,29 @@ INTRINSICCV_API_DECLARATION(intrinsiccv_scale_u8, const uint8_t *src,
                             size_t width, size_t height, float scale,
                             float shift);
 
+/// Converts the elements in `src` from type `float` to type `int8_t`,
+/// then stores the result in `dst`.
+///
+/// Each resulting element is saturated, i.e. it is the smallest/largest
+/// number of the type of the element if the result would underflow/overflow.
+/// Source and destination data length is `width` * `height`. Number of elements
+/// is limited to @ref INTRINSICCV_MAX_IMAGE_PIXELS.
+///
+/// @param src          Pointer to the source data. Must be non-null.
+/// @param src_stride   Distance in bytes from the start of one row to the
+///                     start of the next row for the source data.
+///                     Must not be less than width * sizeof(type).
+/// @param dst          Pointer to the destination data. Must be non-null.
+/// @param dst_stride   Distance in bytes from the start of one row to the
+///                     start of the next row for the destination data.
+///                     Must not be less than width * sizeof(type).
+/// @param width        Number of elements in a row.
+/// @param height       Number of rows in the data.
+///
+INTRINSICCV_API_DECLARATION(intrinsiccv_type_conversion_f32_s8,
+                            const float *src, size_t src_stride, int8_t *dst,
+                            size_t dst_stride, size_t width, size_t height);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif  // __cplusplus
diff --git a/intrinsiccv/src/conversions/float_to_int_api.cpp b/intrinsiccv/src/conversions/float_to_int_api.cpp
new file mode 100644
index 000000000..8a8a92f42
--- /dev/null
+++ b/intrinsiccv/src/conversions/float_to_int_api.cpp
@@ -0,0 +1,44 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "intrinsiccv/dispatch.h"
+#include "intrinsiccv/intrinsiccv.h"
+#include "intrinsiccv/types.h"
+
+namespace intrinsiccv {
+
+namespace neon {
+
+intrinsiccv_error_t type_conversion_float_to_int8_t(
+    const float* src, size_t src_stride, int8_t* dst, size_t dst_stride,
+    size_t width, size_t height);
+
+}  // namespace neon
+
+namespace sve2 {
+
+intrinsiccv_error_t type_conversion_float_to_int8_t(
+    const float* src, size_t src_stride, int8_t* dst, size_t dst_stride,
+    size_t width, size_t height);
+
+}  // namespace sve2
+
+namespace sme2 {
+
+intrinsiccv_error_t type_conversion_float_to_int8_t(
+    const float* src, size_t src_stride, int8_t* dst, size_t dst_stride,
+    size_t width, size_t height);
+
+}  // namespace sme2
+
+#define INTRINSICCV_DEFINE_C_API(name, itype, otype)                 \
+  INTRINSICCV_MULTIVERSION_C_API(                                    \
+      name, intrinsiccv::neon::type_conversion_##itype##_to_##otype, \
+      INTRINSICCV_SVE2_IMPL_IF(                                      \
+          intrinsiccv::sve2::type_conversion_##itype##_to_##otype),  \
+      intrinsiccv::sme2::type_conversion_##itype##_to_##otype)
+
+INTRINSICCV_DEFINE_C_API(intrinsiccv_type_conversion_f32_s8, float, int8_t);
+
+}  // namespace intrinsiccv
diff --git a/intrinsiccv/src/conversions/float_to_int_neon.cpp b/intrinsiccv/src/conversions/float_to_int_neon.cpp
new file mode 100644
index 000000000..6d3fee394
--- /dev/null
+++ b/intrinsiccv/src/conversions/float_to_int_neon.cpp
@@ -0,0 +1,20 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "intrinsiccv/intrinsiccv.h"
+#include "intrinsiccv/neon.h"
+
+namespace intrinsiccv::neon {
+
+intrinsiccv_error_t type_conversion_float_to_int8_t(const float*, size_t,
+                                                    int8_t*, size_t, size_t,
+                                                    size_t);
+
+intrinsiccv_error_t type_conversion_float_to_int8_t(const float*, size_t,
+                                                    int8_t*, size_t, size_t,
+                                                    size_t) {
+  return INTRINSICCV_ERROR_NOT_IMPLEMENTED;  // stub: no NEON path yet
+}
+
+}  // namespace intrinsiccv::neon
diff --git a/intrinsiccv/src/conversions/float_to_int_sc.h b/intrinsiccv/src/conversions/float_to_int_sc.h
new file mode 100644
index 000000000..10f8f749a
--- /dev/null
+++ b/intrinsiccv/src/conversions/float_to_int_sc.h
@@ -0,0 +1,89 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef INTRINSICCV_FLOAT_TO_INT_SC_H
+#define INTRINSICCV_FLOAT_TO_INT_SC_H
+
+#include "intrinsiccv/intrinsiccv.h"
+#include "intrinsiccv/sve2.h"
+
+namespace INTRINSICCV_TARGET_NAMESPACE {
+
+class float_to_int_operation final {
+ public:
+  using SrcVecTraits = INTRINSICCV_TARGET_NAMESPACE::VecTraits<float>;
+  using SrcVectorType = typename SrcVecTraits::VectorType;
+  using DstVecTraits = INTRINSICCV_TARGET_NAMESPACE::VecTraits<int32_t>;
+  using DstVectorType = typename DstVecTraits::VectorType;
+
+  using ContextType = INTRINSICCV_TARGET_NAMESPACE::Context;
+  using VecTraits = SrcVecTraits;
+  // Converts one row: two full vectors per step, then a predicated tail.
+  void process_row(size_t width, Columns<const float> src,
+                   Columns<int8_t> dst) {
+    LoopUnroll{width, VecTraits::num_lanes()}
+        .unroll_twice([&](size_t step) INTRINSICCV_STREAMING_COMPATIBLE {
+          svbool_t pg = VecTraits::svptrue();
+          Context ctx{pg};
+          SrcVectorType src_vector1 = svld1(pg, &src[0]);
+          SrcVectorType src_vector2 = svld1_vnum(pg, &src[0], 1);
+          DstVectorType result_vector1 = vector_path(ctx, src_vector1);
+          DstVectorType result_vector2 = vector_path(ctx, src_vector2);
+          svst1b(pg, &dst[0], result_vector1);
+          svst1b_vnum(pg, &dst[0], 1, result_vector2);
+          src += ptrdiff_t(step);
+          dst += ptrdiff_t(step);
+        })
+        .remaining([&](size_t length, size_t) INTRINSICCV_STREAMING_COMPATIBLE {
+          size_t index = 0;
+          svbool_t pg = VecTraits::svwhilelt(index, length);
+          Context ctx{pg};
+          while (svptest_first(VecTraits::svptrue(), pg)) {
+            SrcVectorType src_vector = svld1(pg, &src[ptrdiff_t(index)]);
+            DstVectorType result_vector = vector_path(ctx, src_vector);
+            svst1b(pg, &dst[ptrdiff_t(index)], result_vector);
+            // Advance by one vector of floats and rebuild the tail predicate.
+            index += VecTraits::num_lanes();
+            pg = VecTraits::svwhilelt(index, length);
+            ctx.set_predicate(pg);
+          }
+        });
+  }
+
+ private:
+  DstVectorType vector_path(ContextType ctx, SrcVectorType src)
+      INTRINSICCV_STREAMING_COMPATIBLE {
+    svbool_t pg = ctx.predicate();
+    // Round to an integral value using the current rounding mode.
+    src = svrinti_f32_x(pg, src);
+    // Saturate: lanes below -128 are replaced with -128.
+    svbool_t less = svcmplt_n_f32(pg, src, -128.0);
+    src = svdup_n_f32_m(src, less, -128.0);
+    // Saturate: lanes above 127 are replaced with 127.
+    svbool_t greater = svcmpgt_n_f32(pg, src, 127.0);
+    src = svdup_n_f32_m(src, greater, 127.0);
+    // Convert to int32 lanes; process_row stores only the low byte of each.
+    return svcvt_s32_f32_x(pg, src);
+  }
+};  // end of class float_to_int_operation
+
+static intrinsiccv_error_t type_conversion_float_to_int8_t_sc(
+    const float* src, size_t src_stride, int8_t* dst, size_t dst_stride,
+    size_t width, size_t height) INTRINSICCV_STREAMING_COMPATIBLE {
+  CHECK_POINTER_AND_STRIDE(src, src_stride);
+  CHECK_POINTER_AND_STRIDE(dst, dst_stride);
+  CHECK_IMAGE_SIZE(width, height);
+
+  Rows<const float> input_rows{src, src_stride};
+  Rows<int8_t> output_rows{dst, dst_stride};
+  Rectangle image_area{width, height};
+  float_to_int_operation converter;
+  zip_rows(converter, image_area, input_rows, output_rows);
+
+  return INTRINSICCV_OK;
+}
+
+}  // namespace INTRINSICCV_TARGET_NAMESPACE
+
+#endif  // INTRINSICCV_FLOAT_TO_INT_SC_H
diff --git a/intrinsiccv/src/conversions/float_to_int_sme2.cpp b/intrinsiccv/src/conversions/float_to_int_sme2.cpp
new file mode 100644
index 000000000..af389a575
--- /dev/null
+++ b/intrinsiccv/src/conversions/float_to_int_sme2.cpp
@@ -0,0 +1,21 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "float_to_int_sc.h"
+
+namespace intrinsiccv::sme2 {
+
+INTRINSICCV_LOCALLY_STREAMING INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t
+type_conversion_float_to_int8_t(const float*, size_t, int8_t*, size_t, size_t,
+                                size_t);
+
+INTRINSICCV_LOCALLY_STREAMING INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t
+type_conversion_float_to_int8_t(const float* src, size_t src_stride,
+                                int8_t* dst, size_t dst_stride, size_t width,
+                                size_t height) {
+  return type_conversion_float_to_int8_t_sc(src, src_stride, dst, dst_stride,
+                                            width, height);
+}
+
+}  // namespace intrinsiccv::sme2
diff --git a/intrinsiccv/src/conversions/float_to_int_sve2.cpp b/intrinsiccv/src/conversions/float_to_int_sve2.cpp
new file mode 100644
index 000000000..f58ea271e
--- /dev/null
+++ b/intrinsiccv/src/conversions/float_to_int_sve2.cpp
@@ -0,0 +1,19 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "float_to_int_sc.h"
+
+namespace intrinsiccv::sve2 {
+
+INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t type_conversion_float_to_int8_t(
+    const float*, size_t, int8_t*, size_t, size_t, size_t);
+
+INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t type_conversion_float_to_int8_t(
+    const float* src, size_t src_stride, int8_t* dst, size_t dst_stride,
+    size_t width, size_t height) {
+  return type_conversion_float_to_int8_t_sc(src, src_stride, dst, dst_stride,
+                                            width, height);
+}
+
+}  // namespace intrinsiccv::sve2
-- 
GitLab


From 0879a05b8126f8fe7931ecf5610b157067072c9d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= <igor.podgainoi@arm.com>
Date: Thu, 28 Mar 2024 18:27:48 +0100
Subject: [PATCH 5/8] Added unit tests for float32 to int8 conversion

---
 intrinsiccv/include/intrinsiccv/config.h.in |  11 +
 test/api/test_float_to_int8_t.cpp           | 251 ++++++++++++++++++++
 test/framework/array.h                      |  16 ++
 3 files changed, 278 insertions(+)
 create mode 100644 test/api/test_float_to_int8_t.cpp

diff --git a/intrinsiccv/include/intrinsiccv/config.h.in b/intrinsiccv/include/intrinsiccv/config.h.in
index 068c88b66..dd521b82a 100644
--- a/intrinsiccv/include/intrinsiccv/config.h.in
+++ b/intrinsiccv/include/intrinsiccv/config.h.in
@@ -90,4 +90,15 @@
 #define INTRINSICCV_NODISCARD
 #endif
 
+// GCC and clang: silence -Wstrict-aliasing between the BEGIN/END markers.
+#ifdef __GNUC__
+#define INTRINSICCV_NO_STRICT_ALIASING_BEGIN \
+  _Pragma("GCC diagnostic push")             \
+      _Pragma("GCC diagnostic ignored \"-Wstrict-aliasing\"")
+#define INTRINSICCV_NO_STRICT_ALIASING_END _Pragma("GCC diagnostic pop")
+#else
+#define INTRINSICCV_NO_STRICT_ALIASING_BEGIN
+#define INTRINSICCV_NO_STRICT_ALIASING_END
+#endif
+
 #endif  // INTRINSICCV_CONFIG_H
diff --git a/test/api/test_float_to_int8_t.cpp b/test/api/test_float_to_int8_t.cpp
new file mode 100644
index 000000000..23d03507d
--- /dev/null
+++ b/test/api/test_float_to_int8_t.cpp
@@ -0,0 +1,251 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <gtest/gtest.h>
+
+#include "framework/array.h"
+#include "framework/generator.h"
+#include "framework/operation.h"
+#include "framework/utils.h"
+#include "intrinsiccv/intrinsiccv.h"
+#include "test_config.h"
+
+#define INTRINSICCV_TYPE_CONVERSION(itype, input_type_name) \
+  INTRINSICCV_API(float_to_int8_t,                          \
+                  intrinsiccv_type_conversion_##input_type_name##_s8, itype)
+
+INTRINSICCV_TYPE_CONVERSION(float, f32);
+
+template <typename ElementType>
+class Float32ToInt8TestBase {
+  using OutputType = int8_t;
+
+ protected:
+  static constexpr OutputType min() {
+    return std::numeric_limits<OutputType>::min();
+  }
+  static constexpr OutputType max() {
+    return std::numeric_limits<OutputType>::max();
+  }
+
+  struct Elements {
+    size_t width;
+    size_t height;
+
+    std::vector<std::vector<ElementType>> source_rows;
+    std::vector<std::vector<OutputType>> expected_rows;
+
+    Elements(size_t _width, size_t _height,
+             std::vector<std::vector<ElementType>>&& _source_rows,
+             std::vector<std::vector<OutputType>>&& _expected_rows)
+        : width(_width),
+          height(_height),
+          source_rows(std::move(_source_rows)),
+          expected_rows(std::move(_expected_rows)) {}
+  };
+
+ private:
+  static constexpr uint32_t quietNaN = 0x7FC00000;
+  static constexpr uint32_t signalingNaN = 0x7FA00000;
+  static constexpr uint32_t posInfinity = 0x7F800000;
+  static constexpr uint32_t negInfinity = 0xFF800000;
+
+  static constexpr uint32_t minusNaN = 0xFF800001;
+  static constexpr uint32_t plusNaN = 0x7F800001;
+  static constexpr uint32_t plusZero = 0x00000000;
+  static constexpr uint32_t minusZero = 0x80000000;
+
+  static constexpr uint32_t oneNaN = 0x7FC00001;
+  static constexpr uint32_t zeroDivZero = 0xFFC00000;
+  static constexpr uint32_t floatMin = 0x00800000;
+  static constexpr uint32_t floatMax = 0x7F7FFFFF;
+
+  static constexpr float _floatval(uint32_t v) {  // reinterpret bits as float
+    static_assert(sizeof(float) == 4);
+    INTRINSICCV_NO_STRICT_ALIASING_BEGIN
+    return *reinterpret_cast<float*>(&v);  // NOTE(review): prefer memcpy
+    INTRINSICCV_NO_STRICT_ALIASING_END
+  }
+
+  const Elements test_case_custom = {
+      // clang-format off
+    4, 7,  // width, height: height must equal the number of rows listed below
+    {{
+      { _floatval(quietNaN), _floatval(signalingNaN), _floatval(posInfinity), _floatval(negInfinity) },
+      { _floatval(minusNaN), _floatval(plusNaN), _floatval(plusZero), _floatval(minusZero) },
+      { _floatval(oneNaN), _floatval(zeroDivZero), _floatval(floatMin), _floatval(floatMax) },
+      { 1111.11, -1112.22, 113.33, 114.44 },
+      { 111.51, 112.62, 113.73, 114.84 },
+      { 126.66, 127.11, 128.66, 129.11 },
+      { 11.5, 12.5, -11.5, -12.5 }
+    }},
+    {{
+      { 0, 0, 127, -128 },
+      { 0, 0, 0, 0 },
+      { 0, 0, 0, 127 },
+      { 127, -128, 113, 114 },
+      { 112, 113, 114, 115 },
+      { 127, 127, 127, 127 },
+      { 12, 12, -12, -12 }
+    }}
+      // clang-format on
+  };
+
+ public:
+  // minimum_size set by caller to trigger the 'big' conversion path.
+  void test_scalar(size_t minimum_size = 1) {
+    size_t width = test::Options::vector_length() - 1;
+    test_linear(width, minimum_size);
+  }
+
+  void test_vector(size_t minimum_size = 1) {
+    size_t width = test::Options::vector_length() * 2;
+    test_linear(width, minimum_size);
+  }
+
+  void test_custom() {
+    const size_t& width = test_case_custom.width;
+    const size_t& height = test_case_custom.height;
+
+    test::Array2D<ElementType> source(width, height);
+    test::Array2D<OutputType> expected(width, height);
+    test::Array2D<OutputType> actual(width, height);
+
+    for (size_t i = 0; i < height; i++) {
+      source.set(i, 0, test_case_custom.source_rows[i]);
+      expected.set(i, 0, test_case_custom.expected_rows[i]);
+    }
+
+    ASSERT_EQ(INTRINSICCV_OK, intrinsiccv_type_conversion_f32_s8(
+                                  source.data(), source.stride(), actual.data(),
+                                  actual.stride(), width, height));
+
+    EXPECT_EQ_ARRAY2D(expected, actual);
+  }
+
+  void test_fill(const size_t width, const size_t height) {
+    test::Array2D<ElementType> source(width, height, 1, 1);
+    test::Array2D<OutputType> expected(width, height, 1, 1);
+    test::Array2D<OutputType> actual(width, height, 1, 1);
+
+    source.fill(10.67F);
+    expected.fill(11);
+
+    actual.fill(0);
+
+    ASSERT_EQ(INTRINSICCV_OK, intrinsiccv_type_conversion_f32_s8(
+                                  source.data(), source.stride(), actual.data(),
+                                  actual.stride(), width, height));
+
+    EXPECT_EQ_ARRAY2D(expected, actual);
+  }
+
+ private:
+  class GenerateLinearSeries : public test::Generator<ElementType> {
+   public:
+    explicit GenerateLinearSeries(ElementType start_from)
+        : counter_{start_from} {}
+
+    std::optional<ElementType> next() override { return counter_++; }
+
+   private:
+    ElementType counter_;
+  };  // end of class GenerateLinearSeries
+
+  void test_linear(size_t width, size_t minimum_size) {
+    size_t image_size =
+        std::max(minimum_size, static_cast<size_t>(max() - min()));
+    size_t height = image_size / width + 1;
+    test::Array2D<ElementType> source(width, height, 1, 1);
+    test::Array2D<OutputType> expected(width, height, 1, 1);
+    test::Array2D<OutputType> actual(width, height, 1, 1);
+
+    GenerateLinearSeries generator(min());
+
+    source.fill(generator);
+
+    calculate_expected(source, expected);
+
+    ASSERT_EQ(INTRINSICCV_OK, intrinsiccv_type_conversion_f32_s8(
+                                  source.data(), source.stride(), actual.data(),
+                                  actual.stride(), width, height));
+
+    EXPECT_EQ_ARRAY2D(expected, actual);
+  }
+
+ protected:
+  void calculate_expected(const test::Array2D<ElementType>& source,
+                          test::Array2D<OutputType>& expected) {
+    for (size_t hindex = 0; hindex < source.height(); ++hindex) {
+      for (size_t vindex = 0; vindex < source.width(); ++vindex) {
+        OutputType calculated = 0;
+        // NOLINTBEGIN(clang-analyzer-core.uninitialized.Assign)
+        ElementType result = *source.at(hindex, vindex);
+        // NOLINTEND(clang-analyzer-core.uninitialized.Assign)
+        if (result > max()) {
+          calculated = max();
+        } else if (result < min()) {
+          calculated = min();
+        } else {
+          calculated = result;
+        }
+        *expected.at(hindex, vindex) = calculated;
+      }
+    }
+  }
+};  // end of class Float32ToInt8TestBase
+
+template <typename ElementType>
+class Float32ToInt8Test1 final : public Float32ToInt8TestBase<ElementType> {};
+
+template <typename TypeParam>
+class Float32ToInt8Test : public testing::Test {};
+
+using ElementTypes = ::testing::Types<float>;
+
+// Tests intrinsiccv_type_conversion_f32_s8 API.
+TYPED_TEST_SUITE(Float32ToInt8Test, ElementTypes);
+
+TYPED_TEST(Float32ToInt8Test, TestScalar) {
+  Float32ToInt8Test1<TypeParam>{}.test_scalar();
+}
+TYPED_TEST(Float32ToInt8Test, TestVector) {
+  Float32ToInt8Test1<TypeParam>{}.test_vector();
+}
+TYPED_TEST(Float32ToInt8Test, TestCustomValues) {
+  Float32ToInt8Test1<TypeParam>{}.test_custom();
+}
+TYPED_TEST(Float32ToInt8Test, TestCustomFits128VectorSize) {
+  Float32ToInt8Test1<TypeParam>{}.test_fill(4, 1);
+}
+TYPED_TEST(Float32ToInt8Test, TestCustomFits128VectorSize2x) {
+  Float32ToInt8Test1<TypeParam>{}.test_fill(4, 2);
+}
+TYPED_TEST(Float32ToInt8Test, TestCustomFits128VectorSize3x) {
+  Float32ToInt8Test1<TypeParam>{}.test_fill(4, 3);
+}
+TYPED_TEST(Float32ToInt8Test, TestCustomFits512VectorSize) {
+  Float32ToInt8Test1<TypeParam>{}.test_fill(4, 4);
+}
+TYPED_TEST(Float32ToInt8Test, TestCustomFits512VectorSize2x) {
+  Float32ToInt8Test1<TypeParam>{}.test_fill(4, 8);
+}
+TYPED_TEST(Float32ToInt8Test, TestCustomFits512VectorSize3x) {
+  Float32ToInt8Test1<TypeParam>{}.test_fill(6, 8);
+}
+TYPED_TEST(Float32ToInt8Test, TestCustom128OneRemaining) {
+  Float32ToInt8Test1<TypeParam>{}.test_fill(1, 17);
+}
+TYPED_TEST(Float32ToInt8Test, TestCustom128AllButOneRemaining) {
+  Float32ToInt8Test1<TypeParam>{}.test_fill(5, 3);
+}
+TYPED_TEST(Float32ToInt8Test, TestCustomAboutHalfRemaining) {
+  Float32ToInt8Test1<TypeParam>{}.test_fill(19, 2);
+}
+TYPED_TEST(Float32ToInt8Test, TestCustomEmpty) {
+  Float32ToInt8Test1<TypeParam>{}.test_fill(0, 0);
+}
+TYPED_TEST(Float32ToInt8Test, TestCustomOne) {
+  Float32ToInt8Test1<TypeParam>{}.test_fill(1, 1);
+}
diff --git a/test/framework/array.h b/test/framework/array.h
index f854c9819..7f010cac6 100644
--- a/test/framework/array.h
+++ b/test/framework/array.h
@@ -141,6 +141,22 @@ class Array2D : public TwoDimensional<ElementType> {
     }
   }
 
+  // Copies `values` into the given row, beginning at the given column.
+  void set(size_t row, size_t column, const std::vector<ElementType> &values) {
+    ASSERT_EQ(valid(), true) << "Array is invalid.";
+    ASSERT_GE(width() - column, values.size());
+
+    ElementType *const first = at(row, column);
+    if (first == nullptr) {
+      return;
+    }
+
+    // Write the values consecutively, starting at `first`.
+    for (size_t offset = 0; offset < values.size(); ++offset) {
+      first[offset] = values[offset];
+    }
+  }
+
   // Sets values starting in a given row starting at a given column.
   //
   // The layout of the input TwoDimensional object is not altered, meaning that
-- 
GitLab


From db064099398c9691a2143bc1d4b344829f4b937d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= <igor.podgainoi@arm.com>
Date: Tue, 2 Apr 2024 15:19:51 +0200
Subject: [PATCH 6/8] Exclude NEON float tests from CI

---
 scripts/ci.sh | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/scripts/ci.sh b/scripts/ci.sh
index 25086dfb0..e0c44cb0b 100755
--- a/scripts/ci.sh
+++ b/scripts/ci.sh
@@ -48,16 +48,17 @@ ninja -C build/gcc
 
 # Run tests
 LONG_VECTOR_TESTS="GRAY2.*:RGB*"
+FLOAT_CONVERSION_TESTS="-Float32ToInt8Test*"
 TESTRESULT=0
 qemu-aarch64     build/test/framework/intrinsiccv-framework-test --gtest_output=xml:build/test-results/ || TESTRESULT=1
-qemu-aarch64 -cpu cortex-a35 build/test/api/intrinsiccv-api-test --gtest_output=xml:build/test-results/clang-neon/ || TESTRESULT=1
+qemu-aarch64 -cpu cortex-a35 build/test/api/intrinsiccv-api-test --gtest_filter="${FLOAT_CONVERSION_TESTS}" --gtest_output=xml:build/test-results/clang-neon/ || TESTRESULT=1
 qemu-aarch64 -cpu max,sve128=on,sme=off \
   build/test/api/intrinsiccv-api-test --gtest_output=xml:build/test-results/clang-sve128/ --vector-length=16 || TESTRESULT=1
 qemu-aarch64 -cpu max,sve2048=on,sve-default-vector-length=256,sme=off \
   build/test/api/intrinsiccv-api-test --gtest_filter="${LONG_VECTOR_TESTS}" --gtest_output=xml:build/test-results/clang-sve2048/ --vector-length=256 || TESTRESULT=1
 qemu-aarch64 -cpu max,sve128=on,sme512=on \
   build/test/api/intrinsiccv-api-test --gtest_output=xml:build/test-results/clang-sme/ --vector-length=64 || TESTRESULT=1
-qemu-aarch64 -cpu cortex-a35 build/gcc/test/api/intrinsiccv-api-test --gtest_output=xml:build/test-results/gcc-neon/ || TESTRESULT=1
+qemu-aarch64 -cpu cortex-a35 build/gcc/test/api/intrinsiccv-api-test --gtest_filter="${FLOAT_CONVERSION_TESTS}" --gtest_output=xml:build/test-results/gcc-neon/ || TESTRESULT=1
 
 scripts/prefix_testsuite_names.py build/test-results/clang-neon/intrinsiccv-api-test.xml "clang-neon."
 scripts/prefix_testsuite_names.py build/test-results/clang-sve128/intrinsiccv-api-test.xml "clang-sve128."
@@ -76,7 +77,7 @@ if [[ $(dpkg --print-architecture) = arm64 ]]; then
     -DINTRINSICCV_ENABLE_SME2=OFF \
     -DCMAKE_CXX_FLAGS="-fsanitize=address,undefined -fno-sanitize-recover=all -Wno-pass-failed"
   ninja -C build/sanitize intrinsiccv-api-test
-  build/sanitize/test/api/intrinsiccv-api-test
+  build/sanitize/test/api/intrinsiccv-api-test --gtest_filter="${FLOAT_CONVERSION_TESTS}"
 fi
 
 # Build benchmarks, just to prevent bitrot.
-- 
GitLab


From eac64557bac9dcac3adc4cf3e7627be5391979a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= <igor.podgainoi@arm.com>
Date: Wed, 3 Apr 2024 13:46:36 +0200
Subject: [PATCH 7/8] Added OpenCV conformity tests (float32 to int8)

---
 README.md                               |   5 +-
 conformity/opencv/CMakeLists.txt        |  90 ++++++++
 conformity/opencv/README.md             |  25 +++
 conformity/opencv/common.h              | 273 ++++++++++++++++++++++++
 conformity/opencv/manager.cpp           |  56 +++++
 conformity/opencv/subordinate.cpp       |  18 ++
 conformity/opencv/tests.cpp             | 137 ++++++++++++
 conformity/opencv/tests.h               |  18 ++
 scripts/ci.sh                           |   4 +
 scripts/format.sh                       |   9 +-
 scripts/run_opencv_conformity_checks.sh |  42 ++++
 11 files changed, 671 insertions(+), 6 deletions(-)
 create mode 100644 conformity/opencv/CMakeLists.txt
 create mode 100644 conformity/opencv/README.md
 create mode 100644 conformity/opencv/common.h
 create mode 100644 conformity/opencv/manager.cpp
 create mode 100644 conformity/opencv/subordinate.cpp
 create mode 100644 conformity/opencv/tests.cpp
 create mode 100644 conformity/opencv/tests.h
 create mode 100755 scripts/run_opencv_conformity_checks.sh

diff --git a/README.md b/README.md
index d3a241b19..2d0fbdabe 100644
--- a/README.md
+++ b/README.md
@@ -27,8 +27,9 @@ An adapter layer API is currently provided for:
 
 The directory `intrinsiccv` contains generic implementation of the library.
 Integration with other projects are stored in `adapters` folder. `test` contains
-API and unit tests for the library. All supporting scripts are located in
-`scripts`.
+API and unit tests for the library. `benchmark` contains benchmark source.
+`conformity` contains checks to compare the library output with different
+implementations. All supporting scripts are located in `scripts`.
 
 # Standalone build using CMake
 
diff --git a/conformity/opencv/CMakeLists.txt b/conformity/opencv/CMakeLists.txt
new file mode 100644
index 000000000..2a17f31b6
--- /dev/null
+++ b/conformity/opencv/CMakeLists.txt
@@ -0,0 +1,90 @@
+# SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+#
+# SPDX-License-Identifier: Apache-2.0
+
+cmake_minimum_required(VERSION 3.16)
+
+project("OpenCV Conformity" CXX)
+
+set(CMAKE_CXX_STANDARD 17)
+
+set(OPENCV_PATCH_VERSION "4.9")
+set(OPENCV_VERSION "${OPENCV_PATCH_VERSION}.0")
+
+include(FetchContent)
+
+FetchContent_Declare(
+  OpenCV
+  URL https://github.com/opencv/opencv/archive/refs/tags/${OPENCV_VERSION}.tar.gz
+  PATCH_COMMAND patch -p1 < ${CMAKE_CURRENT_LIST_DIR}/../../adapters/opencv/opencv-${OPENCV_PATCH_VERSION}.patch
+)
+
+FetchContent_MakeAvailable(OpenCV)
+
+# Manager
+add_executable(
+  manager
+  manager.cpp
+  tests.cpp
+)
+
+target_link_libraries(
+  manager
+  opencv_core
+  opencv_imgproc
+)
+
+target_include_directories(
+  manager
+  PRIVATE
+    ${CMAKE_BINARY_DIR}
+    ${OpenCV_SOURCE_DIR}/modules/core/include
+    ${OpenCV_SOURCE_DIR}/modules/imgproc/include
+)
+
+target_compile_definitions(
+  manager
+  PRIVATE SUBORDINATE=0
+)
+
+target_compile_options(
+  manager
+  PRIVATE
+    "-Werror"
+    "-Wall"
+    "-Wextra"
+)
+
+# Subordinate
+add_executable(
+  subordinate
+  subordinate.cpp
+  tests.cpp
+)
+
+target_link_libraries(
+  subordinate
+  opencv_core
+  opencv_imgproc
+)
+
+target_include_directories(
+  subordinate
+  PRIVATE
+    ${CMAKE_BINARY_DIR}
+    ${OpenCV_SOURCE_DIR}/modules/core/include
+    ${OpenCV_SOURCE_DIR}/modules/imgproc/include
+)
+
+target_compile_definitions(
+  subordinate
+  PRIVATE SUBORDINATE=1
+)
+
+target_compile_options(
+  subordinate
+  PRIVATE
+    "-Werror"
+    "-Wall"
+    "-Wextra"
+)
diff --git a/conformity/opencv/README.md b/conformity/opencv/README.md
new file mode 100644
index 000000000..a26fc1d4d
--- /dev/null
+++ b/conformity/opencv/README.md
@@ -0,0 +1,25 @@
+<!--
+SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+
+SPDX-License-Identifier: Apache-2.0
+-->
+
+# Conformity checks for OpenCV
+
+This CMake project makes it possible to automatically compare IntrinsicCV
+results with vanilla OpenCV for a given operation.
+
+To achieve this the project needs to be built twice (vanilla version and
+IntrinsicCV one) as the availability of IntrinsicCV for a given operation is a
+compile time decision. Then, the built executables (`manager` and `subordinate`,
+provided by different builds) perform the same operations, and the results are
+compared. The communication between the executables is implemented with POSIX
+IPC.
+
+The tests can be run from the project's root like:
+```
+scripts/run_opencv_conformity_checks.sh
+```
+
+The script expects an environment where IntrinsicCV can be built natively with
+`cmake` and `ninja`, and `qemu-aarch64` is available.
diff --git a/conformity/opencv/common.h b/conformity/opencv/common.h
new file mode 100644
index 000000000..f7736dcc2
--- /dev/null
+++ b/conformity/opencv/common.h
@@ -0,0 +1,273 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef INTRINSICCV_OPENCV_CONFORMITY_COMMON_H_
+#define INTRINSICCV_OPENCV_CONFORMITY_COMMON_H_
+
+#include <fcntl.h>
+#include <mqueue.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <cerrno>
+#include <cstring>
+#include <ctime>
+#include <exception>
+#include <string>
+#include <type_traits>
+
+#include "opencv2/core.hpp"
+#include "opencv2/imgproc.hpp"
+
+#define SHM_ID "/opencv_intrinisiccv_conformity_check_shm"
+#define SHM_SIZE (1024 * 1024)
+
+#define REQUEST_MQ_ID "/opencv_intrinisiccv_conformity_request_queue"
+#define REPLY_MQ_ID "/opencv_intrinisiccv_conformity_reply_queue"
+
+// Exception whose what() is `msg` + ": " + strerror(errno) at construction.
+class ExceptionWithErrno : public std::exception {
+ public:
+  explicit ExceptionWithErrno(const std::string& msg)
+      : msg_with_errno_{add_errno_details(msg)} {}
+  const char* what() const noexcept override { return msg_with_errno_.c_str(); }
+
+ private:
+  static std::string add_errno_details(const std::string& msg) {
+    const std::string errno_string(strerror(errno));
+    return msg + ": " + errno_string;
+  }
+  std::string msg_with_errno_;
+};  // end of class ExceptionWithErrno
+
+// Class to provide a file descriptor created with shm_open()
+template <bool Recreated>
+class ShmFD {
+ public:
+  template <bool check = Recreated>
+  explicit ShmFD(std::enable_if_t<!check, const std::string&> id)
+      : id_{}, fd_{open(id)} {}
+
+  template <bool check = Recreated>
+  explicit ShmFD(std::enable_if_t<check, const std::string&> id)
+      : id_{id}, fd_{unlink_and_open(id)} {}
+
+  virtual ~ShmFD() {
+    close(fd_);
+    if (Recreated) {
+      shm_unlink(id_.c_str());
+    }
+  }
+
+  // Disable copying
+  ShmFD(ShmFD const&) = delete;
+  ShmFD& operator=(ShmFD) = delete;
+
+  int fd() const { return fd_; }
+
+ private:
+  static int open(const std::string& id) {
+    int fd = shm_open(id.c_str(), O_RDWR, 0666);
+    if (fd < 0) {
+      throw ExceptionWithErrno("Cannot open shared memory, id: " + id);
+    }
+    return fd;
+  }
+
+  static int unlink_and_open(const std::string& id) {
+    if (shm_unlink(id.c_str())) {
+      if (errno != ENOENT) {
+        throw ExceptionWithErrno("Cannot delete shared memory, id: " + id);
+      }
+    }
+    int fd = shm_open(id.c_str(), O_RDWR | O_CREAT | O_EXCL, 0666);
+    if (fd < 0) {
+      throw ExceptionWithErrno("Cannot open shared memory, id: " + id);
+    }
+    return fd;
+  }
+
+  const std::string id_;
+  int fd_;
+};  // end of class ShmFD<Recreated>
+
+// Class to provide mapped shared memory
+template <bool Recreated>
+class SharedMemory {
+ public:
+  explicit SharedMemory(const std::string& id, size_t size)
+      : mem_{nullptr}, size_{size}, shm_fd_{id} {
+    if (ftruncate(shm_fd_.fd(), size)) {
+      throw ExceptionWithErrno("Failed to set the size of shared memory, id: " +
+                               id);
+    }
+
+    mem_ =
+        mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd_.fd(), 0);
+    if (mem_ == MAP_FAILED) {
+      throw ExceptionWithErrno("Failed to map shared memory, id: " + id);
+    }
+  }
+
+  virtual ~SharedMemory() { munmap(mem_, size_); }
+
+  // Disable copying
+  SharedMemory(SharedMemory const&) = delete;
+  SharedMemory& operator=(SharedMemory) = delete;
+
+  cv::Mat cv_mat(int rows, int cols, int mat_type) {
+    size_t requested_size = rows * cols * cv::Mat(1, 1, mat_type).elemSize();
+    if (requested_size > size_) {
+      throw std::runtime_error(
+          "Requested matrix is bigger than the shared memory size");
+    }
+    return cv::Mat(rows, cols, mat_type, mem_);
+  }
+
+  void store_mat(const cv::Mat& mat) {
+    size_t matrix_size = mat.rows * mat.cols * mat.elemSize();
+    if (matrix_size > size_) {
+      throw std::runtime_error(
+          "Input matrix is bigger than the shared memory size");
+    }
+    memcpy(mem_, reinterpret_cast<const void*>(mat.ptr()), matrix_size);
+  }
+
+ private:
+  void* mem_;
+  size_t size_;
+  ShmFD<Recreated> shm_fd_;
+};  // end of class SharedMemory<Recreated>
+
+using OpenedSharedMemory = SharedMemory<false>;
+using RecreatedSharedMemory = SharedMemory<true>;
+
+// Class to provide a message queue
+template <bool Recreated>
+class MessageQueue {
+ public:
+  template <bool check = Recreated>
+  explicit MessageQueue(std::enable_if_t<!check, const std::string&> id,
+                        SharedMemory<false>& sm)
+      : id_{}, queue_desc_{open(id)}, sm_{sm} {}
+
+  template <bool check = Recreated>
+  explicit MessageQueue(std::enable_if_t<check, const std::string&> id,
+                        SharedMemory<true>& sm)
+      : id_{id}, queue_desc_{unlink_and_open(id)}, sm_{sm} {}
+
+  virtual ~MessageQueue() {
+    mq_close(queue_desc_);
+    if (Recreated) {
+      mq_unlink(id_.c_str());
+    }
+  }
+
+  // Disable copying
+  MessageQueue(MessageQueue const&) = delete;
+  MessageQueue& operator=(MessageQueue) = delete;
+
+  void request_exit() {
+    message m = {-1, 0, 0, 0};
+    send(m);
+  }
+
+  void request_operation(int cmd, const cv::Mat& mat) {
+    sm_.store_mat(mat);
+    message m = {cmd, mat.rows, mat.cols, mat.type()};
+    send(m);
+  }
+
+  void reply_operation(int cmd, const cv::Mat& mat) {
+    request_operation(cmd, mat);
+  }
+
+  void wait() {
+    timespec abs_timeout;
+    clock_gettime(CLOCK_REALTIME, &abs_timeout);
+    abs_timeout.tv_sec += 3;
+    ssize_t read_bytes =
+        mq_timedreceive(queue_desc_, reinterpret_cast<char*>(&last_message_),
+                        sizeof(last_message_), nullptr, &abs_timeout);
+    if (read_bytes != sizeof(last_message_)) {
+      if (read_bytes == -1) {
+        throw ExceptionWithErrno("Could not receive message");
+      } else {
+        throw std::runtime_error("Less bytes received than expected");
+      }
+    }
+  }
+
+  int last_cmd() const { return last_message_.cmd; }
+
+  cv::Mat cv_mat_from_last_msg() const {
+    return sm_.cv_mat(last_message_.rows, last_message_.cols,
+                      last_message_.type);
+  }
+
+ private:
+  struct message {
+    int cmd;
+    int rows;
+    int cols;
+    int type;
+  };
+
+  static mqd_t open(const std::string& id) {
+    mqd_t qd = mq_open(id.c_str(), O_RDWR);
+    if (qd == static_cast<mqd_t>(-1)) {
+      throw ExceptionWithErrno("Failed to open message queue, id:" + id);
+    }
+
+    return qd;
+  }
+  static mqd_t unlink_and_open(const std::string& id) {
+    if (mq_unlink(id.c_str())) {
+      if (errno != ENOENT) {
+        throw ExceptionWithErrno("Cannot delete message queue, id: " + id);
+      }
+    }
+
+    mq_attr attr = queue_attributes();
+    mqd_t qd = mq_open(id.c_str(), O_RDWR | O_CREAT | O_EXCL, 0666, &attr);
+    if (qd == static_cast<mqd_t>(-1)) {
+      throw ExceptionWithErrno("Failed to open message queue, id:" + id);
+    }
+
+    return qd;
+  }
+
+  void send(message& m) const {
+    if (mq_send(queue_desc_, reinterpret_cast<const char*>(&m), sizeof(m), 0)) {
+      throw ExceptionWithErrno("Failed to send message on queue");
+    }
+  }
+
+  static mq_attr queue_attributes() {
+    mq_attr attr;
+    attr.mq_maxmsg = 1;
+    attr.mq_msgsize = sizeof(message);
+    return attr;
+  }
+
+  const std::string id_;
+  mqd_t queue_desc_;
+  message last_message_;
+  SharedMemory<Recreated>& sm_;
+};  // end of class MessageQueue<Recreated>
+
+class OpenedMessageQueue : public MessageQueue<false> {
+ public:
+  explicit OpenedMessageQueue(const std::string& id, SharedMemory<false>& sm)
+      : MessageQueue{id, sm} {}
+};  // end of class OpenedMessageQueue
+
+class RecreatedMessageQueue : public MessageQueue<true> {
+ public:
+  explicit RecreatedMessageQueue(const std::string& id, SharedMemory<true>& sm)
+      : MessageQueue{id, sm} {}
+};  // end of class RecreatedMessageQueue
+
+#endif  // INTRINSICCV_OPENCV_CONFORMITY_COMMON_H_
diff --git a/conformity/opencv/manager.cpp b/conformity/opencv/manager.cpp
new file mode 100644
index 000000000..2d7dc2dd3
--- /dev/null
+++ b/conformity/opencv/manager.cpp
@@ -0,0 +1,56 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <csignal>
+#include <iostream>
+
+#include "common.h"
+#include "tests.h"
+
+// Forks the subordinate (argv[1]), sets up the IPC objects and runs the tests.
+int main(int argc, char** argv) {
+  if (argc < 2) {
+    std::cerr << "Error! Subordinate task is not defined as the first argument!"
+              << std::endl;
+    return 1;
+  }
+
+  // Block USR1 signal as it terminates the process by default
+  sigset_t usr1_sigset;
+  sigemptyset(&usr1_sigset);
+  sigaddset(&usr1_sigset, SIGUSR1);
+  sigprocmask(SIG_BLOCK, &usr1_sigset, NULL);
+
+  pid_t child_pid = fork();
+  if (child_pid < 0) {  // kill(-1, ...) below would signal every process
+    throw ExceptionWithErrno("Cannot fork subordinate process");
+  }
+  if (child_pid == 0) {
+    // Waiting for the initialization of manager task
+    timespec timeout = {3, 0};
+    if (sigtimedwait(&usr1_sigset, NULL, &timeout) != SIGUSR1) {
+      std::cerr
+          << "Error! Wrong signal received or timeout reached in subordinate!"
+          << std::endl;
+      return 2;
+    }
+    // Starting subordinate task
+    execl(argv[1], argv[1], static_cast<char*>(NULL));
+    throw ExceptionWithErrno("Cannot start subordinate executable");
+  }
+
+  RecreatedSharedMemory sm{SHM_ID, SHM_SIZE};
+  RecreatedMessageQueue request_queue{REQUEST_MQ_ID, sm};
+  RecreatedMessageQueue reply_queue{REPLY_MQ_ID, sm};
+  // Let subordinate know that init is done
+  kill(child_pid, SIGUSR1);
+  run_tests(request_queue, reply_queue);
+  // Wait for subordinate to exit
+  wait(NULL);
+  std::cout << "Manager exits normally" << std::endl;
+}
diff --git a/conformity/opencv/subordinate.cpp b/conformity/opencv/subordinate.cpp
new file mode 100644
index 000000000..e7c77f327
--- /dev/null
+++ b/conformity/opencv/subordinate.cpp
@@ -0,0 +1,18 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <iostream>
+
+#include "common.h"
+#include "tests.h"
+
+int main(void) {
+  OpenedSharedMemory sm{SHM_ID, SHM_SIZE};
+  OpenedMessageQueue request_queue{REQUEST_MQ_ID, sm};
+  OpenedMessageQueue reply_queue{REPLY_MQ_ID, sm};
+
+  wait_for_requests(request_queue, reply_queue);
+
+  std::cout << "Subordinate exits normally" << std::endl;
+}
diff --git a/conformity/opencv/tests.cpp b/conformity/opencv/tests.cpp
new file mode 100644
index 000000000..0d3d1df68
--- /dev/null
+++ b/conformity/opencv/tests.cpp
@@ -0,0 +1,137 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "tests.h"
+
+#include <iostream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "opencv2/core.hpp"
+#include "opencv2/imgproc.hpp"
+
+namespace ConformityUtils {
+
+template <typename T>
+static auto abs_diff(T a, T b) {
+  return a > b ? a - b : b - a;
+}
+
+template <typename T>
+static bool are_matrices_different(T threshold, cv::Mat& A, cv::Mat& B) {
+  if (A.rows != B.rows || A.cols != B.cols || A.type() != B.type()) {
+    std::cout << "Matrix size/type mismatch" << std::endl;
+    return true;
+  }
+
+  for (int i = 0; i < A.rows; ++i) {
+    for (int j = 0; j < (A.cols * CV_MAT_CN(A.type())); ++j) {
+      if (abs_diff<T>(A.at<T>(i, j), B.at<T>(i, j)) > threshold) {
+        std::cout << "=== Mismatch at: " << i << " " << j << std::endl
+                  << std::endl;
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+}  // namespace ConformityUtils
+
+cv::Mat exec_float_to_int8_t(cv::Mat& input) {
+  cv::Mat result;
+  input.convertTo(result, CV_8SC1);
+  return result;
+}
+
+bool test_float_to_int8_t(int index, RecreatedMessageQueue& request_queue,
+                          RecreatedMessageQueue& reply_queue) {
+  cv::RNG rng(0);
+
+  for (size_t x = 5; x <= 16; ++x) {
+    for (size_t y = 5; y <= 16; ++y) {
+      cv::Mat input(x, y, CV_32FC1);
+      rng.fill(input, cv::RNG::UNIFORM, -1000, 1000);
+
+      cv::Mat manager_result = exec_float_to_int8_t(input);
+
+      request_queue.request_operation(index, input);
+      reply_queue.wait();
+      if (reply_queue.last_cmd() != index) {
+        throw std::runtime_error("Invalid reply from subordinate");
+      }
+
+      cv::Mat subord_result = reply_queue.cv_mat_from_last_msg();
+
+      if (ConformityUtils::are_matrices_different<uint8_t>(0, manager_result,
+                                                           subord_result)) {
+        std::cout << "[FAIL]" << std::endl;
+        std::cout << "height=" << x << std::endl;
+        std::cout << "width=" << y << std::endl;
+        std::cout << "=== Input Matrix:" << std::endl;
+        std::cout << input << std::endl << std::endl;
+        std::cout << "=== Manager result:" << std::endl;
+        std::cout << manager_result << std::endl << std::endl;
+        std::cout << "=== Subordinate result:" << std::endl;
+        std::cout << subord_result << std::endl << std::endl;
+
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+#if SUBORDINATE
+using test = std::pair<std::string, decltype(exec_float_to_int8_t)*>;
+#define TEST(name, x, exec_func) \
+  { name, exec_func }
+#else  // MANAGER
+using test = std::pair<std::string, decltype(test_float_to_int8_t)*>;
+#define TEST(name, test_func, x) \
+  { name, test_func }
+#endif
+
+// clang-format off
+std::vector<test> tests = {
+  TEST("Float32 to Int8", test_float_to_int8_t, exec_float_to_int8_t),
+};
+// clang-format on
+
+#if SUBORDINATE
+// Serves requests from the manager until a negative (exit) command arrives.
+void wait_for_requests(OpenedMessageQueue& request_queue,
+                       OpenedMessageQueue& reply_queue) {
+  while (true) {
+    request_queue.wait();
+    int cmd = request_queue.last_cmd();
+    if (cmd < 0) {
+      // Exit requested
+      break;
+    }
+    // Valid commands index into `tests`, so tests.size() itself is rejected
+    if (cmd >= static_cast<int>(tests.size())) {
+      throw std::runtime_error("Invalid operation requested in subordinate");
+    }
+    // Run the requested operation on the shared-memory input and reply
+    cv::Mat input = request_queue.cv_mat_from_last_msg();
+    cv::Mat result = tests[cmd].second(input);
+    reply_queue.reply_operation(cmd, result);
+  }
+}
+#else  // MANAGER
+void run_tests(RecreatedMessageQueue& request_queue,
+               RecreatedMessageQueue& reply_queue) {
+  for (int i = 0; i < static_cast<int>(tests.size()); ++i) {
+    std::cout << "Testing " + tests[i].first << std::endl;
+    if (tests[i].second(i, request_queue, reply_queue)) {
+      break;
+    }
+  }
+  request_queue.request_exit();
+}
+#endif
diff --git a/conformity/opencv/tests.h b/conformity/opencv/tests.h
new file mode 100644
index 000000000..8728d8c5f
--- /dev/null
+++ b/conformity/opencv/tests.h
@@ -0,0 +1,18 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef INTRINSICCV_OPENCV_CONFORMITY_TESTS_H_
+#define INTRINSICCV_OPENCV_CONFORMITY_TESTS_H_
+
+#include "common.h"
+
+#if SUBORDINATE
+void wait_for_requests(OpenedMessageQueue& request_queue,
+                       OpenedMessageQueue& reply_queue);
+#else  // MANAGER
+void run_tests(RecreatedMessageQueue& request_queue,
+               RecreatedMessageQueue& reply_queue);
+#endif
+
+#endif  // INTRINSICCV_OPENCV_CONFORMITY_TESTS_H_
diff --git a/scripts/ci.sh b/scripts/ci.sh
index e0c44cb0b..b81c4cf10 100755
--- a/scripts/ci.sh
+++ b/scripts/ci.sh
@@ -98,5 +98,9 @@ if [[ $(dpkg --print-architecture) = arm64 ]]; then
   # Check OpenCV-IntrinsicCV integration
   scripts/ci-opencv.sh
 fi
+if [[ $(dpkg --print-architecture) = arm64 ]]; then
+  # Compare the library output with OpenCV implementation
+  scripts/run_opencv_conformity_checks.sh
+fi
 
 exit $TESTRESULT
diff --git a/scripts/format.sh b/scripts/format.sh
index c50dfeda6..69953d462 100755
--- a/scripts/format.sh
+++ b/scripts/format.sh
@@ -29,10 +29,11 @@ INTRINSICCV_ROOT_PATH="$(realpath "${SCRIPT_PATH}"/..)"
 # ------------------------------------------------------------------------------
 
 SOURCES="$(find \
-    "${INTRINSICCV_ROOT_PATH}"/adapters \
-    "${INTRINSICCV_ROOT_PATH}"/benchmark \
-    "${INTRINSICCV_ROOT_PATH}"/intrinsiccv \
-    "${INTRINSICCV_ROOT_PATH}"/test \
+    "${INTRINSICCV_ROOT_PATH}/adapters" \
+    "${INTRINSICCV_ROOT_PATH}/benchmark" \
+    "${INTRINSICCV_ROOT_PATH}/intrinsiccv" \
+    "${INTRINSICCV_ROOT_PATH}/test" \
+    "${INTRINSICCV_ROOT_PATH}/conformity/opencv" \
     \( -name \*.cpp -o -name \*.h -o -name \*.h.in \) \
     -print)"
 
diff --git a/scripts/run_opencv_conformity_checks.sh b/scripts/run_opencv_conformity_checks.sh
new file mode 100755
index 000000000..bd58bd0ae
--- /dev/null
+++ b/scripts/run_opencv_conformity_checks.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+
+# SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+#
+# SPDX-License-Identifier: Apache-2.0
+
+set -exu
+
+: "${CLEAN:=OFF}"
+
+SCRIPT_PATH="$(realpath "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)")"
+
+SOURCE_PATH="${SCRIPT_PATH}/../conformity/opencv"
+INTRINSICCV_SOURCE_PATH="${SCRIPT_PATH}/.."
+BUILD_PATH="${SCRIPT_PATH}/../build/conformity"
+OPENCV_DEFAULT_PATH="${BUILD_PATH}/opencv_default"
+OPENCV_INTRINSICCV_PATH="${BUILD_PATH}/opencv_intrinsiccv"
+
+if [[ "${CLEAN}" == "ON" ]]; then
+    rm -rf "${BUILD_PATH}"
+fi
+
+export LDFLAGS="--rtlib=compiler-rt -fuse-ld=lld"
+
+cmake -S "${SOURCE_PATH}" \
+      -B "${OPENCV_DEFAULT_PATH}" \
+      -G Ninja \
+      -DWITH_INTRINSICCV=OFF
+ninja -C "${OPENCV_DEFAULT_PATH}" subordinate
+
+cmake -S "${SOURCE_PATH}" \
+      -B "${OPENCV_INTRINSICCV_PATH}" \
+      -G Ninja \
+      -DWITH_INTRINSICCV=ON \
+      -DINTRINSICCV_SOURCE_PATH="${INTRINSICCV_SOURCE_PATH}" \
+      -DINTRINSICCV_ENABLE_SVE2=ON \
+      -DINTRINSICCV_ENABLE_SVE2_SELECTIVELY=OFF
+ninja -C "${OPENCV_INTRINSICCV_PATH}" manager
+
+qemu-aarch64 -cpu cortex-a35 "${OPENCV_INTRINSICCV_PATH}/bin/manager" "${OPENCV_DEFAULT_PATH}/bin/subordinate"
+qemu-aarch64 -cpu max,sve128=on,sme=off "${OPENCV_INTRINSICCV_PATH}/bin/manager" "${OPENCV_DEFAULT_PATH}/bin/subordinate"
+qemu-aarch64 -cpu max,sve128=on,sme512=on "${OPENCV_INTRINSICCV_PATH}/bin/manager" "${OPENCV_DEFAULT_PATH}/bin/subordinate"
-- 
GitLab


From bd6f6301da3f1ddf7ca24247b73d06cd56154b3d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= <igor.podgainoi@arm.com>
Date: Wed, 3 Apr 2024 18:01:00 +0200
Subject: [PATCH 8/8] Implement float32 to uint8 type conversion

---
 adapters/opencv/intrinsiccv_hal.cpp           |  6 +++
 intrinsiccv/include/intrinsiccv/intrinsiccv.h |  8 +++-
 .../src/conversions/float_to_int_api.cpp      | 39 ++++++++-------
 .../src/conversions/float_to_int_neon.cpp     | 19 +++++---
 intrinsiccv/src/conversions/float_to_int_sc.h | 48 ++++++++++++-------
 .../src/conversions/float_to_int_sme2.cpp     | 24 ++++++----
 .../src/conversions/float_to_int_sve2.cpp     | 22 ++++++---
 7 files changed, 108 insertions(+), 58 deletions(-)

diff --git a/adapters/opencv/intrinsiccv_hal.cpp b/adapters/opencv/intrinsiccv_hal.cpp
index d22318ddc..c863fc9e5 100644
--- a/adapters/opencv/intrinsiccv_hal.cpp
+++ b/adapters/opencv/intrinsiccv_hal.cpp
@@ -678,6 +678,12 @@ int convertTo(const uchar *src_data, size_t src_step, int src_depth,
             reinterpret_cast<const float *>(src_data), src_step,
             reinterpret_cast<int8_t *>(dst_data), dst_step, width, height));
       }
+      // float32 to uint8
+      if (src_depth == CV_32F && dst_depth == CV_8U) {
+        return convert_error(intrinsiccv_type_conversion_f32_u8(
+            reinterpret_cast<const float *>(src_data), src_step,
+            reinterpret_cast<uint8_t *>(dst_data), dst_step, width, height));
+      }
     }
     return CV_HAL_ERROR_NOT_IMPLEMENTED;
   }
diff --git a/intrinsiccv/include/intrinsiccv/intrinsiccv.h b/intrinsiccv/include/intrinsiccv/intrinsiccv.h
index 3e5d94a2c..26eeddb8b 100644
--- a/intrinsiccv/include/intrinsiccv/intrinsiccv.h
+++ b/intrinsiccv/include/intrinsiccv/intrinsiccv.h
@@ -1268,8 +1268,8 @@ INTRINSICCV_API_DECLARATION(intrinsiccv_scale_u8, const uint8_t *src,
                             size_t width, size_t height, float scale,
                             float shift);
 
-/// Converts the elements in `src` from type `float` to type `int8_t`,
-/// then stores the result in `dst`.
+/// Converts the elements in `src` from a floating-point type to an integer
+/// type, then stores the result in `dst`.
 ///
 /// Each resulting element is saturated, i.e. it is the smallest/largest
 /// number of the type of the element if the result would underflow/overflow.
@@ -1290,6 +1290,10 @@ INTRINSICCV_API_DECLARATION(intrinsiccv_scale_u8, const uint8_t *src,
 INTRINSICCV_API_DECLARATION(intrinsiccv_type_conversion_f32_s8,
                             const float *src, size_t src_stride, int8_t *dst,
                             size_t dst_stride, size_t width, size_t height);
+/// @copydoc intrinsiccv_type_conversion_f32_s8
+INTRINSICCV_API_DECLARATION(intrinsiccv_type_conversion_f32_u8,
+                            const float *src, size_t src_stride, uint8_t *dst,
+                            size_t dst_stride, size_t width, size_t height);
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/intrinsiccv/src/conversions/float_to_int_api.cpp b/intrinsiccv/src/conversions/float_to_int_api.cpp
index 8a8a92f42..5503ecb8d 100644
--- a/intrinsiccv/src/conversions/float_to_int_api.cpp
+++ b/intrinsiccv/src/conversions/float_to_int_api.cpp
@@ -10,35 +10,42 @@ namespace intrinsiccv {
 
 namespace neon {
 
-intrinsiccv_error_t type_conversion_float_to_int8_t(
-    const float* src, size_t src_stride, int8_t* dst, size_t dst_stride,
-    size_t width, size_t height);
+template <typename T>
+intrinsiccv_error_t type_conversion_float_to_int(const float* src,
+                                                 size_t src_stride, T* dst,
+                                                 size_t dst_stride,
+                                                 size_t width, size_t height);
 
 }  // namespace neon
 
 namespace sve2 {
 
-intrinsiccv_error_t type_conversion_float_to_int8_t(
-    const float* src, size_t src_stride, int8_t* dst, size_t dst_stride,
-    size_t width, size_t height);
+template <typename T>
+intrinsiccv_error_t type_conversion_float_to_int(const float* src,
+                                                 size_t src_stride, T* dst,
+                                                 size_t dst_stride,
+                                                 size_t width, size_t height);
 
 }  // namespace sve2
 
 namespace sme2 {
 
-intrinsiccv_error_t type_conversion_float_to_int8_t(
-    const float* src, size_t src_stride, int8_t* dst, size_t dst_stride,
-    size_t width, size_t height);
+template <typename T>
+intrinsiccv_error_t type_conversion_float_to_int(const float* src,
+                                                 size_t src_stride, T* dst,
+                                                 size_t dst_stride,
+                                                 size_t width, size_t height);
 
 }  // namespace sme2
 
-#define INTRINSICCV_DEFINE_C_API(name, itype, otype)                 \
-  INTRINSICCV_MULTIVERSION_C_API(                                    \
-      name, intrinsiccv::neon::type_conversion_##itype##_to_##otype, \
-      INTRINSICCV_SVE2_IMPL_IF(                                      \
-          intrinsiccv::sve2::type_conversion_##itype##_to_##otype),  \
-      intrinsiccv::sme2::type_conversion_##itype##_to_##otype)
+#define INTRINSICCV_DEFINE_C_API(name, type)                       \
+  INTRINSICCV_MULTIVERSION_C_API(                                  \
+      name, intrinsiccv::neon::type_conversion_float_to_int<type>, \
+      INTRINSICCV_SVE2_IMPL_IF(                                    \
+          intrinsiccv::sve2::type_conversion_float_to_int<type>),  \
+      intrinsiccv::sme2::type_conversion_float_to_int<type>)
 
-INTRINSICCV_DEFINE_C_API(intrinsiccv_type_conversion_f32_s8, float, int8_t);
+INTRINSICCV_DEFINE_C_API(intrinsiccv_type_conversion_f32_s8, int8_t);
+INTRINSICCV_DEFINE_C_API(intrinsiccv_type_conversion_f32_u8, uint8_t);
 
 }  // namespace intrinsiccv
diff --git a/intrinsiccv/src/conversions/float_to_int_neon.cpp b/intrinsiccv/src/conversions/float_to_int_neon.cpp
index 6d3fee394..abb3614c4 100644
--- a/intrinsiccv/src/conversions/float_to_int_neon.cpp
+++ b/intrinsiccv/src/conversions/float_to_int_neon.cpp
@@ -7,14 +7,19 @@
 
 namespace intrinsiccv::neon {
 
-intrinsiccv_error_t type_conversion_float_to_int8_t(const float*, size_t,
-                                                    int8_t*, size_t, size_t,
-                                                    size_t);
-
-intrinsiccv_error_t type_conversion_float_to_int8_t(const float*, size_t,
-                                                    int8_t*, size_t, size_t,
-                                                    size_t) {
+template <typename T>
+intrinsiccv_error_t type_conversion_float_to_int(const float*, size_t, T*,
+                                                 size_t, size_t, size_t) {
   return INTRINSICCV_ERROR_NOT_IMPLEMENTED;
 }
 
+#define INTRINSICCV_INSTANTIATE_TEMPLATE(type)                            \
+  template INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t                \
+  type_conversion_float_to_int<type>(const float* src, size_t src_stride, \
+                                     type* dst, size_t dst_stride,        \
+                                     size_t width, size_t height)
+
+INTRINSICCV_INSTANTIATE_TEMPLATE(int8_t);
+INTRINSICCV_INSTANTIATE_TEMPLATE(uint8_t);
+
 }  // namespace intrinsiccv::neon
diff --git a/intrinsiccv/src/conversions/float_to_int_sc.h b/intrinsiccv/src/conversions/float_to_int_sc.h
index 10f8f749a..67afd326a 100644
--- a/intrinsiccv/src/conversions/float_to_int_sc.h
+++ b/intrinsiccv/src/conversions/float_to_int_sc.h
@@ -5,31 +5,35 @@
 #ifndef INTRINSICCV_FLOAT_TO_INT_SC_H
 #define INTRINSICCV_FLOAT_TO_INT_SC_H
 
+#include <type_traits>
+
 #include "intrinsiccv/intrinsiccv.h"
 #include "intrinsiccv/sve2.h"
 
 namespace INTRINSICCV_TARGET_NAMESPACE {
 
+template <typename OutputType>
 class float_to_int_operation final {
  public:
   using SrcVecTraits = INTRINSICCV_TARGET_NAMESPACE::VecTraits<float>;
   using SrcVectorType = typename SrcVecTraits::VectorType;
-  using DstVecTraits = INTRINSICCV_TARGET_NAMESPACE::VecTraits<int32_t>;
+  using DstVecTraits = INTRINSICCV_TARGET_NAMESPACE::VecTraits<
+      std::conditional_t<std::is_signed_v<OutputType>, int32_t, uint32_t>>;
   using DstVectorType = typename DstVecTraits::VectorType;
 
-  using ContextType = INTRINSICCV_TARGET_NAMESPACE::Context;
   using VecTraits = SrcVecTraits;
 
   void process_row(size_t width, Columns<const float> src,
-                   Columns<int8_t> dst) {
+                   Columns<OutputType> dst) INTRINSICCV_STREAMING_COMPATIBLE {
     LoopUnroll{width, VecTraits::num_lanes()}
         .unroll_twice([&](size_t step) INTRINSICCV_STREAMING_COMPATIBLE {
           svbool_t pg = VecTraits::svptrue();
-          Context ctx{pg};
           SrcVectorType src_vector1 = svld1(pg, &src[0]);
           SrcVectorType src_vector2 = svld1_vnum(pg, &src[0], 1);
-          DstVectorType result_vector1 = vector_path(ctx, src_vector1);
-          DstVectorType result_vector2 = vector_path(ctx, src_vector2);
+          DstVectorType result_vector1 =
+              vector_path<OutputType>(pg, src_vector1);
+          DstVectorType result_vector2 =
+              vector_path<OutputType>(pg, src_vector2);
           svst1b(pg, &dst[0], result_vector1);
           svst1b_vnum(pg, &dst[0], 1, result_vector2);
           src += ptrdiff_t(step);
@@ -38,24 +42,22 @@ class float_to_int_operation final {
         .remaining([&](size_t length, size_t) INTRINSICCV_STREAMING_COMPATIBLE {
           size_t index = 0;
           svbool_t pg = VecTraits::svwhilelt(index, length);
-          Context ctx{pg};
           while (svptest_first(VecTraits::svptrue(), pg)) {
             SrcVectorType src_vector = svld1(pg, &src[ptrdiff_t(index)]);
-            DstVectorType result_vector = vector_path(ctx, src_vector);
+            DstVectorType result_vector =
+                vector_path<OutputType>(pg, src_vector);
             svst1b(pg, &dst[ptrdiff_t(index)], result_vector);
             // Update loop counter and calculate the next governing predicate.
             index += VecTraits::num_lanes();
             pg = VecTraits::svwhilelt(index, length);
-            ctx.set_predicate(pg);
           }
         });
   }
 
  private:
-  DstVectorType vector_path(ContextType ctx, SrcVectorType src)
+  template <typename T, std::enable_if_t<std::is_same_v<int8_t, T>, int> = 0>
+  DstVectorType vector_path(svbool_t& pg, SrcVectorType src)
       INTRINSICCV_STREAMING_COMPATIBLE {
-    svbool_t pg = ctx.predicate();
-
     src = svrinti_f32_x(pg, src);
 
     svbool_t less = svcmplt_n_f32(pg, src, -128.0);
@@ -66,19 +68,31 @@ class float_to_int_operation final {
 
     return svcvt_s32_f32_x(pg, src);
   }
-};  // end of class float_to_int_operation<float>
 
-static intrinsiccv_error_t type_conversion_float_to_int8_t_sc(
-    const float* src, size_t src_stride, int8_t* dst, size_t dst_stride,
+  template <typename T, std::enable_if_t<std::is_same_v<uint8_t, T>, int> = 0>
+  DstVectorType vector_path(svbool_t& pg, SrcVectorType src)
+      INTRINSICCV_STREAMING_COMPATIBLE {
+    src = svrinti_f32_x(pg, src);
+
+    svbool_t greater = svcmpgt_n_f32(pg, src, 255.0);
+    src = svdup_n_f32_m(src, greater, 255.0);
+
+    return svcvt_u32_f32_x(pg, src);
+  }
+};  // end of class float_to_int_operation<OutputType>
+
+template <typename T>
+static intrinsiccv_error_t type_conversion_float_to_int_sc(
+    const float* src, size_t src_stride, T* dst, size_t dst_stride,
     size_t width, size_t height) INTRINSICCV_STREAMING_COMPATIBLE {
   CHECK_POINTER_AND_STRIDE(src, src_stride);
   CHECK_POINTER_AND_STRIDE(dst, dst_stride);
   CHECK_IMAGE_SIZE(width, height);
 
-  float_to_int_operation operation;
+  float_to_int_operation<T> operation;
   Rectangle rect{width, height};
   Rows<const float> src_rows{src, src_stride};
-  Rows<int8_t> dst_rows{dst, dst_stride};
+  Rows<T> dst_rows{dst, dst_stride};
   zip_rows(operation, rect, src_rows, dst_rows);
 
   return INTRINSICCV_OK;
diff --git a/intrinsiccv/src/conversions/float_to_int_sme2.cpp b/intrinsiccv/src/conversions/float_to_int_sme2.cpp
index af389a575..9b2c88182 100644
--- a/intrinsiccv/src/conversions/float_to_int_sme2.cpp
+++ b/intrinsiccv/src/conversions/float_to_int_sme2.cpp
@@ -6,16 +6,22 @@
 
 namespace intrinsiccv::sme2 {
 
-INTRINSICCV_LOCALLY_STREAMING INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t
-type_conversion_float_to_int8_t(const float*, size_t, int8_t*, size_t, size_t,
-                                size_t);
-
-INTRINSICCV_LOCALLY_STREAMING INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t
-type_conversion_float_to_int8_t(const float* src, size_t src_stride,
-                                int8_t* dst, size_t dst_stride, size_t width,
-                                size_t height) {
-  return type_conversion_float_to_int8_t_sc(src, src_stride, dst, dst_stride,
+// Locally streaming: the shared body is only streaming-compatible.
+template <typename T>
+INTRINSICCV_LOCALLY_STREAMING intrinsiccv_error_t type_conversion_float_to_int(
+    const float* src, size_t src_stride, T* dst, size_t dst_stride,
+    size_t width, size_t height) {
+  return type_conversion_float_to_int_sc<T>(src, src_stride, dst, dst_stride,
                                             width, height);
 }
 
+// Explicitly instantiate the entry point for every supported output type.
+#define INTRINSICCV_INSTANTIATE_TEMPLATE(OutputType)        \
+  template INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t  \
+  type_conversion_float_to_int<OutputType>(                 \
+      const float*, size_t, OutputType*, size_t, size_t, size_t)
+
+INTRINSICCV_INSTANTIATE_TEMPLATE(int8_t);
+INTRINSICCV_INSTANTIATE_TEMPLATE(uint8_t);
+
 }  // namespace intrinsiccv::sme2
diff --git a/intrinsiccv/src/conversions/float_to_int_sve2.cpp b/intrinsiccv/src/conversions/float_to_int_sve2.cpp
index f58ea271e..9b9efdcdd 100644
--- a/intrinsiccv/src/conversions/float_to_int_sve2.cpp
+++ b/intrinsiccv/src/conversions/float_to_int_sve2.cpp
@@ -6,14 +6,22 @@
 
 namespace intrinsiccv::sve2 {
 
-INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t type_conversion_float_to_int8_t(
-    const float*, size_t, int8_t*, size_t, size_t, size_t);
-
-INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t type_conversion_float_to_int8_t(
-    const float* src, size_t src_stride, int8_t* dst, size_t dst_stride,
-    size_t width, size_t height) {
-  return type_conversion_float_to_int8_t_sc(src, src_stride, dst, dst_stride,
+// SVE2 entry point: forwards to the shared streaming-compatible body.
+template <typename T>
+intrinsiccv_error_t type_conversion_float_to_int(
+    const float* src, size_t src_stride, T* dst, size_t dst_stride,
+    size_t width, size_t height) {
+  return type_conversion_float_to_int_sc<T>(src, src_stride, dst, dst_stride,
                                             width, height);
 }
 
+// Explicitly instantiate the entry point for every supported output type.
+#define INTRINSICCV_INSTANTIATE_TEMPLATE(OutputType)        \
+  template INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t  \
+  type_conversion_float_to_int<OutputType>(                 \
+      const float*, size_t, OutputType*, size_t, size_t, size_t)
+
+INTRINSICCV_INSTANTIATE_TEMPLATE(int8_t);
+INTRINSICCV_INSTANTIATE_TEMPLATE(uint8_t);
+
 }  // namespace intrinsiccv::sve2
-- 
GitLab