From d238d080afe80c2b140f823fbd7f51f56cb2f1c9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= <igor.podgainoi@arm.com>
Date: Fri, 22 Mar 2024 16:52:19 +0100
Subject: [PATCH 1/9] Fix typo in sve2.h

---
 intrinsiccv/include/intrinsiccv/sve2.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/intrinsiccv/include/intrinsiccv/sve2.h b/intrinsiccv/include/intrinsiccv/sve2.h
index cd326d0c7..df7b4f3e9 100644
--- a/intrinsiccv/include/intrinsiccv/sve2.h
+++ b/intrinsiccv/include/intrinsiccv/sve2.h
@@ -491,7 +491,7 @@ class RemainingPathAdapter : public OperationBase<OperationType> {
   }
 };  // end of class RemainingPathAdapter<OperationType>
 
-// Shorthand for applying a generic unrolled NEON operation.
+// Shorthand for applying a generic unrolled SVE2 operation.
 template <typename OperationType, typename... ArgTypes>
 void apply_operation_by_rows(OperationType &operation, ArgTypes &&...args)
     INTRINSICCV_STREAMING_COMPATIBLE {
-- 
GitLab


From c4ec2c6ff27378b17d5a1f0805436aceebaad1a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= <igor.podgainoi@arm.com>
Date: Fri, 22 Mar 2024 17:12:28 +0100
Subject: [PATCH 2/9] Better fix for error "non-constant-expression cannot be
 narrowed"

---
 test/api/test_resize_linear.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/api/test_resize_linear.cpp b/test/api/test_resize_linear.cpp
index 86902d00d..a132474ae 100644
--- a/test/api/test_resize_linear.cpp
+++ b/test/api/test_resize_linear.cpp
@@ -243,7 +243,7 @@ static void do_large_dimensions_test(size_t x_scale, size_t y_scale) {
   src.resize(src_stride * src_height);
   dst.resize(dst_stride * dst_height);
   expected_data.resize(dst_stride * dst_height);
-  std::mt19937 generator{static_cast<unsigned>(test::Options::seed())};
+  std::mt19937 generator(test::Options::seed());
   std::generate(src.begin(), src.end(), generator);
   resize_linear_unaccelerated_u8(src.data(), src_stride, src_width, src_height,
                                  expected_data.data(), dst_stride, dst_width,
-- 
GitLab


From 8b2bc65be7f328a013d800f11e4daf7aa1e2d970 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= <igor.podgainoi@arm.com>
Date: Thu, 4 Apr 2024 11:28:09 +0200
Subject: [PATCH 3/9] Add *.h.in files checking to formatting script

---
 scripts/format.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/format.sh b/scripts/format.sh
index d01f0d21c..69953d462 100755
--- a/scripts/format.sh
+++ b/scripts/format.sh
@@ -34,7 +34,7 @@ SOURCES="$(find \
     "${INTRINSICCV_ROOT_PATH}/intrinsiccv" \
     "${INTRINSICCV_ROOT_PATH}/test" \
     "${INTRINSICCV_ROOT_PATH}/conformity/opencv" \
-    \( -name \*.cpp -o -name \*.h \) \
+    \( -name \*.cpp -o -name \*.h -o -name \*.h.in \) \
     -print)"
 
 if [[ "${CHECK_ONLY}" == "ON" ]]; then
-- 
GitLab


From 65be243ea1e94496d19c3be70bf231e64d082fa0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= <igor.podgainoi@arm.com>
Date: Fri, 22 Mar 2024 17:34:23 +0100
Subject: [PATCH 4/9] Implement float32 to int8 and uint8 type conversion

---
 adapters/opencv/intrinsiccv_hal.cpp           |  15 +++
 intrinsiccv/include/intrinsiccv/intrinsiccv.h |  28 +++++
 .../src/conversions/float_to_int_api.cpp      |  51 +++++++++
 .../src/conversions/float_to_int_neon.cpp     |  25 +++++
 intrinsiccv/src/conversions/float_to_int_sc.h | 103 ++++++++++++++++++
 .../src/conversions/float_to_int_sme2.cpp     |  26 +++++
 .../src/conversions/float_to_int_sve2.cpp     |  26 +++++
 7 files changed, 274 insertions(+)
 create mode 100644 intrinsiccv/src/conversions/float_to_int_api.cpp
 create mode 100644 intrinsiccv/src/conversions/float_to_int_neon.cpp
 create mode 100644 intrinsiccv/src/conversions/float_to_int_sc.h
 create mode 100644 intrinsiccv/src/conversions/float_to_int_sme2.cpp
 create mode 100644 intrinsiccv/src/conversions/float_to_int_sve2.cpp

diff --git a/adapters/opencv/intrinsiccv_hal.cpp b/adapters/opencv/intrinsiccv_hal.cpp
index 3076ea6d5..c863fc9e5 100644
--- a/adapters/opencv/intrinsiccv_hal.cpp
+++ b/adapters/opencv/intrinsiccv_hal.cpp
@@ -670,6 +670,21 @@ int convertTo(const uchar *src_data, size_t src_step, int src_depth,
               uchar *dst_data, size_t dst_step, int dst_depth, int width,
               int height, double scale, double shift) {
   if (src_depth != dst_depth) {
+    // type conversion
+    if (scale == 1.0 && shift == 0.0) {
+      // float32 to int8
+      if (src_depth == CV_32F && dst_depth == CV_8S) {
+        return convert_error(intrinsiccv_type_conversion_f32_s8(
+            reinterpret_cast<const float *>(src_data), src_step,
+            reinterpret_cast<int8_t *>(dst_data), dst_step, width, height));
+      }
+      // float32 to uint8
+      if (src_depth == CV_32F && dst_depth == CV_8U) {
+        return convert_error(intrinsiccv_type_conversion_f32_u8(
+            reinterpret_cast<const float *>(src_data), src_step,
+            reinterpret_cast<uint8_t *>(dst_data), dst_step, width, height));
+      }
+    }
     return CV_HAL_ERROR_NOT_IMPLEMENTED;
   }
 
diff --git a/intrinsiccv/include/intrinsiccv/intrinsiccv.h b/intrinsiccv/include/intrinsiccv/intrinsiccv.h
index c58397274..63d969df3 100644
--- a/intrinsiccv/include/intrinsiccv/intrinsiccv.h
+++ b/intrinsiccv/include/intrinsiccv/intrinsiccv.h
@@ -1268,6 +1268,34 @@ INTRINSICCV_API_DECLARATION(intrinsiccv_scale_u8, const uint8_t *src,
                             size_t width, size_t height, float scale,
                             float shift);
 
+/// Converts the elements in `src` from a floating-point type to an integer
+/// type, then stores the result in `dst`.
+///
+/// Each resulting element is saturated, i.e. it is the smallest/largest
+/// number of the type of the element if the result would underflow/overflow.
+/// In case of special values, such as the different variations of `NaN`, the
+/// result is `0`. Source and destination data length is `width` * `height`.
+/// Number of elements is limited to @ref INTRINSICCV_MAX_IMAGE_PIXELS.
+///
+/// @param src          Pointer to the source data. Must be non-null.
+/// @param src_stride   Distance in bytes from the start of one row to the
+///                     start of the next row for the source data.
+///                     Must not be less than width * sizeof(type).
+/// @param dst          Pointer to the destination data. Must be non-null.
+/// @param dst_stride   Distance in bytes from the start of one row to the
+///                     start of the next row for the destination data.
+///                     Must not be less than width * sizeof(type).
+/// @param width        Number of elements in a row.
+/// @param height       Number of rows in the data.
+///
+INTRINSICCV_API_DECLARATION(intrinsiccv_type_conversion_f32_s8,
+                            const float *src, size_t src_stride, int8_t *dst,
+                            size_t dst_stride, size_t width, size_t height);
+/// @copydoc intrinsiccv_type_conversion_f32_s8
+INTRINSICCV_API_DECLARATION(intrinsiccv_type_conversion_f32_u8,
+                            const float *src, size_t src_stride, uint8_t *dst,
+                            size_t dst_stride, size_t width, size_t height);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif  // __cplusplus
diff --git a/intrinsiccv/src/conversions/float_to_int_api.cpp b/intrinsiccv/src/conversions/float_to_int_api.cpp
new file mode 100644
index 000000000..5503ecb8d
--- /dev/null
+++ b/intrinsiccv/src/conversions/float_to_int_api.cpp
@@ -0,0 +1,51 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "intrinsiccv/dispatch.h"
+#include "intrinsiccv/intrinsiccv.h"
+#include "intrinsiccv/types.h"
+
+namespace intrinsiccv {
+
+namespace neon {
+
+template <typename T>
+intrinsiccv_error_t type_conversion_float_to_int(const float* src,
+                                                 size_t src_stride, T* dst,
+                                                 size_t dst_stride,
+                                                 size_t width, size_t height);
+
+}  // namespace neon
+
+namespace sve2 {
+
+template <typename T>
+intrinsiccv_error_t type_conversion_float_to_int(const float* src,
+                                                 size_t src_stride, T* dst,
+                                                 size_t dst_stride,
+                                                 size_t width, size_t height);
+
+}  // namespace sve2
+
+namespace sme2 {
+
+template <typename T>
+intrinsiccv_error_t type_conversion_float_to_int(const float* src,
+                                                 size_t src_stride, T* dst,
+                                                 size_t dst_stride,
+                                                 size_t width, size_t height);
+
+}  // namespace sme2
+
+#define INTRINSICCV_DEFINE_C_API(name, type)                       \
+  INTRINSICCV_MULTIVERSION_C_API(                                  \
+      name, intrinsiccv::neon::type_conversion_float_to_int<type>, \
+      INTRINSICCV_SVE2_IMPL_IF(                                    \
+          intrinsiccv::sve2::type_conversion_float_to_int<type>),  \
+      intrinsiccv::sme2::type_conversion_float_to_int<type>)
+
+INTRINSICCV_DEFINE_C_API(intrinsiccv_type_conversion_f32_s8, int8_t);
+INTRINSICCV_DEFINE_C_API(intrinsiccv_type_conversion_f32_u8, uint8_t);
+
+}  // namespace intrinsiccv
diff --git a/intrinsiccv/src/conversions/float_to_int_neon.cpp b/intrinsiccv/src/conversions/float_to_int_neon.cpp
new file mode 100644
index 000000000..abb3614c4
--- /dev/null
+++ b/intrinsiccv/src/conversions/float_to_int_neon.cpp
@@ -0,0 +1,25 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "intrinsiccv/intrinsiccv.h"
+#include "intrinsiccv/neon.h"
+
+namespace intrinsiccv::neon {
+
+template <typename T>
+intrinsiccv_error_t type_conversion_float_to_int(const float*, size_t, T*,
+                                                 size_t, size_t, size_t) {
+  return INTRINSICCV_ERROR_NOT_IMPLEMENTED;
+}
+
+#define INTRINSICCV_INSTANTIATE_TEMPLATE(type)                            \
+  template INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t                \
+  type_conversion_float_to_int<type>(const float* src, size_t src_stride, \
+                                     type* dst, size_t dst_stride,        \
+                                     size_t width, size_t height)
+
+INTRINSICCV_INSTANTIATE_TEMPLATE(int8_t);
+INTRINSICCV_INSTANTIATE_TEMPLATE(uint8_t);
+
+}  // namespace intrinsiccv::neon
diff --git a/intrinsiccv/src/conversions/float_to_int_sc.h b/intrinsiccv/src/conversions/float_to_int_sc.h
new file mode 100644
index 000000000..ea43a59cb
--- /dev/null
+++ b/intrinsiccv/src/conversions/float_to_int_sc.h
@@ -0,0 +1,103 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef INTRINSICCV_FLOAT_TO_INT_SC_H
+#define INTRINSICCV_FLOAT_TO_INT_SC_H
+
+#include <type_traits>
+
+#include "intrinsiccv/intrinsiccv.h"
+#include "intrinsiccv/sve2.h"
+
+namespace INTRINSICCV_TARGET_NAMESPACE {
+
+template <typename OutputType>
+class float_to_int_operation final {
+ public:
+  using SrcVecTraits = INTRINSICCV_TARGET_NAMESPACE::VecTraits<float>;
+  using SrcVectorType = typename SrcVecTraits::VectorType;
+  using IntermediateVecTraits = INTRINSICCV_TARGET_NAMESPACE::VecTraits<
+      std::conditional_t<std::is_signed_v<OutputType>, int32_t, uint32_t>>;
+  using IntermediateVectorType = typename IntermediateVecTraits::VectorType;
+  // Lane counts and predicates below are those of the float source vectors.
+  using VecTraits = SrcVecTraits;
+  // Converts `width` floats from `src`, storing one byte per lane to `dst`.
+  void process_row(size_t width, Columns<const float> src,
+                   Columns<OutputType> dst) INTRINSICCV_STREAMING_COMPATIBLE {
+    LoopUnroll{width, VecTraits::num_lanes()}
+        .unroll_twice([&](size_t step) INTRINSICCV_STREAMING_COMPATIBLE {
+          svbool_t pg = VecTraits::svptrue();
+          SrcVectorType src_vector1 = svld1(pg, &src[0]);
+          SrcVectorType src_vector2 = svld1_vnum(pg, &src[0], 1);
+          IntermediateVectorType result_vector1 =
+              vector_path<OutputType>(pg, src_vector1);
+          IntermediateVectorType result_vector2 =
+              vector_path<OutputType>(pg, src_vector2);
+          svst1b(pg, &dst[0], result_vector1);
+          svst1b_vnum(pg, &dst[0], 1, result_vector2);
+          src += ptrdiff_t(step);
+          dst += ptrdiff_t(step);
+        })
+        .remaining([&](size_t length, size_t) INTRINSICCV_STREAMING_COMPATIBLE {
+          size_t index = 0;
+          svbool_t pg = VecTraits::svwhilelt(index, length);
+          while (svptest_first(VecTraits::svptrue(), pg)) {
+            SrcVectorType src_vector = svld1(pg, &src[ptrdiff_t(index)]);
+            IntermediateVectorType result_vector =
+                vector_path<OutputType>(pg, src_vector);
+            svst1b(pg, &dst[ptrdiff_t(index)], result_vector);
+            // Update loop counter and calculate the next governing predicate.
+            index += VecTraits::num_lanes();
+            pg = VecTraits::svwhilelt(index, length);
+          }
+        });
+  }
+
+ private:
+  template <typename T, std::enable_if_t<std::is_same_v<int8_t, T>, int> = 0>
+  IntermediateVectorType vector_path(svbool_t pg, SrcVectorType src)
+      INTRINSICCV_STREAMING_COMPATIBLE {
+    src = svrinti_f32_x(pg, src);  // Round using the current rounding mode.
+    // Lanes below the int8_t range are replaced by the lower bound.
+    svbool_t less = svcmplt_n_f32(pg, src, -128.0F);
+    src = svdup_n_f32_m(src, less, -128.0F);
+    // Lanes above the int8_t range are replaced by the upper bound.
+    svbool_t greater = svcmpgt_n_f32(pg, src, 127.0F);
+    src = svdup_n_f32_m(src, greater, 127.0F);
+    // Convert the clamped, integral-valued lanes to 32-bit signed integers.
+    return svcvt_s32_f32_x(pg, src);
+  }
+
+  template <typename T, std::enable_if_t<std::is_same_v<uint8_t, T>, int> = 0>
+  IntermediateVectorType vector_path(svbool_t pg, SrcVectorType src)
+      INTRINSICCV_STREAMING_COMPATIBLE {
+    src = svrinti_f32_x(pg, src);  // Round using the current rounding mode.
+    // Lanes above the uint8_t range are replaced by the upper bound.
+    svbool_t greater = svcmpgt_n_f32(pg, src, 255.0F);
+    src = svdup_n_f32_m(src, greater, 255.0F);
+    // NOTE(review): no lower clamp; presumably negatives convert to 0.
+    return svcvt_u32_f32_x(pg, src);
+  }
+};  // end of class float_to_int_operation<OutputType>
+
+template <typename T>
+static intrinsiccv_error_t type_conversion_float_to_int_sc(
+    const float* src, size_t src_stride, T* dst, size_t dst_stride,
+    size_t width, size_t height) INTRINSICCV_STREAMING_COMPATIBLE {
+  CHECK_POINTER_AND_STRIDE(src, src_stride);
+  CHECK_POINTER_AND_STRIDE(dst, dst_stride);
+  CHECK_IMAGE_SIZE(width, height);
+
+  float_to_int_operation<T> operation;
+  Rectangle rect{width, height};
+  Rows<const float> src_rows{src, src_stride};
+  Rows<T> dst_rows{dst, dst_stride};
+  zip_rows(operation, rect, src_rows, dst_rows);
+
+  return INTRINSICCV_OK;
+}
+
+}  // namespace INTRINSICCV_TARGET_NAMESPACE
+
+#endif  // INTRINSICCV_FLOAT_TO_INT_SC_H
diff --git a/intrinsiccv/src/conversions/float_to_int_sme2.cpp b/intrinsiccv/src/conversions/float_to_int_sme2.cpp
new file mode 100644
index 000000000..64c704e58
--- /dev/null
+++ b/intrinsiccv/src/conversions/float_to_int_sme2.cpp
@@ -0,0 +1,26 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "float_to_int_sc.h"
+
+namespace intrinsiccv::sme2 {
+
+template <typename T>
+INTRINSICCV_LOCALLY_STREAMING INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t
+type_conversion_float_to_int(const float* src, size_t src_stride, T* dst,
+                             size_t dst_stride, size_t width, size_t height) {
+  return type_conversion_float_to_int_sc<T>(src, src_stride, dst, dst_stride,
+                                            width, height);
+}
+
+#define INTRINSICCV_INSTANTIATE_TEMPLATE(type)                            \
+  template INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t                \
+  type_conversion_float_to_int<type>(const float* src, size_t src_stride, \
+                                     type* dst, size_t dst_stride,        \
+                                     size_t width, size_t height)
+
+INTRINSICCV_INSTANTIATE_TEMPLATE(int8_t);
+INTRINSICCV_INSTANTIATE_TEMPLATE(uint8_t);
+
+}  // namespace intrinsiccv::sme2
diff --git a/intrinsiccv/src/conversions/float_to_int_sve2.cpp b/intrinsiccv/src/conversions/float_to_int_sve2.cpp
new file mode 100644
index 000000000..a9eb25eef
--- /dev/null
+++ b/intrinsiccv/src/conversions/float_to_int_sve2.cpp
@@ -0,0 +1,26 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "float_to_int_sc.h"
+
+namespace intrinsiccv::sve2 {
+
+template <typename T>
+INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t
+type_conversion_float_to_int(const float* src, size_t src_stride, T* dst,
+                             size_t dst_stride, size_t width, size_t height) {
+  return type_conversion_float_to_int_sc<T>(src, src_stride, dst, dst_stride,
+                                            width, height);
+}
+
+#define INTRINSICCV_INSTANTIATE_TEMPLATE(type)                            \
+  template INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t                \
+  type_conversion_float_to_int<type>(const float* src, size_t src_stride, \
+                                     type* dst, size_t dst_stride,        \
+                                     size_t width, size_t height)
+
+INTRINSICCV_INSTANTIATE_TEMPLATE(int8_t);
+INTRINSICCV_INSTANTIATE_TEMPLATE(uint8_t);
+
+}  // namespace intrinsiccv::sve2
-- 
GitLab


From 1d0ad8c1db180c10287083932c60ce5bf915095a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= <igor.podgainoi@arm.com>
Date: Thu, 28 Mar 2024 18:27:48 +0100
Subject: [PATCH 5/9] Added tests for float32 to int8 and uint8 conversion

---
 intrinsiccv/include/intrinsiccv/config.h.in |  11 +
 test/api/test_float_to_int.cpp              | 341 ++++++++++++++++++++
 test/framework/array.h                      |  16 +
 test/framework/special_floats.h             |   8 +
 test/test_config.h.in                       |  11 +
 5 files changed, 387 insertions(+)
 create mode 100644 test/api/test_float_to_int.cpp
 create mode 100644 test/framework/special_floats.h

diff --git a/intrinsiccv/include/intrinsiccv/config.h.in b/intrinsiccv/include/intrinsiccv/config.h.in
index 068c88b66..dd521b82a 100644
--- a/intrinsiccv/include/intrinsiccv/config.h.in
+++ b/intrinsiccv/include/intrinsiccv/config.h.in
@@ -90,4 +90,15 @@
 #define INTRINSICCV_NODISCARD
 #endif
 
+// GCC and clang
+#ifdef __GNUC__
+#define INTRINSICCV_NO_STRICT_ALIASING_BEGIN \
+  _Pragma("GCC diagnostic push")             \
+      _Pragma("GCC diagnostic ignored \"-Wstrict-aliasing\"")
+#define INTRINSICCV_NO_STRICT_ALIASING_END _Pragma("GCC diagnostic pop")
+#else
+#define INTRINSICCV_NO_STRICT_ALIASING_BEGIN
+#define INTRINSICCV_NO_STRICT_ALIASING_END
+#endif
+
 #endif  // INTRINSICCV_CONFIG_H
diff --git a/test/api/test_float_to_int.cpp b/test/api/test_float_to_int.cpp
new file mode 100644
index 000000000..4c7aea619
--- /dev/null
+++ b/test/api/test_float_to_int.cpp
@@ -0,0 +1,343 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <gtest/gtest.h>
+
+#include <cstring>
+
+#include "framework/array.h"
+#include "framework/generator.h"
+#include "framework/operation.h"
+#include "framework/special_floats.h"
+#include "framework/utils.h"
+#include "intrinsiccv/intrinsiccv.h"
+#include "test_config.h"
+
+template <typename ElementType>
+class FloatToIntTestBase {
+ private:
+  template <typename T>
+  static constexpr T min() {
+    return std::numeric_limits<T>::min();
+  }
+
+  template <typename T>
+  static constexpr T max() {
+    return std::numeric_limits<T>::max();
+  }
+
+  template <typename OutputType>
+  struct Elements {
+    size_t width;
+    size_t height;
+
+    std::vector<std::vector<ElementType>> source_rows;
+    std::vector<std::vector<OutputType>> expected_rows;
+
+    Elements(size_t _width, size_t _height,
+             std::vector<std::vector<ElementType>>&& _source_rows,
+             std::vector<std::vector<OutputType>>&& _expected_rows)
+        : width(_width),
+          height(_height),
+          source_rows(std::move(_source_rows)),
+          expected_rows(std::move(_expected_rows)) {}
+  };
+
+  static constexpr uint32_t quietNaN = 0x7FC00000;
+  static constexpr uint32_t signalingNaN = 0x7FA00000;
+  static constexpr uint32_t posInfinity = 0x7F800000;
+  static constexpr uint32_t negInfinity = 0xFF800000;
+
+  static constexpr uint32_t minusNaN = 0xFF800001;
+  static constexpr uint32_t plusNaN = 0x7F800001;
+  static constexpr uint32_t plusZero = 0x00000000;
+  static constexpr uint32_t minusZero = 0x80000000;
+
+  static constexpr uint32_t oneNaN = 0x7FC00001;
+  static constexpr uint32_t zeroDivZero = 0xFFC00000;
+  static constexpr uint32_t floatMin = 0x00800000;
+  static constexpr uint32_t floatMax = 0x7F7FFFFF;
+
+  static constexpr uint32_t posSubnormalMin = 0x00000001;
+  static constexpr uint32_t posSubnormalMax = 0x007FFFFF;
+  static constexpr uint32_t negSubnormalMin = 0x80000001;
+  static constexpr uint32_t negSubnormalMax = 0x807FFFFF;
+
+  static float _floatval(uint32_t v) {
+    static_assert(sizeof(float) == sizeof(v), "float must be 32 bits wide");
+    float value = 0.0F;  // Receives the bit pattern of `v` unchanged.
+    std::memcpy(&value, &v, sizeof(value));  // Aliasing-safe type punning.
+    return value;
+  }
+
+  const Elements<int8_t> test_case_custom_f32_s8 = {
+      // clang-format off
+    4, 8,
+    {{
+      { _floatval(quietNaN), _floatval(signalingNaN), _floatval(posInfinity), _floatval(negInfinity) },
+      { _floatval(minusNaN), _floatval(plusNaN), _floatval(plusZero), _floatval(minusZero) },
+      { _floatval(oneNaN), _floatval(zeroDivZero), _floatval(floatMin), _floatval(floatMax) },
+      { _floatval(posSubnormalMin), _floatval(posSubnormalMax), _floatval(negSubnormalMin), _floatval(negSubnormalMax) },
+      { 1111.11, -1112.22, 113.33, 114.44 },
+      { 111.51, 112.62, 113.73, 114.84 },
+      { 126.66, 127.11, 128.66, 129.11 },
+      { 11.5, 12.5, -11.5, -12.5 }
+    }},
+    {{
+      { 0, 0, 127, -128 },
+      { 0, 0, 0, 0 },
+      { 0, 0, 0, 127 },
+      { 0, 0, 0, 0 },
+      { 127, -128, 113, 114 },
+      { 112, 113, 114, 115 },
+      { 127, 127, 127, 127 },
+      { 12, 12, -12, -12 }
+    }}
+      // clang-format on
+  };
+
+  const Elements<uint8_t> test_case_custom_f32_u8 = {
+      // clang-format off
+    4, 8,
+    {{
+      { _floatval(quietNaN), _floatval(signalingNaN), _floatval(posInfinity), _floatval(negInfinity) },
+      { _floatval(minusNaN), _floatval(plusNaN), _floatval(plusZero), _floatval(minusZero) },
+      { _floatval(oneNaN), _floatval(zeroDivZero), _floatval(floatMin), _floatval(floatMax) },
+      { _floatval(posSubnormalMin), _floatval(posSubnormalMax), _floatval(negSubnormalMin), _floatval(negSubnormalMax) },
+      { 1111.11, -1112.22, 113.33, 114.44 },
+      { 111.51, 112.62, 113.73, 114.84 },
+      { 126.66, 127.11, 128.66, 129.11 },
+      { 11.5, 12.5, -11.5, -12.5 }
+    }},
+    {{
+      { 0, 0, 255, 0 },
+      { 0, 0, 0, 0 },
+      { 0, 0, 0, 255 },
+      { 0, 0, 0, 0 },
+      { 255, 0, 113, 114 },
+      { 112, 113, 114, 115 },
+      { 127, 127, 129, 129 },
+      { 12, 12, 0, 0 }
+    }}
+      // clang-format on
+  };
+
+  template <typename OutputType>
+  void calculate_expected(const test::Array2D<ElementType>& source,
+                          test::Array2D<OutputType>& expected) {
+    for (size_t hindex = 0; hindex < source.height(); ++hindex) {
+      for (size_t vindex = 0; vindex < source.width(); ++vindex) {
+        OutputType calculated = 0;
+        // NOLINTBEGIN(clang-analyzer-core.uninitialized.Assign)
+        ElementType result = *source.at(hindex, vindex);
+        // NOLINTEND(clang-analyzer-core.uninitialized.Assign)
+        if (result > max<OutputType>()) {
+          calculated = max<OutputType>();
+        } else if (result < min<OutputType>()) {
+          calculated = min<OutputType>();
+        } else {
+          calculated = result;
+        }
+        *expected.at(hindex, vindex) = calculated;
+      }
+    }
+  }
+
+  class GenerateLinearSeries : public test::Generator<ElementType> {
+   public:
+    explicit GenerateLinearSeries(ElementType start_from)
+        : counter_{start_from} {}
+
+    std::optional<ElementType> next() override { return counter_++; }
+
+   private:
+    ElementType counter_;
+  };  // end of class GenerateLinearSeries
+
+  template <typename T>
+  size_t get_linear_height(size_t width, size_t minimum_size) {
+    size_t image_size =
+        std::max(minimum_size, static_cast<size_t>(max<T>() - min<T>()));
+    size_t height = image_size / width + 1;
+
+    return height;
+  }
+
+  template <typename OutputType>
+  std::tuple<test::Array2D<ElementType>, test::Array2D<OutputType>,
+             test::Array2D<OutputType>>
+  get_linear_arrays(size_t width, size_t height) {
+    test::Array2D<ElementType> source(width, height, 1, 1);
+    test::Array2D<OutputType> expected(width, height, 1, 1);
+    test::Array2D<OutputType> actual(width, height, 1, 1);
+
+    GenerateLinearSeries generator(min<OutputType>());
+
+    source.fill(generator);
+
+    calculate_expected<OutputType>(source, expected);
+
+    return {source, expected, actual};
+  }
+
+ public:
+  // minimum_size set by caller to trigger the 'big' conversion path.
+  void test_linear(size_t width, size_t minimum_size = 1) {
+    size_t height = get_linear_height<int8_t>(width, minimum_size);
+
+    auto arrays_s8 = get_linear_arrays<int8_t>(width, height);
+
+    test::Array2D<ElementType>& source_s8 = std::get<0>(arrays_s8);
+    test::Array2D<int8_t>& expected_s8 = std::get<1>(arrays_s8);
+    test::Array2D<int8_t>& actual_s8 = std::get<2>(arrays_s8);
+
+    ASSERT_EQ(INTRINSICCV_OK,
+              intrinsiccv_type_conversion_f32_s8(
+                  source_s8.data(), source_s8.stride(), actual_s8.data(),
+                  actual_s8.stride(), width, height));
+
+    EXPECT_EQ_ARRAY2D(expected_s8, actual_s8);
+
+    auto arrays_u8 = get_linear_arrays<uint8_t>(width, height);
+
+    test::Array2D<ElementType>& source_u8 = std::get<0>(arrays_u8);
+    test::Array2D<uint8_t>& expected_u8 = std::get<1>(arrays_u8);
+    test::Array2D<uint8_t>& actual_u8 = std::get<2>(arrays_u8);
+
+    ASSERT_EQ(INTRINSICCV_OK,
+              intrinsiccv_type_conversion_f32_u8(
+                  source_u8.data(), source_u8.stride(), actual_u8.data(),
+                  actual_u8.stride(), width, height));
+
+    EXPECT_EQ_ARRAY2D(expected_u8, actual_u8);
+  }
+
+  void test_custom_f32_s8() {
+    const size_t& width = test_case_custom_f32_s8.width;
+    const size_t& height = test_case_custom_f32_s8.height;
+
+    test::Array2D<ElementType> source(width, height);
+    test::Array2D<int8_t> expected(width, height);
+    test::Array2D<int8_t> actual(width, height);
+
+    for (size_t i = 0; i < height; i++) {
+      source.set(i, 0, test_case_custom_f32_s8.source_rows[i]);
+      expected.set(i, 0, test_case_custom_f32_s8.expected_rows[i]);
+    }
+
+    ASSERT_EQ(INTRINSICCV_OK, intrinsiccv_type_conversion_f32_s8(
+                                  source.data(), source.stride(), actual.data(),
+                                  actual.stride(), width, height));
+
+    EXPECT_EQ_ARRAY2D(expected, actual);
+  }
+
+  void test_custom_f32_u8() {
+    const size_t& width = test_case_custom_f32_u8.width;
+    const size_t& height = test_case_custom_f32_u8.height;
+
+    test::Array2D<ElementType> source(width, height);
+    test::Array2D<uint8_t> expected(width, height);
+    test::Array2D<uint8_t> actual(width, height);
+
+    for (size_t i = 0; i < height; i++) {
+      source.set(i, 0, test_case_custom_f32_u8.source_rows[i]);
+      expected.set(i, 0, test_case_custom_f32_u8.expected_rows[i]);
+    }
+
+    ASSERT_EQ(INTRINSICCV_OK, intrinsiccv_type_conversion_f32_u8(
+                                  source.data(), source.stride(), actual.data(),
+                                  actual.stride(), width, height));
+
+    EXPECT_EQ_ARRAY2D(expected, actual);
+  }
+
+  void test_fill(const size_t width, const size_t height) {
+    test::Array2D<ElementType> source(width, height, 1, 1);
+
+    test::Array2D<int8_t> expected_s8(width, height, 1, 1);
+    test::Array2D<uint8_t> expected_u8(width, height, 1, 1);
+
+    test::Array2D<int8_t> actual_s8(width, height, 1, 1);
+    test::Array2D<uint8_t> actual_u8(width, height, 1, 1);
+
+    source.fill(10.67F);
+
+    expected_s8.fill(11);
+    expected_u8.fill(11);
+
+    actual_s8.fill(0);
+    actual_u8.fill(0);
+
+    ASSERT_EQ(INTRINSICCV_OK,
+              intrinsiccv_type_conversion_f32_s8(
+                  source.data(), source.stride(), actual_s8.data(),
+                  actual_s8.stride(), width, height));
+
+    EXPECT_EQ_ARRAY2D(expected_s8, actual_s8);
+
+    ASSERT_EQ(INTRINSICCV_OK,
+              intrinsiccv_type_conversion_f32_u8(
+                  source.data(), source.stride(), actual_u8.data(),
+                  actual_u8.stride(), width, height));
+
+    EXPECT_EQ_ARRAY2D(expected_u8, actual_u8);
+  }
+};  // end of class FloatToIntTestBase
+
+template <typename TypeParam>
+class FloatToIntTest : public testing::Test {};
+
+using ElementTypes = ::testing::Types<float>;
+
+// Tests the intrinsiccv_type_conversion_f32_s8/_u8 APIs.
+TYPED_TEST_SUITE(FloatToIntTest, ElementTypes);
+
+TYPED_TEST(FloatToIntTest, TestScalar) {
+  FloatToIntTestBase<TypeParam>{}.test_linear(test::Options::vector_length() -
+                                              1);
+}
+TYPED_TEST(FloatToIntTest, TestVector) {
+  FloatToIntTestBase<TypeParam>{}.test_linear(test::Options::vector_length() *
+                                              2);
+}
+TYPED_TEST(FloatToIntTest, TestCustomValuesFloat32ToInt8) {
+  FloatToIntTestBase<TypeParam>{}.test_custom_f32_s8();
+}
+TYPED_TEST(FloatToIntTest, TestCustomValuesFloat32ToUInt8) {
+  FloatToIntTestBase<TypeParam>{}.test_custom_f32_u8();
+}
+TYPED_TEST(FloatToIntTest, TestCustomFits128VectorSize) {
+  FloatToIntTestBase<TypeParam>{}.test_fill(4, 1);
+}
+TYPED_TEST(FloatToIntTest, TestCustomFits128VectorSize2x) {
+  FloatToIntTestBase<TypeParam>{}.test_fill(4, 2);
+}
+TYPED_TEST(FloatToIntTest, TestCustomFits128VectorSize3x) {
+  FloatToIntTestBase<TypeParam>{}.test_fill(4, 3);
+}
+TYPED_TEST(FloatToIntTest, TestCustomFits512VectorSize) {
+  FloatToIntTestBase<TypeParam>{}.test_fill(4, 4);
+}
+TYPED_TEST(FloatToIntTest, TestCustomFits512VectorSize2x) {
+  FloatToIntTestBase<TypeParam>{}.test_fill(4, 8);
+}
+TYPED_TEST(FloatToIntTest, TestCustomFits512VectorSize3x) {
+  FloatToIntTestBase<TypeParam>{}.test_fill(6, 8);
+}
+TYPED_TEST(FloatToIntTest, TestCustom128OneRemaining) {
+  FloatToIntTestBase<TypeParam>{}.test_fill(1, 17);
+}
+TYPED_TEST(FloatToIntTest, TestCustom128AllButOneRemaining) {
+  FloatToIntTestBase<TypeParam>{}.test_fill(5, 3);
+}
+TYPED_TEST(FloatToIntTest, TestCustomAboutHalfRemaining) {
+  FloatToIntTestBase<TypeParam>{}.test_fill(19, 2);
+}
+TYPED_TEST(FloatToIntTest, TestCustomEmpty) {
+  FloatToIntTestBase<TypeParam>{}.test_fill(0, 0);
+}
+TYPED_TEST(FloatToIntTest, TestCustomOne) {
+  FloatToIntTestBase<TypeParam>{}.test_fill(1, 1);
+}
diff --git a/test/framework/array.h b/test/framework/array.h
index f854c9819..7f010cac6 100644
--- a/test/framework/array.h
+++ b/test/framework/array.h
@@ -141,6 +141,22 @@ class Array2D : public TwoDimensional<ElementType> {
     }
   }
 
+  // Sets values in a row starting at a given column from a const vector.
+  void set(size_t row, size_t column, const std::vector<ElementType> &values) {
+    ASSERT_EQ(valid(), true) << "Array is invalid.";
+    // 'width() - column' below is unsigned, so rule out wrap-around first.
+    ASSERT_LE(column, width());
+    ASSERT_GE(width() - column, values.size());
+    ElementType *ptr = at(row, column);
+    if (!ptr) {
+      return;
+    }
+    size_t index = 0;
+    for (ElementType value : values) {
+      ptr[index++] = value;
+    }
+  }
+
   // Sets values starting in a given row starting at a given column.
   //
   // The layout of the input TwoDimensional object is not altered, meaning that
diff --git a/test/framework/special_floats.h b/test/framework/special_floats.h
new file mode 100644
index 000000000..780f7129d
--- /dev/null
+++ b/test/framework/special_floats.h
@@ -0,0 +1,8 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef INTRINSICCV_TEST_FRAMEWORK_SPECIAL_FLOATS_H_
+#define INTRINSICCV_TEST_FRAMEWORK_SPECIAL_FLOATS_H_
+
+#endif  // INTRINSICCV_TEST_FRAMEWORK_SPECIAL_FLOATS_H_
diff --git a/test/test_config.h.in b/test/test_config.h.in
index 8b91f4d50..6cc98c850 100644
--- a/test/test_config.h.in
+++ b/test/test_config.h.in
@@ -9,4 +9,15 @@
 
 #cmakedefine INTRINSICCV_ALLOCATION_TESTS
 
+// GCC and clang: silence -Wstrict-aliasing warnings between BEGIN and END.
+#ifdef __GNUC__
+#define INTRINSICCV_NO_STRICT_ALIASING_BEGIN \
+  _Pragma("GCC diagnostic push")             \
+      _Pragma("GCC diagnostic ignored \"-Wstrict-aliasing\"")
+#define INTRINSICCV_NO_STRICT_ALIASING_END _Pragma("GCC diagnostic pop")
+#else  // Other compilers: both macros expand to nothing.
+#define INTRINSICCV_NO_STRICT_ALIASING_BEGIN
+#define INTRINSICCV_NO_STRICT_ALIASING_END
+#endif  // __GNUC__
+
 #endif  // INTRINSICCV_TEST_CONFIG_H
-- 
GitLab


From 80f603d9cad17ea3f12bdca33286ab21499d5e7f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= <igor.podgainoi@arm.com>
Date: Tue, 2 Apr 2024 15:19:51 +0200
Subject: [PATCH 6/9] Exclude NEON float tests from CI

---
 scripts/ci.sh | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/scripts/ci.sh b/scripts/ci.sh
index 0b9560157..fd194d7b1 100755
--- a/scripts/ci.sh
+++ b/scripts/ci.sh
@@ -51,16 +51,17 @@ ninja -C build/gcc
 
 # Run tests
 LONG_VECTOR_TESTS="GRAY2.*:RGB*"
+EXCLUDE_FLOAT_CONVERSION_TESTS="-FloatToIntTest*"
 TESTRESULT=0
 qemu-aarch64     build/test/framework/intrinsiccv-framework-test --gtest_output=xml:build/test-results/ || TESTRESULT=1
-qemu-aarch64 -cpu cortex-a35 build/test/api/intrinsiccv-api-test --gtest_output=xml:build/test-results/clang-neon/ || TESTRESULT=1
+qemu-aarch64 -cpu cortex-a35 build/test/api/intrinsiccv-api-test --gtest_filter="${EXCLUDE_FLOAT_CONVERSION_TESTS}" --gtest_output=xml:build/test-results/clang-neon/ || TESTRESULT=1
 qemu-aarch64 -cpu max,sve128=on,sme=off \
   build/test/api/intrinsiccv-api-test --gtest_output=xml:build/test-results/clang-sve128/ --vector-length=16 || TESTRESULT=1
 qemu-aarch64 -cpu max,sve2048=on,sve-default-vector-length=256,sme=off \
   build/test/api/intrinsiccv-api-test --gtest_filter="${LONG_VECTOR_TESTS}" --gtest_output=xml:build/test-results/clang-sve2048/ --vector-length=256 || TESTRESULT=1
 qemu-aarch64 -cpu max,sve128=on,sme512=on \
   build/test/api/intrinsiccv-api-test --gtest_output=xml:build/test-results/clang-sme/ --vector-length=64 || TESTRESULT=1
-qemu-aarch64 -cpu cortex-a35 build/gcc/test/api/intrinsiccv-api-test --gtest_output=xml:build/test-results/gcc-neon/ || TESTRESULT=1
+qemu-aarch64 -cpu cortex-a35 build/gcc/test/api/intrinsiccv-api-test --gtest_filter="${EXCLUDE_FLOAT_CONVERSION_TESTS}" --gtest_output=xml:build/test-results/gcc-neon/ || TESTRESULT=1
 
 scripts/prefix_testsuite_names.py build/test-results/clang-neon/intrinsiccv-api-test.xml "clang-neon."
 scripts/prefix_testsuite_names.py build/test-results/clang-sve128/intrinsiccv-api-test.xml "clang-sve128."
@@ -79,7 +80,7 @@ if [[ $(dpkg --print-architecture) = arm64 ]]; then
     -DINTRINSICCV_ENABLE_SME2=OFF \
     -DCMAKE_CXX_FLAGS="-fsanitize=address,undefined -fno-sanitize-recover=all -Wno-pass-failed"
   ninja -C build/sanitize intrinsiccv-api-test
-  build/sanitize/test/api/intrinsiccv-api-test
+  build/sanitize/test/api/intrinsiccv-api-test --gtest_filter="${EXCLUDE_FLOAT_CONVERSION_TESTS}"
 fi
 
 # Build benchmarks, just to prevent bitrot.
-- 
GitLab


From 4dc29204495e05e18a6d7fd4e8a0db2a65625a42 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= <igor.podgainoi@arm.com>
Date: Tue, 9 Apr 2024 12:56:36 +0200
Subject: [PATCH 7/9] Refactor conformity test framework

---
 conformity/opencv/CMakeLists.txt         |   4 +
 conformity/opencv/common.h               |  13 +-
 conformity/opencv/manager.cpp            |   7 +-
 conformity/opencv/subordinate.cpp        |   7 +-
 conformity/opencv/test_gaussian_blur.cpp |  88 +++++++++
 conformity/opencv/test_gaussian_blur.h   |  14 ++
 conformity/opencv/test_sobel.cpp         |  61 ++++++
 conformity/opencv/test_sobel.h           |  14 ++
 conformity/opencv/tests.cpp              | 242 +++++------------------
 conformity/opencv/tests.h                |  48 +++++
 10 files changed, 289 insertions(+), 209 deletions(-)
 create mode 100644 conformity/opencv/test_gaussian_blur.cpp
 create mode 100644 conformity/opencv/test_gaussian_blur.h
 create mode 100644 conformity/opencv/test_sobel.cpp
 create mode 100644 conformity/opencv/test_sobel.h

diff --git a/conformity/opencv/CMakeLists.txt b/conformity/opencv/CMakeLists.txt
index 03c3f4f23..d6d6fadbb 100644
--- a/conformity/opencv/CMakeLists.txt
+++ b/conformity/opencv/CMakeLists.txt
@@ -29,6 +29,8 @@ add_executable(
   manager
   manager.cpp
   tests.cpp
+  test_sobel.cpp
+  test_gaussian_blur.cpp
 )
 
 target_link_libraries(
@@ -63,6 +65,8 @@ add_executable(
   subordinate
   subordinate.cpp
   tests.cpp
+  test_sobel.cpp
+  test_gaussian_blur.cpp
 )
 
 target_link_libraries(
diff --git a/conformity/opencv/common.h b/conformity/opencv/common.h
index f7736dcc2..612b48283 100644
--- a/conformity/opencv/common.h
+++ b/conformity/opencv/common.h
@@ -21,11 +21,14 @@
 #include "opencv2/core.hpp"
 #include "opencv2/imgproc.hpp"
 
-#define SHM_ID "/opencv_intrinisiccv_conformity_check_shm"
-#define SHM_SIZE (1024 * 1024)
-
-#define REQUEST_MQ_ID "/opencv_intrinisiccv_conformity_request_queue"
-#define REPLY_MQ_ID "/opencv_intrinisiccv_conformity_reply_queue"
+#define INTRINSICCV_CONFORMITY_SHM_ID \
+  "/opencv_intrinisiccv_conformity_check_shm"
+#define INTRINSICCV_CONFORMITY_SHM_SIZE (1024 * 1024)
+
+#define INTRINSICCV_CONFORMITY_REQUEST_MQ_ID \
+  "/opencv_intrinisiccv_conformity_request_queue"
+#define INTRINSICCV_CONFORMITY_REPLY_MQ_ID \
+  "/opencv_intrinisiccv_conformity_reply_queue"
 
 class ExceptionWithErrno : public std::exception {
  public:
diff --git a/conformity/opencv/manager.cpp b/conformity/opencv/manager.cpp
index 0b3058a2a..116845dd9 100644
--- a/conformity/opencv/manager.cpp
+++ b/conformity/opencv/manager.cpp
@@ -40,9 +40,10 @@ int main(int argc, char** argv) {
     throw ExceptionWithErrno("Cannot start subordinate executable");
   }
 
-  RecreatedSharedMemory sm{SHM_ID, SHM_SIZE};
-  RecreatedMessageQueue request_queue{REQUEST_MQ_ID, sm};
-  RecreatedMessageQueue reply_queue{REPLY_MQ_ID, sm};
+  RecreatedSharedMemory sm{INTRINSICCV_CONFORMITY_SHM_ID,
+                           INTRINSICCV_CONFORMITY_SHM_SIZE};
+  RecreatedMessageQueue request_queue{INTRINSICCV_CONFORMITY_REQUEST_MQ_ID, sm};
+  RecreatedMessageQueue reply_queue{INTRINSICCV_CONFORMITY_REPLY_MQ_ID, sm};
 
   // Let subordinate know that init is done
   kill(child_pid, SIGUSR1);
diff --git a/conformity/opencv/subordinate.cpp b/conformity/opencv/subordinate.cpp
index e7c77f327..4018c0f79 100644
--- a/conformity/opencv/subordinate.cpp
+++ b/conformity/opencv/subordinate.cpp
@@ -8,9 +8,10 @@
 #include "tests.h"
 
 int main(void) {
-  OpenedSharedMemory sm{SHM_ID, SHM_SIZE};
-  OpenedMessageQueue request_queue{REQUEST_MQ_ID, sm};
-  OpenedMessageQueue reply_queue{REPLY_MQ_ID, sm};
+  OpenedSharedMemory sm{INTRINSICCV_CONFORMITY_SHM_ID,
+                        INTRINSICCV_CONFORMITY_SHM_SIZE};
+  OpenedMessageQueue request_queue{INTRINSICCV_CONFORMITY_REQUEST_MQ_ID, sm};
+  OpenedMessageQueue reply_queue{INTRINSICCV_CONFORMITY_REPLY_MQ_ID, sm};
 
   wait_for_requests(request_queue, reply_queue);
 
diff --git a/conformity/opencv/test_gaussian_blur.cpp b/conformity/opencv/test_gaussian_blur.cpp
new file mode 100644
index 000000000..27ae51e2d
--- /dev/null
+++ b/conformity/opencv/test_gaussian_blur.cpp
@@ -0,0 +1,88 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "test_gaussian_blur.h"
+
+#include <vector>
+
+template <size_t KernelSize, size_t BorderType>
+cv::Mat exec_gaussian_blur(cv::Mat& input) {
+  cv::Mat blurred;  // Square KernelSize x KernelSize kernel, zero sigmas.
+  const cv::Size kernel_size(KernelSize, KernelSize);
+  cv::GaussianBlur(input, blurred, kernel_size, 0, 0, BorderType);
+  return blurred;
+}
+
+#if MANAGER
+template <size_t KernelSize, size_t BorderType, size_t Channels>
+bool test_gaussian_blur(int index, RecreatedMessageQueue& request_queue,
+                        RecreatedMessageQueue& reply_queue) {
+  cv::RNG rng(0);
+  // Blurs random inputs of every size from 5x5 to 16x16; true means failure.
+  for (size_t height = 5; height <= 16; ++height) {
+    for (size_t width = 5; width <= 16; ++width) {
+      cv::Mat input(height, width, CV_8UC(Channels));
+      rng.fill(input, cv::RNG::UNIFORM, 0, 255);
+      // 'actual' is computed in this process, 'expected' by the subordinate.
+      cv::Mat actual = exec_gaussian_blur<KernelSize, BorderType>(input);
+      cv::Mat expected = get_expected_from_subordinate(index, request_queue,
+                                                       reply_queue, input);
+      // A threshold of 0 tolerates no difference at all.
+      if (are_matrices_different<uint8_t>(0, actual, expected)) {
+        fail_print_matrices(height, width, input, actual, expected);
+        return true;
+      }
+    }
+  }
+  // Every size matched.
+  return false;
+}
+#endif
+// Returns the Gaussian blur test table, a function-local static built once.
+std::vector<test>& gaussian_blur_tests_singleton() {
+  // clang-format off
+  static std::vector<test> tests = {
+    TEST("Gaussian blur 3x3, BORDER_REFLECT_101, 1 channel", (test_gaussian_blur<3, cv::BORDER_REFLECT_101, 1>), (exec_gaussian_blur<3, cv::BORDER_REFLECT_101>)),
+    TEST("Gaussian blur 3x3, BORDER_REFLECT_101, 2 channel", (test_gaussian_blur<3, cv::BORDER_REFLECT_101, 2>), (exec_gaussian_blur<3, cv::BORDER_REFLECT_101>)),
+    TEST("Gaussian blur 3x3, BORDER_REFLECT_101, 3 channel", (test_gaussian_blur<3, cv::BORDER_REFLECT_101, 3>), (exec_gaussian_blur<3, cv::BORDER_REFLECT_101>)),
+    TEST("Gaussian blur 3x3, BORDER_REFLECT_101, 4 channel", (test_gaussian_blur<3, cv::BORDER_REFLECT_101, 4>), (exec_gaussian_blur<3, cv::BORDER_REFLECT_101>)),
+
+    TEST("Gaussian blur 3x3, BORDER_REFLECT, 1 channel", (test_gaussian_blur<3, cv::BORDER_REFLECT, 1>), (exec_gaussian_blur<3, cv::BORDER_REFLECT>)),
+    TEST("Gaussian blur 3x3, BORDER_REFLECT, 2 channel", (test_gaussian_blur<3, cv::BORDER_REFLECT, 2>), (exec_gaussian_blur<3, cv::BORDER_REFLECT>)),
+    TEST("Gaussian blur 3x3, BORDER_REFLECT, 3 channel", (test_gaussian_blur<3, cv::BORDER_REFLECT, 3>), (exec_gaussian_blur<3, cv::BORDER_REFLECT>)),
+    TEST("Gaussian blur 3x3, BORDER_REFLECT, 4 channel", (test_gaussian_blur<3, cv::BORDER_REFLECT, 4>), (exec_gaussian_blur<3, cv::BORDER_REFLECT>)),
+
+    TEST("Gaussian blur 3x3, BORDER_WRAP, 1 channel", (test_gaussian_blur<3, cv::BORDER_WRAP, 1>), (exec_gaussian_blur<3, cv::BORDER_WRAP>)),
+    TEST("Gaussian blur 3x3, BORDER_WRAP, 2 channel", (test_gaussian_blur<3, cv::BORDER_WRAP, 2>), (exec_gaussian_blur<3, cv::BORDER_WRAP>)),
+    TEST("Gaussian blur 3x3, BORDER_WRAP, 3 channel", (test_gaussian_blur<3, cv::BORDER_WRAP, 3>), (exec_gaussian_blur<3, cv::BORDER_WRAP>)),
+    TEST("Gaussian blur 3x3, BORDER_WRAP, 4 channel", (test_gaussian_blur<3, cv::BORDER_WRAP, 4>), (exec_gaussian_blur<3, cv::BORDER_WRAP>)),
+
+    TEST("Gaussian blur 3x3, BORDER_REPLICATE, 1 channel", (test_gaussian_blur<3, cv::BORDER_REPLICATE, 1>), (exec_gaussian_blur<3, cv::BORDER_REPLICATE>)),
+    TEST("Gaussian blur 3x3, BORDER_REPLICATE, 2 channel", (test_gaussian_blur<3, cv::BORDER_REPLICATE, 2>), (exec_gaussian_blur<3, cv::BORDER_REPLICATE>)),
+    TEST("Gaussian blur 3x3, BORDER_REPLICATE, 3 channel", (test_gaussian_blur<3, cv::BORDER_REPLICATE, 3>), (exec_gaussian_blur<3, cv::BORDER_REPLICATE>)),
+    TEST("Gaussian blur 3x3, BORDER_REPLICATE, 4 channel", (test_gaussian_blur<3, cv::BORDER_REPLICATE, 4>), (exec_gaussian_blur<3, cv::BORDER_REPLICATE>)),
+
+    TEST("Gaussian blur 5x5, BORDER_REFLECT_101, 1 channel", (test_gaussian_blur<5, cv::BORDER_REFLECT_101, 1>), (exec_gaussian_blur<5, cv::BORDER_REFLECT_101>)),
+    TEST("Gaussian blur 5x5, BORDER_REFLECT_101, 2 channel", (test_gaussian_blur<5, cv::BORDER_REFLECT_101, 2>), (exec_gaussian_blur<5, cv::BORDER_REFLECT_101>)),
+    TEST("Gaussian blur 5x5, BORDER_REFLECT_101, 3 channel", (test_gaussian_blur<5, cv::BORDER_REFLECT_101, 3>), (exec_gaussian_blur<5, cv::BORDER_REFLECT_101>)),
+    TEST("Gaussian blur 5x5, BORDER_REFLECT_101, 4 channel", (test_gaussian_blur<5, cv::BORDER_REFLECT_101, 4>), (exec_gaussian_blur<5, cv::BORDER_REFLECT_101>)),
+
+    TEST("Gaussian blur 5x5, BORDER_REFLECT, 1 channel", (test_gaussian_blur<5, cv::BORDER_REFLECT, 1>), (exec_gaussian_blur<5, cv::BORDER_REFLECT>)),
+    TEST("Gaussian blur 5x5, BORDER_REFLECT, 2 channel", (test_gaussian_blur<5, cv::BORDER_REFLECT, 2>), (exec_gaussian_blur<5, cv::BORDER_REFLECT>)),
+    TEST("Gaussian blur 5x5, BORDER_REFLECT, 3 channel", (test_gaussian_blur<5, cv::BORDER_REFLECT, 3>), (exec_gaussian_blur<5, cv::BORDER_REFLECT>)),
+    TEST("Gaussian blur 5x5, BORDER_REFLECT, 4 channel", (test_gaussian_blur<5, cv::BORDER_REFLECT, 4>), (exec_gaussian_blur<5, cv::BORDER_REFLECT>)),
+
+    TEST("Gaussian blur 5x5, BORDER_WRAP, 1 channel", (test_gaussian_blur<5, cv::BORDER_WRAP, 1>), (exec_gaussian_blur<5, cv::BORDER_WRAP>)),
+    TEST("Gaussian blur 5x5, BORDER_WRAP, 2 channel", (test_gaussian_blur<5, cv::BORDER_WRAP, 2>), (exec_gaussian_blur<5, cv::BORDER_WRAP>)),
+    TEST("Gaussian blur 5x5, BORDER_WRAP, 3 channel", (test_gaussian_blur<5, cv::BORDER_WRAP, 3>), (exec_gaussian_blur<5, cv::BORDER_WRAP>)),
+    TEST("Gaussian blur 5x5, BORDER_WRAP, 4 channel", (test_gaussian_blur<5, cv::BORDER_WRAP, 4>), (exec_gaussian_blur<5, cv::BORDER_WRAP>)),
+
+    TEST("Gaussian blur 5x5, BORDER_REPLICATE, 1 channel", (test_gaussian_blur<5, cv::BORDER_REPLICATE, 1>), (exec_gaussian_blur<5, cv::BORDER_REPLICATE>)),
+    TEST("Gaussian blur 5x5, BORDER_REPLICATE, 2 channel", (test_gaussian_blur<5, cv::BORDER_REPLICATE, 2>), (exec_gaussian_blur<5, cv::BORDER_REPLICATE>)),
+    TEST("Gaussian blur 5x5, BORDER_REPLICATE, 3 channel", (test_gaussian_blur<5, cv::BORDER_REPLICATE, 3>), (exec_gaussian_blur<5, cv::BORDER_REPLICATE>)),
+    TEST("Gaussian blur 5x5, BORDER_REPLICATE, 4 channel", (test_gaussian_blur<5, cv::BORDER_REPLICATE, 4>), (exec_gaussian_blur<5, cv::BORDER_REPLICATE>)),
+  };
+  // clang-format on
+  return tests;
+}
diff --git a/conformity/opencv/test_gaussian_blur.h b/conformity/opencv/test_gaussian_blur.h
new file mode 100644
index 000000000..45ee18a6c
--- /dev/null
+++ b/conformity/opencv/test_gaussian_blur.h
@@ -0,0 +1,14 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef INTRINSICCV_OPENCV_CONFORMITY_TEST_GAUSSIAN_BLUR_H_
+#define INTRINSICCV_OPENCV_CONFORMITY_TEST_GAUSSIAN_BLUR_H_
+
+#include <vector>
+
+#include "tests.h"
+
+std::vector<test>& gaussian_blur_tests_singleton();
+
+#endif  // INTRINSICCV_OPENCV_CONFORMITY_TEST_GAUSSIAN_BLUR_H_
diff --git a/conformity/opencv/test_sobel.cpp b/conformity/opencv/test_sobel.cpp
new file mode 100644
index 000000000..a87143aa0
--- /dev/null
+++ b/conformity/opencv/test_sobel.cpp
@@ -0,0 +1,61 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "test_sobel.h"
+
+#include <vector>
+
+template <bool Vertical>
+cv::Mat exec_sobel(cv::Mat& input) {
+  // Vertical passes derivative orders (dx, dy) = (0, 1), otherwise (1, 0).
+  constexpr int kDx = Vertical ? 0 : 1;
+  constexpr int kDy = Vertical ? 1 : 0;
+  cv::Mat result;
+  cv::Sobel(input, result, CV_16S, kDx, kDy, 3, 1.0, 0.0,
+            cv::BORDER_REPLICATE);
+  return result;
+}
+
+#if MANAGER
+template <bool Vertical, size_t Channels>
+bool test_sobel(int index, RecreatedMessageQueue& request_queue,
+                RecreatedMessageQueue& reply_queue) {
+  cv::RNG rng(0);
+  // Runs Sobel on random inputs from 5x5 to 16x16; true means a mismatch.
+  for (size_t x = 5; x <= 16; ++x) {
+    for (size_t y = 5; y <= 16; ++y) {
+      cv::Mat input(x, y, CV_8UC(Channels));
+      rng.fill(input, cv::RNG::UNIFORM, 0, 255);
+      // 'actual' is computed in this process, 'expected' by the subordinate.
+      cv::Mat actual = exec_sobel<Vertical>(input);
+      cv::Mat expected = get_expected_from_subordinate(index, request_queue,
+                                                       reply_queue, input);
+      // exec_sobel() produces CV_16S, so compare whole 16-bit elements.
+      if (are_matrices_different<int16_t>(0, actual, expected)) {
+        fail_print_matrices(x, y, input, actual, expected);
+        return true;
+      }
+    }
+  }
+  // Every size matched.
+  return false;
+}
+#endif
+// Returns the Sobel test table, a function-local static built only once.
+std::vector<test>& sobel_tests_singleton() {
+  // clang-format off
+  static std::vector<test> tests = {
+    TEST("Sobel Vertical, 1 channel", (test_sobel<true, 1>), exec_sobel<true>),
+    TEST("Sobel Vertical, 2 channel", (test_sobel<true, 2>), exec_sobel<true>),
+    TEST("Sobel Vertical, 3 channel", (test_sobel<true, 3>), exec_sobel<true>),
+    TEST("Sobel Vertical, 4 channel", (test_sobel<true, 4>), exec_sobel<true>),
+
+    TEST("Sobel Horizontal, 1 channel", (test_sobel<false, 1>), exec_sobel<false>),
+    TEST("Sobel Horizontal, 2 channel", (test_sobel<false, 2>), exec_sobel<false>),
+    TEST("Sobel Horizontal, 3 channel", (test_sobel<false, 3>), exec_sobel<false>),
+    TEST("Sobel Horizontal, 4 channel", (test_sobel<false, 4>), exec_sobel<false>),
+  };
+  // clang-format on
+  return tests;
+}
diff --git a/conformity/opencv/test_sobel.h b/conformity/opencv/test_sobel.h
new file mode 100644
index 000000000..399945c61
--- /dev/null
+++ b/conformity/opencv/test_sobel.h
@@ -0,0 +1,14 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef INTRINSICCV_OPENCV_CONFORMITY_TEST_SOBEL_H_
+#define INTRINSICCV_OPENCV_CONFORMITY_TEST_SOBEL_H_
+
+#include <vector>
+
+#include "tests.h"
+
+std::vector<test>& sobel_tests_singleton();
+
+#endif  // INTRINSICCV_OPENCV_CONFORMITY_TEST_SOBEL_H_
diff --git a/conformity/opencv/tests.cpp b/conformity/opencv/tests.cpp
index 7aede57f7..693dbc565 100644
--- a/conformity/opencv/tests.cpp
+++ b/conformity/opencv/tests.cpp
@@ -4,220 +4,66 @@
 
 #include "tests.h"
 
+#include <initializer_list>
 #include <iostream>
-#include <string>
-#include <utility>
 #include <vector>
 
 #include "opencv2/core.hpp"
 #include "opencv2/imgproc.hpp"
+#include "test_gaussian_blur.h"
+#include "test_sobel.h"
 
 #if MANAGER
-
-template <typename T>
-static auto abs_diff(T a, T b) {
-  return a > b ? a - b : b - a;
-}
-
-template <typename T>
-static bool are_matrices_different(T threshold, cv::Mat& A, cv::Mat& B) {
-  if (A.rows != B.rows || A.cols != B.cols || A.type() != B.type()) {
-    std::cout << "Matrix size/type mismatch" << std::endl;
-    return true;
-  }
-
-  for (int i = 0; i < A.rows; ++i) {
-    for (int j = 0; j < (A.cols * CV_MAT_CN(A.type())); ++j) {
-      if (abs_diff<T>(A.at<T>(i, j), B.at<T>(i, j)) > threshold) {
-        std::cout << "=== Mismatch at: " << i << " " << j << std::endl
-                  << std::endl;
-        return true;
-      }
-    }
-  }
-
-  return false;
-}
-
-template <bool Vertical, size_t Channels>
-bool test_sobel(int index, RecreatedMessageQueue& request_queue,
-                RecreatedMessageQueue& reply_queue) {
-  cv::RNG rng(0);
-
-  for (size_t x = 5; x <= 16; ++x) {
-    for (size_t y = 5; y <= 16; ++y) {
-      cv::Mat src(x, y, CV_8UC(Channels));
-      rng.fill(src, cv::RNG::UNIFORM, 0, 255);
-
-      cv::Mat manager_result;
-      if constexpr (Vertical) {
-        cv::Sobel(src, manager_result, CV_16S, 0, 1, 3, 1.0, 0.0,
-                  cv::BORDER_REPLICATE);
-      } else {
-        cv::Sobel(src, manager_result, CV_16S, 1, 0, 3, 1.0, 0.0,
-                  cv::BORDER_REPLICATE);
-      }
-
-      request_queue.request_operation(index, src);
-      reply_queue.wait();
-      if (reply_queue.last_cmd() != index) {
-        throw std::runtime_error("Invalid reply from subordinate");
-      }
-
-      cv::Mat subord_result = reply_queue.cv_mat_from_last_msg();
-
-      if (are_matrices_different<uint8_t>(0, manager_result, subord_result)) {
-        std::cout << "[FAIL]" << std::endl;
-        std::cout << "height=" << x << std::endl;
-        std::cout << "width=" << y << std::endl;
-        std::cout << "=== Src Matrix:" << std::endl;
-        std::cout << src << std::endl << std::endl;
-        std::cout << "=== Manager result:" << std::endl;
-        std::cout << manager_result << std::endl << std::endl;
-        std::cout << "=== Subordinate result:" << std::endl;
-        std::cout << subord_result << std::endl << std::endl;
-
-        return true;
-      }
-    }
-  }
-
-  return false;
+void fail_print_matrices(size_t height, size_t width, cv::Mat& input,
+                         cv::Mat& manager_result, cv::Mat& subord_result) {
+  // Report the failing dimensions, then the input and both result matrices.
+  std::cout << "[FAIL]" << std::endl << "height=" << height << std::endl
+            << "width=" << width << std::endl;
+  std::cout << "=== Input Matrix:" << std::endl
+            << input << std::endl << std::endl;
+  std::cout << "=== Manager result (actual):" << std::endl
+            << manager_result << std::endl << std::endl;
+  std::cout << "=== Subordinate result (expected):" << std::endl
+            << subord_result << std::endl << std::endl;
 }
 
-template <size_t KernelSize, size_t BorderType, size_t Channels>
-bool test_gaussian_blur(int index, RecreatedMessageQueue& request_queue,
-                        RecreatedMessageQueue& reply_queue) {
-  cv::RNG rng(0);
-  cv::Size kernel(KernelSize, KernelSize);
-
-  for (size_t x = 5; x <= 16; ++x) {
-    for (size_t y = 5; y <= 16; ++y) {
-      cv::Mat src(x, y, CV_8UC(Channels));
-      rng.fill(src, cv::RNG::UNIFORM, 0, 255);
-
-      cv::Mat manager_result;
-      cv::GaussianBlur(src, manager_result, kernel, 0, 0, BorderType);
-
-      request_queue.request_operation(index, src);
-      reply_queue.wait();
-      if (reply_queue.last_cmd() != index) {
-        throw std::runtime_error("Invalid reply from subordinate");
-      }
-
-      cv::Mat subord_result = reply_queue.cv_mat_from_last_msg();
-
-      if (are_matrices_different<uint8_t>(0, manager_result, subord_result)) {
-        std::cout << "[FAIL]" << std::endl;
-        std::cout << "height=" << x << std::endl;
-        std::cout << "width=" << y << std::endl;
-        std::cout << "=== Src Matrix:" << std::endl;
-        std::cout << src << std::endl << std::endl;
-        std::cout << "=== Manager result:" << std::endl;
-        std::cout << manager_result << std::endl << std::endl;
-        std::cout << "=== Subordinate result:" << std::endl;
-        std::cout << subord_result << std::endl << std::endl;
-
-        return true;
-      }
-    }
+cv::Mat get_expected_from_subordinate(int index,
+                                      RecreatedMessageQueue& request_queue,
+                                      RecreatedMessageQueue& reply_queue,
+                                      cv::Mat& input) {
+  request_queue.request_operation(index, input);
+  reply_queue.wait();
+  if (reply_queue.last_cmd() != index) {
+    throw std::runtime_error("Invalid reply from subordinate");
   }
 
-  return false;
+  return reply_queue.cv_mat_from_last_msg();
 }
+#endif
 
-using test = std::pair<std::string, decltype(test_sobel<true, 1>)*>;
-#define TEST(name, manager_func, subordinate_func) \
-  { name, manager_func }
-
-#else  // MANAGER
-
-template <bool Vertical>
-cv::Mat exec_sobel(cv::Mat& input) {
-  cv::Mat result;
-  if constexpr (Vertical) {
-    cv::Sobel(input, result, CV_16S, 0, 1, 3, 1.0, 0.0, cv::BORDER_REPLICATE);
-  } else {
-    cv::Sobel(input, result, CV_16S, 1, 0, 3, 1.0, 0.0, cv::BORDER_REPLICATE);
+template <typename T>
+static std::vector<T> merge_tests(
+    std::initializer_list<std::vector<test>& (*)()> test_groups) {
+  std::vector<T> all_tests;
+  for (auto singleton : test_groups) {
+    std::vector<test>& group = singleton();
+    all_tests.insert(all_tests.cend(), group.cbegin(), group.cend());
   }
-  return result;
-}
-
-template <size_t KernelSize, size_t BorderType>
-cv::Mat exec_gaussian_blur(cv::Mat& inp) {
-  cv::Size kernel(KernelSize, KernelSize);
-  cv::Mat out;
-  cv::GaussianBlur(inp, out, kernel, 0, 0, BorderType);
-  return out;
+  return all_tests;
 }
 
-using test = std::pair<std::string, decltype(exec_sobel<true>)*>;
-#define TEST(name, manager_func, subordinate_func) \
-  { name, subordinate_func }
-
-#endif  // MANAGER
-
-// clang-format off
-std::vector<test> tests = {
-  TEST("Sobel Vertical, 1 channel", (test_sobel<true, 1>), exec_sobel<true>),
-  TEST("Sobel Vertical, 2 channel", (test_sobel<true, 2>), exec_sobel<true>),
-  TEST("Sobel Vertical, 3 channel", (test_sobel<true, 3>), exec_sobel<true>),
-  TEST("Sobel Vertical, 4 channel", (test_sobel<true, 4>), exec_sobel<true>),
-
-  TEST("Sobel Horizontal, 1 channel", (test_sobel<false, 1>), exec_sobel<false>),
-  TEST("Sobel Horizontal, 2 channel", (test_sobel<false, 2>), exec_sobel<false>),
-  TEST("Sobel Horizontal, 3 channel", (test_sobel<false, 3>), exec_sobel<false>),
-  TEST("Sobel Horizontal, 4 channel", (test_sobel<false, 4>), exec_sobel<false>),
-
-  TEST("Gaussian blur 3x3, BORDER_REFLECT_101, 1 channel", (test_gaussian_blur<3, cv::BORDER_REFLECT_101, 1>), (exec_gaussian_blur<3, cv::BORDER_REFLECT_101>)),
-  TEST("Gaussian blur 3x3, BORDER_REFLECT_101, 2 channel", (test_gaussian_blur<3, cv::BORDER_REFLECT_101, 2>), (exec_gaussian_blur<3, cv::BORDER_REFLECT_101>)),
-  TEST("Gaussian blur 3x3, BORDER_REFLECT_101, 3 channel", (test_gaussian_blur<3, cv::BORDER_REFLECT_101, 3>), (exec_gaussian_blur<3, cv::BORDER_REFLECT_101>)),
-  TEST("Gaussian blur 3x3, BORDER_REFLECT_101, 4 channel", (test_gaussian_blur<3, cv::BORDER_REFLECT_101, 4>), (exec_gaussian_blur<3, cv::BORDER_REFLECT_101>)),
-
-  TEST("Gaussian blur 3x3, BORDER_REFLECT, 1 channel", (test_gaussian_blur<3, cv::BORDER_REFLECT, 1>), (exec_gaussian_blur<3, cv::BORDER_REFLECT>)),
-  TEST("Gaussian blur 3x3, BORDER_REFLECT, 2 channel", (test_gaussian_blur<3, cv::BORDER_REFLECT, 2>), (exec_gaussian_blur<3, cv::BORDER_REFLECT>)),
-  TEST("Gaussian blur 3x3, BORDER_REFLECT, 3 channel", (test_gaussian_blur<3, cv::BORDER_REFLECT, 3>), (exec_gaussian_blur<3, cv::BORDER_REFLECT>)),
-  TEST("Gaussian blur 3x3, BORDER_REFLECT, 4 channel", (test_gaussian_blur<3, cv::BORDER_REFLECT, 4>), (exec_gaussian_blur<3, cv::BORDER_REFLECT>)),
-
-  TEST("Gaussian blur 3x3, BORDER_WRAP, 1 channel", (test_gaussian_blur<3, cv::BORDER_WRAP, 1>), (exec_gaussian_blur<3, cv::BORDER_WRAP>)),
-  TEST("Gaussian blur 3x3, BORDER_WRAP, 2 channel", (test_gaussian_blur<3, cv::BORDER_WRAP, 2>), (exec_gaussian_blur<3, cv::BORDER_WRAP>)),
-  TEST("Gaussian blur 3x3, BORDER_WRAP, 3 channel", (test_gaussian_blur<3, cv::BORDER_WRAP, 3>), (exec_gaussian_blur<3, cv::BORDER_WRAP>)),
-  TEST("Gaussian blur 3x3, BORDER_WRAP, 4 channel", (test_gaussian_blur<3, cv::BORDER_WRAP, 4>), (exec_gaussian_blur<3, cv::BORDER_WRAP>)),
-
-  TEST("Gaussian blur 3x3, BORDER_REPLICATE, 1 channel", (test_gaussian_blur<3, cv::BORDER_REPLICATE, 1>), (exec_gaussian_blur<3, cv::BORDER_REPLICATE>)),
-  TEST("Gaussian blur 3x3, BORDER_REPLICATE, 2 channel", (test_gaussian_blur<3, cv::BORDER_REPLICATE, 2>), (exec_gaussian_blur<3, cv::BORDER_REPLICATE>)),
-  TEST("Gaussian blur 3x3, BORDER_REPLICATE, 3 channel", (test_gaussian_blur<3, cv::BORDER_REPLICATE, 3>), (exec_gaussian_blur<3, cv::BORDER_REPLICATE>)),
-  TEST("Gaussian blur 3x3, BORDER_REPLICATE, 4 channel", (test_gaussian_blur<3, cv::BORDER_REPLICATE, 4>), (exec_gaussian_blur<3, cv::BORDER_REPLICATE>)),
-
-  TEST("Gaussian blur 5x5, BORDER_REFLECT_101, 1 channel", (test_gaussian_blur<5, cv::BORDER_REFLECT_101, 1>), (exec_gaussian_blur<5, cv::BORDER_REFLECT_101>)),
-  TEST("Gaussian blur 5x5, BORDER_REFLECT_101, 2 channel", (test_gaussian_blur<5, cv::BORDER_REFLECT_101, 2>), (exec_gaussian_blur<5, cv::BORDER_REFLECT_101>)),
-  TEST("Gaussian blur 5x5, BORDER_REFLECT_101, 3 channel", (test_gaussian_blur<5, cv::BORDER_REFLECT_101, 3>), (exec_gaussian_blur<5, cv::BORDER_REFLECT_101>)),
-  TEST("Gaussian blur 5x5, BORDER_REFLECT_101, 4 channel", (test_gaussian_blur<5, cv::BORDER_REFLECT_101, 4>), (exec_gaussian_blur<5, cv::BORDER_REFLECT_101>)),
-
-  TEST("Gaussian blur 5x5, BORDER_REFLECT, 1 channel", (test_gaussian_blur<5, cv::BORDER_REFLECT, 1>), (exec_gaussian_blur<5, cv::BORDER_REFLECT>)),
-  TEST("Gaussian blur 5x5, BORDER_REFLECT, 2 channel", (test_gaussian_blur<5, cv::BORDER_REFLECT, 2>), (exec_gaussian_blur<5, cv::BORDER_REFLECT>)),
-  TEST("Gaussian blur 5x5, BORDER_REFLECT, 3 channel", (test_gaussian_blur<5, cv::BORDER_REFLECT, 3>), (exec_gaussian_blur<5, cv::BORDER_REFLECT>)),
-  TEST("Gaussian blur 5x5, BORDER_REFLECT, 4 channel", (test_gaussian_blur<5, cv::BORDER_REFLECT, 4>), (exec_gaussian_blur<5, cv::BORDER_REFLECT>)),
-
-  TEST("Gaussian blur 5x5, BORDER_WRAP, 1 channel", (test_gaussian_blur<5, cv::BORDER_WRAP, 1>), (exec_gaussian_blur<5, cv::BORDER_WRAP>)),
-  TEST("Gaussian blur 5x5, BORDER_WRAP, 2 channel", (test_gaussian_blur<5, cv::BORDER_WRAP, 2>), (exec_gaussian_blur<5, cv::BORDER_WRAP>)),
-  TEST("Gaussian blur 5x5, BORDER_WRAP, 3 channel", (test_gaussian_blur<5, cv::BORDER_WRAP, 3>), (exec_gaussian_blur<5, cv::BORDER_WRAP>)),
-  TEST("Gaussian blur 5x5, BORDER_WRAP, 4 channel", (test_gaussian_blur<5, cv::BORDER_WRAP, 4>), (exec_gaussian_blur<5, cv::BORDER_WRAP>)),
-
-  TEST("Gaussian blur 5x5, BORDER_REPLICATE, 1 channel", (test_gaussian_blur<5, cv::BORDER_REPLICATE, 1>), (exec_gaussian_blur<5, cv::BORDER_REPLICATE>)),
-  TEST("Gaussian blur 5x5, BORDER_REPLICATE, 2 channel", (test_gaussian_blur<5, cv::BORDER_REPLICATE, 2>), (exec_gaussian_blur<5, cv::BORDER_REPLICATE>)),
-  TEST("Gaussian blur 5x5, BORDER_REPLICATE, 3 channel", (test_gaussian_blur<5, cv::BORDER_REPLICATE, 3>), (exec_gaussian_blur<5, cv::BORDER_REPLICATE>)),
-  TEST("Gaussian blur 5x5, BORDER_REPLICATE, 4 channel", (test_gaussian_blur<5, cv::BORDER_REPLICATE, 4>), (exec_gaussian_blur<5, cv::BORDER_REPLICATE>)),
-};
-// clang-format on
+std::vector<test> all_tests = merge_tests<test>({
+    sobel_tests_singleton,
+    gaussian_blur_tests_singleton,
+});
 
 #if MANAGER
 int run_tests(RecreatedMessageQueue& request_queue,
               RecreatedMessageQueue& reply_queue) {
   int ret_val = 0;
-  for (int i = 0; i < static_cast<int>(tests.size()); ++i) {
-    std::cout << "Testing " + tests[i].first << std::endl;
-    if (tests[i].second(i, request_queue, reply_queue)) {
+  for (int i = 0; i < static_cast<int>(all_tests.size()); ++i) {
+    std::cout << "Testing " + all_tests[i].first << std::endl;
+    if (all_tests[i].second(i, request_queue, reply_queue)) {
       ret_val = 1;
     }
   }
@@ -225,7 +71,7 @@ int run_tests(RecreatedMessageQueue& request_queue,
 
   return ret_val;
 }
-#else   // MANAGER
+#else
 void wait_for_requests(OpenedMessageQueue& request_queue,
                        OpenedMessageQueue& reply_queue) {
   while (true) {
@@ -237,13 +83,13 @@ void wait_for_requests(OpenedMessageQueue& request_queue,
       break;
     }
 
-    if (cmd > static_cast<int>(tests.size())) {
-      throw std::runtime_error("Invalid operation requestd in subordinate");
+    if (cmd >= static_cast<int>(all_tests.size())) {
+      throw std::runtime_error("Invalid operation requested in subordinate");
     }
 
     cv::Mat input = request_queue.cv_mat_from_last_msg();
-    cv::Mat result = tests[cmd].second(input);
+    cv::Mat result = all_tests[cmd].second(input);
     reply_queue.reply_operation(cmd, result);
   }
 }
-#endif  // MANAGER
+#endif
diff --git a/conformity/opencv/tests.h b/conformity/opencv/tests.h
index ba92cb050..164ba5c7c 100644
--- a/conformity/opencv/tests.h
+++ b/conformity/opencv/tests.h
@@ -5,14 +5,62 @@
 #ifndef INTRINSICCV_OPENCV_CONFORMITY_TESTS_H_
 #define INTRINSICCV_OPENCV_CONFORMITY_TESTS_H_
 
+#include <iostream>
+#include <string>
+#include <utility>
+
 #include "common.h"
 
 #if MANAGER
+template <typename T>
+static auto abs_diff(T a, T b) {
+  return a > b ? a - b : b - a;
+}
+
+template <typename T>
+bool are_matrices_different(T threshold, cv::Mat& A, cv::Mat& B) {
+  if (A.rows != B.rows || A.cols != B.cols || A.type() != B.type()) {
+    std::cout << "Matrix size/type mismatch" << std::endl;
+    return true;
+  }
+
+  for (int i = 0; i < A.rows; ++i) {
+    for (int j = 0; j < (A.cols * CV_MAT_CN(A.type())); ++j) {
+      if (abs_diff<T>(A.at<T>(i, j), B.at<T>(i, j)) > threshold) {
+        std::cout << "=== Mismatch at: " << i << " " << j << std::endl
+                  << std::endl;
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+void fail_print_matrices(size_t height, size_t width, cv::Mat& input,
+                         cv::Mat& manager_result, cv::Mat& subord_result);
+
+cv::Mat get_expected_from_subordinate(int index,
+                                      RecreatedMessageQueue& request_queue,
+                                      RecreatedMessageQueue& reply_queue,
+                                      cv::Mat& input);
+
 int run_tests(RecreatedMessageQueue& request_queue,
               RecreatedMessageQueue& reply_queue);
+
+typedef bool (*test_function)(int index, RecreatedMessageQueue& request_queue,
+                              RecreatedMessageQueue& reply_queue);
+using test = std::pair<std::string, test_function>;
+#define TEST(name, test_func, x) \
+  { name, test_func }
 #else
 void wait_for_requests(OpenedMessageQueue& request_queue,
                        OpenedMessageQueue& reply_queue);
+
+typedef cv::Mat (*exec_function)(cv::Mat& input);
+using test = std::pair<std::string, exec_function>;
+#define TEST(name, x, exec_func) \
+  { name, exec_func }
 #endif
 
 #endif  // INTRINSICCV_OPENCV_CONFORMITY_TESTS_H_
-- 
GitLab


From d8e61aa13c2d2bcc6478afe95d921caa691fa039 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= <igor.podgainoi@arm.com>
Date: Wed, 3 Apr 2024 13:46:36 +0200
Subject: [PATCH 8/9] Added OpenCV conformity tests (float32 to int8 and uint8)

---
 conformity/opencv/CMakeLists.txt        |   2 +
 conformity/opencv/special_floats.h      |   8 ++
 conformity/opencv/test_float_to_int.cpp | 122 ++++++++++++++++++++++++
 conformity/opencv/test_float_to_int.h   |  14 +++
 conformity/opencv/tests.cpp             |   2 +
 5 files changed, 148 insertions(+)
 create mode 100644 conformity/opencv/special_floats.h
 create mode 100644 conformity/opencv/test_float_to_int.cpp
 create mode 100644 conformity/opencv/test_float_to_int.h

diff --git a/conformity/opencv/CMakeLists.txt b/conformity/opencv/CMakeLists.txt
index d6d6fadbb..92935aaed 100644
--- a/conformity/opencv/CMakeLists.txt
+++ b/conformity/opencv/CMakeLists.txt
@@ -31,6 +31,7 @@ add_executable(
   tests.cpp
   test_sobel.cpp
   test_gaussian_blur.cpp
+  test_float_to_int.cpp
 )
 
 target_link_libraries(
@@ -67,6 +68,7 @@ add_executable(
   tests.cpp
   test_sobel.cpp
   test_gaussian_blur.cpp
+  test_float_to_int.cpp
 )
 
 target_link_libraries(
diff --git a/conformity/opencv/special_floats.h b/conformity/opencv/special_floats.h
new file mode 100644
index 000000000..69d42e1c0
--- /dev/null
+++ b/conformity/opencv/special_floats.h
@@ -0,0 +1,8 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef INTRINSICCV_OPENCV_CONFORMITY_SPECIAL_FLOATS_H_
+#define INTRINSICCV_OPENCV_CONFORMITY_SPECIAL_FLOATS_H_
+
+#endif  // INTRINSICCV_OPENCV_CONFORMITY_SPECIAL_FLOATS_H_
diff --git a/conformity/opencv/test_float_to_int.cpp b/conformity/opencv/test_float_to_int.cpp
new file mode 100644
index 000000000..03e9d5a24
--- /dev/null
+++ b/conformity/opencv/test_float_to_int.cpp
@@ -0,0 +1,122 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "test_float_to_int.h"
+
+#include <vector>
+
+#include "special_floats.h"
+
+float floatval_(uint32_t v) {
+  static_assert(sizeof(float) == 4);
+  return *reinterpret_cast<float*>(&v);
+}
+
+float quietNaN = floatval_(0x7FC00000);
+float signalingNaN = floatval_(0x7FA00000);
+float posInfinity = floatval_(0x7F800000);
+float negInfinity = floatval_(0xFF800000);
+
+float minusNaN = floatval_(0xFF800001);
+float plusNaN = floatval_(0x7F800001);
+float plusZero = floatval_(0x00000000);
+float minusZero = floatval_(0x80000000);
+
+float oneNaN = floatval_(0x7FC00001);
+float zeroDivZero = floatval_(0xFFC00000);
+float floatMin = floatval_(0x00800000);
+float floatMax = floatval_(0x7F7FFFFF);
+
+float posSubnormalMin = floatval_(0x00000001);
+float posSubnormalMax = floatval_(0x007FFFFF);
+float negSubnormalMin = floatval_(0x80000001);
+float negSubnormalMax = floatval_(0x807FFFFF);
+
+template <bool Signed>
+cv::Mat exec_float32_to_int8(cv::Mat& input) {
+  cv::Mat result;
+  input.convertTo(result, Signed ? CV_8SC1 : CV_8UC1);
+  return result;
+}
+
+#if MANAGER
+template <bool Signed, size_t Channels>
+bool test_float32_to_int8_fill(int index, RecreatedMessageQueue& request_queue,
+                               RecreatedMessageQueue& reply_queue) {
+  cv::RNG rng(0);
+
+  for (size_t x = 5; x <= 16; ++x) {
+    for (size_t y = 5; y <= 16; ++y) {
+      cv::Mat input(x, y, CV_32FC(Channels));
+      rng.fill(input, cv::RNG::UNIFORM, Signed ? -1000 : 0, 1000);
+
+      cv::Mat actual = exec_float32_to_int8<Signed>(input);
+      cv::Mat expected = get_expected_from_subordinate(index, request_queue,
+                                                       reply_queue, input);
+
+      if (are_matrices_different<uint8_t>(0, actual, expected)) {
+        fail_print_matrices(x, y, input, actual, expected);
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+static constexpr int custom_data_height = 8;
+static constexpr int custom_data_width = 4;
+
+static float custom_data[custom_data_height * custom_data_width] = {
+    // clang-format off
+  quietNaN, signalingNaN, posInfinity, negInfinity,
+  minusNaN, plusNaN, plusZero, minusZero,
+  oneNaN, zeroDivZero, floatMin, floatMax,
+  posSubnormalMin, posSubnormalMax, negSubnormalMin, negSubnormalMax,
+  1111.11, -1112.22, 113.33, 114.44,
+  111.51, 112.62, 113.73, 114.84,
+  126.66, 127.11, 128.66, 129.11,
+  11.5, 12.5, -11.5, -12.5,
+    // clang-format on
+};
+
+template <bool Signed>
+bool test_float32_to_int8_custom(int index,
+                                 RecreatedMessageQueue& request_queue,
+                                 RecreatedMessageQueue& reply_queue) {
+  cv::Mat input(custom_data_height, custom_data_width, CV_32FC1, custom_data);
+
+  cv::Mat actual = exec_float32_to_int8<Signed>(input);
+  cv::Mat expected =
+      get_expected_from_subordinate(index, request_queue, reply_queue, input);
+
+  if (are_matrices_different<uint8_t>(0, actual, expected)) {
+    fail_print_matrices(custom_data_height, custom_data_width, input, actual,
+                        expected);
+    return true;
+  }
+
+  return false;
+}
+#endif
+
+std::vector<test>& float_to_int_tests_singleton() {
+  // clang-format off
+  static std::vector<test> tests = {
+    TEST("Float32 to Signed Int8, fill, 1 channel", (test_float32_to_int8_fill<true, 1>), exec_float32_to_int8<true>),
+    TEST("Float32 to Signed Int8, fill, 2 channel", (test_float32_to_int8_fill<true, 2>), exec_float32_to_int8<true>),
+    TEST("Float32 to Signed Int8, fill, 3 channel", (test_float32_to_int8_fill<true, 3>), exec_float32_to_int8<true>),
+    TEST("Float32 to Signed Int8, fill, 4 channel", (test_float32_to_int8_fill<true, 4>), exec_float32_to_int8<true>),
+
+    TEST("Float32 to Unsigned Int8, fill, 1 channel", (test_float32_to_int8_fill<false, 1>), exec_float32_to_int8<false>),
+    TEST("Float32 to Unsigned Int8, fill, 2 channel", (test_float32_to_int8_fill<false, 2>), exec_float32_to_int8<false>),
+    TEST("Float32 to Unsigned Int8, fill, 3 channel", (test_float32_to_int8_fill<false, 3>), exec_float32_to_int8<false>),
+    TEST("Float32 to Unsigned Int8, fill, 4 channel", (test_float32_to_int8_fill<false, 4>), exec_float32_to_int8<false>),
+
+    TEST("Float32 to Signed Int8, custom (special)", test_float32_to_int8_custom<true>, exec_float32_to_int8<true>),
+    TEST("Float32 to Unsigned Int8, custom (special)", test_float32_to_int8_custom<false>, exec_float32_to_int8<false>),
+  };
+  // clang-format on
+  return tests;
+}
diff --git a/conformity/opencv/test_float_to_int.h b/conformity/opencv/test_float_to_int.h
new file mode 100644
index 000000000..0fb92611c
--- /dev/null
+++ b/conformity/opencv/test_float_to_int.h
@@ -0,0 +1,14 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef INTRINSICCV_OPENCV_CONFORMITY_TEST_FLOAT_TO_INT_H_
+#define INTRINSICCV_OPENCV_CONFORMITY_TEST_FLOAT_TO_INT_H_
+
+#include <vector>
+
+#include "tests.h"
+
+std::vector<test>& float_to_int_tests_singleton();
+
+#endif  // INTRINSICCV_OPENCV_CONFORMITY_TEST_FLOAT_TO_INT_H_
diff --git a/conformity/opencv/tests.cpp b/conformity/opencv/tests.cpp
index 693dbc565..b3db6055f 100644
--- a/conformity/opencv/tests.cpp
+++ b/conformity/opencv/tests.cpp
@@ -10,6 +10,7 @@
 
 #include "opencv2/core.hpp"
 #include "opencv2/imgproc.hpp"
+#include "test_float_to_int.h"
 #include "test_gaussian_blur.h"
 #include "test_sobel.h"
 
@@ -55,6 +56,7 @@ static std::vector<T> merge_tests(
 std::vector<test> all_tests = merge_tests<test>({
     sobel_tests_singleton,
     gaussian_blur_tests_singleton,
+    float_to_int_tests_singleton,
 });
 
 #if MANAGER
-- 
GitLab


From 5b2fbc5a5c8d49f8c1c864df4e3dd051887dda01 Mon Sep 17 00:00:00 2001
From: Ioana Ghiban <ioana.ghiban@arm.com>
Date: Tue, 9 Apr 2024 14:15:37 +0200
Subject: [PATCH 9/9] Implement and test int8 to float32 conversions

---
 adapters/opencv/intrinsiccv_hal.cpp           |  63 ++-
 conformity/opencv/CMakeLists.txt              |   4 +-
 conformity/opencv/special_floats.h            |   8 -
 conformity/opencv/test_float_conv.cpp         | 214 ++++++++
 conformity/opencv/test_float_conv.h           |  14 +
 conformity/opencv/test_float_to_int.cpp       | 122 -----
 conformity/opencv/test_float_to_int.h         |  14 -
 conformity/opencv/tests.cpp                   |   8 +-
 intrinsiccv/include/intrinsiccv/config.h.in   |  11 -
 intrinsiccv/include/intrinsiccv/dispatch.h    |   6 +-
 intrinsiccv/include/intrinsiccv/intrinsiccv.h |  45 +-
 intrinsiccv/src/analysis/min_max_api.cpp      |   8 +-
 intrinsiccv/src/arithmetics/absdiff_api.cpp   |  10 +-
 .../add_abs_with_threshold_api.cpp            |  12 +-
 intrinsiccv/src/arithmetics/add_api.cpp       |  10 +-
 intrinsiccv/src/arithmetics/multiply_api.cpp  |   8 +-
 intrinsiccv/src/arithmetics/scale_api.cpp     |   4 +-
 intrinsiccv/src/arithmetics/sub_api.cpp       |  10 +-
 intrinsiccv/src/arithmetics/threshold_api.cpp |  10 +-
 intrinsiccv/src/arithmetics/transpose_api.cpp |   2 +-
 .../src/conversions/float_conv_api.cpp        |  71 +++
 .../src/conversions/float_conv_neon.cpp       |  27 +
 intrinsiccv/src/conversions/float_conv_sc.h   | 166 ++++++
 .../src/conversions/float_conv_sme2.cpp       |  28 ++
 .../src/conversions/float_conv_sve2.cpp       |  28 ++
 .../src/conversions/float_to_int_api.cpp      |  51 --
 .../src/conversions/float_to_int_neon.cpp     |  25 -
 intrinsiccv/src/conversions/float_to_int_sc.h | 103 ----
 .../src/conversions/float_to_int_sme2.cpp     |  26 -
 .../src/conversions/float_to_int_sve2.cpp     |  26 -
 .../src/conversions/gray_to_rgb_api.cpp       |  10 +-
 intrinsiccv/src/conversions/merge_api.cpp     |   2 +-
 .../src/conversions/rgb_to_rgb_api.cpp        |  10 +-
 intrinsiccv/src/conversions/split_api.cpp     |   2 +-
 .../src/conversions/yuv_to_rgb_api.cpp        |   8 +-
 intrinsiccv/src/filters/gaussian_blur_api.cpp |   8 +-
 intrinsiccv/src/filters/sobel_api.cpp         |  10 +-
 intrinsiccv/src/morphology/morphology_api.cpp |  10 +-
 intrinsiccv/src/resize/resize_api.cpp         |   6 +-
 intrinsiccv/src/resize/resize_linear_api.cpp  |   6 +-
 scripts/ci.sh                                 |   2 +-
 test/api/test_float_conv.cpp                  | 476 ++++++++++++++++++
 test/api/test_float_to_int.cpp                | 341 -------------
 test/framework/generator.h                    |  12 +
 test/framework/special_floats.h               |   8 -
 test/framework/utils.h                        |   8 +
 46 files changed, 1203 insertions(+), 850 deletions(-)
 delete mode 100644 conformity/opencv/special_floats.h
 create mode 100644 conformity/opencv/test_float_conv.cpp
 create mode 100644 conformity/opencv/test_float_conv.h
 delete mode 100644 conformity/opencv/test_float_to_int.cpp
 delete mode 100644 conformity/opencv/test_float_to_int.h
 create mode 100644 intrinsiccv/src/conversions/float_conv_api.cpp
 create mode 100644 intrinsiccv/src/conversions/float_conv_neon.cpp
 create mode 100644 intrinsiccv/src/conversions/float_conv_sc.h
 create mode 100644 intrinsiccv/src/conversions/float_conv_sme2.cpp
 create mode 100644 intrinsiccv/src/conversions/float_conv_sve2.cpp
 delete mode 100644 intrinsiccv/src/conversions/float_to_int_api.cpp
 delete mode 100644 intrinsiccv/src/conversions/float_to_int_neon.cpp
 delete mode 100644 intrinsiccv/src/conversions/float_to_int_sc.h
 delete mode 100644 intrinsiccv/src/conversions/float_to_int_sme2.cpp
 delete mode 100644 intrinsiccv/src/conversions/float_to_int_sve2.cpp
 create mode 100644 test/api/test_float_conv.cpp
 delete mode 100644 test/api/test_float_to_int.cpp
 delete mode 100644 test/framework/special_floats.h

diff --git a/adapters/opencv/intrinsiccv_hal.cpp b/adapters/opencv/intrinsiccv_hal.cpp
index c863fc9e5..ea9d27fa8 100644
--- a/adapters/opencv/intrinsiccv_hal.cpp
+++ b/adapters/opencv/intrinsiccv_hal.cpp
@@ -669,36 +669,49 @@ int min_max_idx(const uchar *src_data, size_t src_step, int width, int height,
 int convertTo(const uchar *src_data, size_t src_step, int src_depth,
               uchar *dst_data, size_t dst_step, int dst_depth, int width,
               int height, double scale, double shift) {
-  if (src_depth != dst_depth) {
-    // type conversion
-    if (scale == 1.0 && shift == 0.0) {
-      // float32 to int8
-      if (src_depth == CV_32F && dst_depth == CV_8S) {
-        return convert_error(intrinsiccv_type_conversion_f32_s8(
-            reinterpret_cast<const float *>(src_data), src_step,
-            reinterpret_cast<int8_t *>(dst_data), dst_step, width, height));
-      }
-      // float32 to uint8
-      if (src_depth == CV_32F && dst_depth == CV_8U) {
-        return convert_error(intrinsiccv_type_conversion_f32_u8(
-            reinterpret_cast<const float *>(src_data), src_step,
-            reinterpret_cast<uint8_t *>(dst_data), dst_step, width, height));
-      }
+  // scaling only
+  if (src_depth == dst_depth) {
+    // no scaling, no advantage
+    if (fabs(scale - 1.0) < std::numeric_limits<double>::epsilon() &&
+        fabs(shift) < std::numeric_limits<double>::epsilon()) {
+      return CV_HAL_ERROR_NOT_IMPLEMENTED;
     }
-    return CV_HAL_ERROR_NOT_IMPLEMENTED;
-  }
 
-  // no scaling, no advantage
-  if (fabs(scale - 1.0) < std::numeric_limits<double>::epsilon() &&
-      fabs(shift) < std::numeric_limits<double>::epsilon()) {
+    if (src_depth == CV_8U) {
+      return convert_error(intrinsiccv_scale_u8(
+          reinterpret_cast<const uint8_t *>(src_data), src_step,
+          reinterpret_cast<uint8_t *>(dst_data), dst_step, width, height,
+          static_cast<float>(scale), static_cast<float>(shift)));
+    }
     return CV_HAL_ERROR_NOT_IMPLEMENTED;
   }
 
-  if (src_depth == CV_8U) {
-    return convert_error(intrinsiccv_scale_u8(
-        reinterpret_cast<const uint8_t *>(src_data), src_step,
-        reinterpret_cast<uint8_t *>(dst_data), dst_step, width, height,
-        static_cast<float>(scale), static_cast<float>(shift)));
+  // type conversion only
+  if (scale == 1.0 && shift == 0.0) {
+    // float32 to int8
+    if (src_depth == CV_32F && dst_depth == CV_8S) {
+      return convert_error(intrinsiccv_float_conversion_f32_s8(
+          reinterpret_cast<const float *>(src_data), src_step,
+          reinterpret_cast<int8_t *>(dst_data), dst_step, width, height));
+    }
+    // float32 to uint8
+    if (src_depth == CV_32F && dst_depth == CV_8U) {
+      return convert_error(intrinsiccv_float_conversion_f32_u8(
+          reinterpret_cast<const float *>(src_data), src_step,
+          reinterpret_cast<uint8_t *>(dst_data), dst_step, width, height));
+    }
+    // int8 to float32
+    if (src_depth == CV_8S && dst_depth == CV_32F) {
+      return convert_error(intrinsiccv_float_conversion_s8_f32(
+          reinterpret_cast<const int8_t *>(src_data), src_step,
+          reinterpret_cast<float *>(dst_data), dst_step, width, height));
+    }
+    // uint8 to float32
+    if (src_depth == CV_8U && dst_depth == CV_32F) {
+      return convert_error(intrinsiccv_float_conversion_u8_f32(
+          reinterpret_cast<const uint8_t *>(src_data), src_step,
+          reinterpret_cast<float *>(dst_data), dst_step, width, height));
+    }
   }
   return CV_HAL_ERROR_NOT_IMPLEMENTED;
 }
diff --git a/conformity/opencv/CMakeLists.txt b/conformity/opencv/CMakeLists.txt
index 92935aaed..8f821260e 100644
--- a/conformity/opencv/CMakeLists.txt
+++ b/conformity/opencv/CMakeLists.txt
@@ -31,7 +31,7 @@ add_executable(
   tests.cpp
   test_sobel.cpp
   test_gaussian_blur.cpp
-  test_float_to_int.cpp
+  test_float_conv.cpp
 )
 
 target_link_libraries(
@@ -68,7 +68,7 @@ add_executable(
   tests.cpp
   test_sobel.cpp
   test_gaussian_blur.cpp
-  test_float_to_int.cpp
+  test_float_conv.cpp
 )
 
 target_link_libraries(
diff --git a/conformity/opencv/special_floats.h b/conformity/opencv/special_floats.h
deleted file mode 100644
index 69d42e1c0..000000000
--- a/conformity/opencv/special_floats.h
+++ /dev/null
@@ -1,8 +0,0 @@
-// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
-//
-// SPDX-License-Identifier: Apache-2.0
-
-#ifndef INTRINSICCV_OPENCV_CONFORMITY_SPECIAL_FLOATS_H_
-#define INTRINSICCV_OPENCV_CONFORMITY_SPECIAL_FLOATS_H_
-
-#endif  // INTRINSICCV_OPENCV_CONFORMITY_SPECIAL_FLOATS_H_
diff --git a/conformity/opencv/test_float_conv.cpp b/conformity/opencv/test_float_conv.cpp
new file mode 100644
index 000000000..6b3abfe68
--- /dev/null
+++ b/conformity/opencv/test_float_conv.cpp
@@ -0,0 +1,214 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "test_float_conv.h"
+
+#include <vector>
+
+float floatval_(uint32_t v) {
+  static_assert(sizeof(float) == 4);
+  return *reinterpret_cast<float*>(&v);
+}
+
+float quietNaN = floatval_(0x7FC00000);
+float signalingNaN = floatval_(0x7FA00000);
+float posInfinity = floatval_(0x7F800000);
+float negInfinity = floatval_(0xFF800000);
+
+float minusNaN = floatval_(0xFF800001);
+float plusNaN = floatval_(0x7F800001);
+float plusZero = floatval_(0x00000000);
+float minusZero = floatval_(0x80000000);
+
+float oneNaN = floatval_(0x7FC00001);
+float zeroDivZero = floatval_(0xFFC00000);
+float floatMin = floatval_(0x00800000);
+float floatMax = floatval_(0x7F7FFFFF);
+
+float posSubnormalMin = floatval_(0x00000001);
+float posSubnormalMax = floatval_(0x007FFFFF);
+float negSubnormalMin = floatval_(0x80000001);
+float negSubnormalMax = floatval_(0x807FFFFF);
+
+template <bool Signed>
+cv::Mat exec_float32_to_int8(cv::Mat& input) {
+  cv::Mat result;
+  input.convertTo(result, Signed ? CV_8SC1 : CV_8UC1);
+  return result;
+}
+
+cv::Mat exec_int8_to_float32(cv::Mat& input) {
+  cv::Mat result;
+  input.convertTo(result, CV_32FC1);
+  return result;
+}
+
+#if MANAGER
+template <bool Signed, size_t Channels>
+bool test_float32_to_int8_random(int index,
+                                 RecreatedMessageQueue& request_queue,
+                                 RecreatedMessageQueue& reply_queue) {
+  cv::RNG rng(0);
+
+  for (size_t x = 5; x <= 16; ++x) {
+    for (size_t y = 5; y <= 16; ++y) {
+      cv::Mat input(x, y, CV_32FC(Channels));
+      rng.fill(input, cv::RNG::UNIFORM, Signed ? -1000 : 0, 1000);
+
+      cv::Mat actual = exec_float32_to_int8<Signed>(input);
+      cv::Mat expected = get_expected_from_subordinate(index, request_queue,
+                                                       reply_queue, input);
+
+      if (are_matrices_different<uint8_t>(0, actual, expected)) {
+        fail_print_matrices(x, y, input, actual, expected);
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+template <bool Signed, size_t Channels>
+bool test_int8_to_float32_random(int index,
+                                 RecreatedMessageQueue& request_queue,
+                                 RecreatedMessageQueue& reply_queue) {
+  cv::RNG rng(0);
+
+  for (size_t x = 5; x <= 16; ++x) {
+    for (size_t y = 5; y <= 16; ++y) {
+      cv::Mat input(x, y, Signed ? CV_8SC(Channels) : CV_8UC(Channels));
+      rng.fill(input, cv::RNG::UNIFORM, Signed ? -128 : 0, Signed ? 128 : 256);
+
+      cv::Mat actual = exec_int8_to_float32(input);
+      cv::Mat expected = get_expected_from_subordinate(index, request_queue,
+                                                       reply_queue, input);
+
+      if (are_matrices_different<float>(0, actual, expected)) {
+        fail_print_matrices(x, y, input, actual, expected);
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+static constexpr int custom_data_height = 8;
+static constexpr int custom_data_width = 4;
+
+static float custom_data_float[custom_data_height * custom_data_width] = {
+    // clang-format off
+  quietNaN, signalingNaN, posInfinity, negInfinity,
+  minusNaN, plusNaN, plusZero, minusZero,
+  oneNaN, zeroDivZero, floatMin, floatMax,
+  posSubnormalMin, posSubnormalMax, negSubnormalMin, negSubnormalMax,
+  1111.11, -1112.22, 113.33, 114.44,
+  111.51, 112.62, 113.73, 114.84,
+  126.66, 127.11, 128.66, 129.11,
+  11.5, 12.5, -11.5, -12.5,
+    // clang-format on
+};
+
+static int8_t custom_data_int8[custom_data_height * custom_data_width] = {
+    // clang-format off
+  -128, -128, 126, 127,
+  -128, -128, -128, -128,
+  -128, -128, -128, 126,
+  -127, -127, -127, 125,
+  126, 127, 113, 114,
+  112, 113, 114, 115,
+  12, 12, 12, 12,
+  11, 11, 11, 11,
+    // clang-format on
+};
+
+static uint8_t custom_data_uint8[custom_data_height * custom_data_width] = {
+    // clang-format off
+  0, 0, 254, 255,
+  0, 0, 0, 0,
+  0, 0, 0, 254,
+  1, 1, 1, 253,
+  254, 255, 113, 114,
+  112, 113, 114, 115,
+  12, 12, 12, 12,
+  11, 11, 11, 11,
+    // clang-format on
+};
+
+template <bool Signed>
+bool test_float32_to_int8_custom(int index,
+                                 RecreatedMessageQueue& request_queue,
+                                 RecreatedMessageQueue& reply_queue) {
+  cv::Mat input(custom_data_height, custom_data_width, CV_32FC1,
+                custom_data_float);
+
+  cv::Mat actual = exec_float32_to_int8<Signed>(input);
+  cv::Mat expected =
+      get_expected_from_subordinate(index, request_queue, reply_queue, input);
+
+  if (are_matrices_different<uint8_t>(0, actual, expected)) {
+    fail_print_matrices(custom_data_height, custom_data_width, input, actual,
+                        expected);
+    return true;
+  }
+
+  return false;
+}
+
+template <bool Signed>
+bool test_int8_to_float32_custom(int index,
+                                 RecreatedMessageQueue& request_queue,
+                                 RecreatedMessageQueue& reply_queue) {
+  cv::Mat input(custom_data_height, custom_data_width,
+                Signed ? CV_8SC1 : CV_8UC1,
+                Signed ? static_cast<void*>(custom_data_int8)
+                       : static_cast<void*>(custom_data_uint8));
+
+  cv::Mat actual = exec_int8_to_float32(input);
+  cv::Mat expected =
+      get_expected_from_subordinate(index, request_queue, reply_queue, input);
+
+  if (are_matrices_different<float>(0, actual, expected)) {
+    fail_print_matrices(custom_data_height, custom_data_width, input, actual,
+                        expected);
+    return true;
+  }
+
+  return false;
+}
+#endif
+
+std::vector<test>& float_conversion_tests_singleton() {
+  // clang-format off
+  static std::vector<test> tests = {
+    TEST("Float32 to Signed Int8, fill, 1 channel", (test_float32_to_int8_random<true, 1>), exec_float32_to_int8<true>),
+    TEST("Float32 to Signed Int8, fill, 2 channel", (test_float32_to_int8_random<true, 2>), exec_float32_to_int8<true>),
+    TEST("Float32 to Signed Int8, fill, 3 channel", (test_float32_to_int8_random<true, 3>), exec_float32_to_int8<true>),
+    TEST("Float32 to Signed Int8, fill, 4 channel", (test_float32_to_int8_random<true, 4>), exec_float32_to_int8<true>),
+
+    TEST("Float32 to Unsigned Int8, fill, 1 channel", (test_float32_to_int8_random<false, 1>), exec_float32_to_int8<false>),
+    TEST("Float32 to Unsigned Int8, fill, 2 channel", (test_float32_to_int8_random<false, 2>), exec_float32_to_int8<false>),
+    TEST("Float32 to Unsigned Int8, fill, 3 channel", (test_float32_to_int8_random<false, 3>), exec_float32_to_int8<false>),
+    TEST("Float32 to Unsigned Int8, fill, 4 channel", (test_float32_to_int8_random<false, 4>), exec_float32_to_int8<false>),
+
+    TEST("Float32 to Signed Int8, custom (special)", test_float32_to_int8_custom<true>, exec_float32_to_int8<true>),
+    TEST("Float32 to Unsigned Int8, custom (special)", test_float32_to_int8_custom<false>, exec_float32_to_int8<false>),
+
+    TEST("Signed Int8 to Float32, fill, 1 channel", (test_int8_to_float32_random<true, 1>), exec_int8_to_float32),
+    TEST("Signed Int8 to Float32, fill, 2 channel", (test_int8_to_float32_random<true, 2>), exec_int8_to_float32),
+    TEST("Signed Int8 to Float32, fill, 3 channel", (test_int8_to_float32_random<true, 3>), exec_int8_to_float32),
+    TEST("Signed Int8 to Float32, fill, 4 channel", (test_int8_to_float32_random<true, 4>), exec_int8_to_float32),
+
+    TEST("Unsigned Int8 to Float32, fill, 1 channel", (test_int8_to_float32_random<false, 1>), exec_int8_to_float32),
+    TEST("Unsigned Int8 to Float32, fill, 2 channel", (test_int8_to_float32_random<false, 2>), exec_int8_to_float32),
+    TEST("Unsigned Int8 to Float32, fill, 3 channel", (test_int8_to_float32_random<false, 3>), exec_int8_to_float32),
+    TEST("Unsigned Int8 to Float32, fill, 4 channel", (test_int8_to_float32_random<false, 4>), exec_int8_to_float32),
+
+    TEST("Signed Int8 to Float32, custom (special)", test_int8_to_float32_custom<true>, exec_int8_to_float32),
+    TEST("Unsigned Int8 to Float32, custom (special)", test_int8_to_float32_custom<false>, exec_int8_to_float32),
+  };
+  // clang-format on
+  return tests;
+}
diff --git a/conformity/opencv/test_float_conv.h b/conformity/opencv/test_float_conv.h
new file mode 100644
index 000000000..ab7a6d8a1
--- /dev/null
+++ b/conformity/opencv/test_float_conv.h
@@ -0,0 +1,14 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef INTRINSICCV_OPENCV_CONFORMITY_TEST_FLOAT_CONV_H_
+#define INTRINSICCV_OPENCV_CONFORMITY_TEST_FLOAT_CONV_H_
+
+#include <vector>
+
+#include "tests.h"
+
+std::vector<test>& float_conversion_tests_singleton();
+
+#endif  // INTRINSICCV_OPENCV_CONFORMITY_TEST_FLOAT_CONV_H_
diff --git a/conformity/opencv/test_float_to_int.cpp b/conformity/opencv/test_float_to_int.cpp
deleted file mode 100644
index 03e9d5a24..000000000
--- a/conformity/opencv/test_float_to_int.cpp
+++ /dev/null
@@ -1,122 +0,0 @@
-// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
-//
-// SPDX-License-Identifier: Apache-2.0
-
-#include "test_float_to_int.h"
-
-#include <vector>
-
-#include "special_floats.h"
-
-float floatval_(uint32_t v) {
-  static_assert(sizeof(float) == 4);
-  return *reinterpret_cast<float*>(&v);
-}
-
-float quietNaN = floatval_(0x7FC00000);
-float signalingNaN = floatval_(0x7FA00000);
-float posInfinity = floatval_(0x7F800000);
-float negInfinity = floatval_(0xFF800000);
-
-float minusNaN = floatval_(0xFF800001);
-float plusNaN = floatval_(0x7F800001);
-float plusZero = floatval_(0x00000000);
-float minusZero = floatval_(0x80000000);
-
-float oneNaN = floatval_(0x7FC00001);
-float zeroDivZero = floatval_(0xFFC00000);
-float floatMin = floatval_(0x00800000);
-float floatMax = floatval_(0x7F7FFFFF);
-
-float posSubnormalMin = floatval_(0x00000001);
-float posSubnormalMax = floatval_(0x007FFFFF);
-float negSubnormalMin = floatval_(0x80000001);
-float negSubnormalMax = floatval_(0x807FFFFF);
-
-template <bool Signed>
-cv::Mat exec_float32_to_int8(cv::Mat& input) {
-  cv::Mat result;
-  input.convertTo(result, Signed ? CV_8SC1 : CV_8UC1);
-  return result;
-}
-
-#if MANAGER
-template <bool Signed, size_t Channels>
-bool test_float32_to_int8_fill(int index, RecreatedMessageQueue& request_queue,
-                               RecreatedMessageQueue& reply_queue) {
-  cv::RNG rng(0);
-
-  for (size_t x = 5; x <= 16; ++x) {
-    for (size_t y = 5; y <= 16; ++y) {
-      cv::Mat input(x, y, CV_32FC(Channels));
-      rng.fill(input, cv::RNG::UNIFORM, Signed ? -1000 : 0, 1000);
-
-      cv::Mat actual = exec_float32_to_int8<Signed>(input);
-      cv::Mat expected = get_expected_from_subordinate(index, request_queue,
-                                                       reply_queue, input);
-
-      if (are_matrices_different<uint8_t>(0, actual, expected)) {
-        fail_print_matrices(x, y, input, actual, expected);
-        return true;
-      }
-    }
-  }
-
-  return false;
-}
-
-static constexpr int custom_data_height = 8;
-static constexpr int custom_data_width = 4;
-
-static float custom_data[custom_data_height * custom_data_width] = {
-    // clang-format off
-  quietNaN, signalingNaN, posInfinity, negInfinity,
-  minusNaN, plusNaN, plusZero, minusZero,
-  oneNaN, zeroDivZero, floatMin, floatMax,
-  posSubnormalMin, posSubnormalMax, negSubnormalMin, negSubnormalMax,
-  1111.11, -1112.22, 113.33, 114.44,
-  111.51, 112.62, 113.73, 114.84,
-  126.66, 127.11, 128.66, 129.11,
-  11.5, 12.5, -11.5, -12.5,
-    // clang-format on
-};
-
-template <bool Signed>
-bool test_float32_to_int8_custom(int index,
-                                 RecreatedMessageQueue& request_queue,
-                                 RecreatedMessageQueue& reply_queue) {
-  cv::Mat input(custom_data_height, custom_data_width, CV_32FC1, custom_data);
-
-  cv::Mat actual = exec_float32_to_int8<Signed>(input);
-  cv::Mat expected =
-      get_expected_from_subordinate(index, request_queue, reply_queue, input);
-
-  if (are_matrices_different<uint8_t>(0, actual, expected)) {
-    fail_print_matrices(custom_data_height, custom_data_width, input, actual,
-                        expected);
-    return true;
-  }
-
-  return false;
-}
-#endif
-
-std::vector<test>& float_to_int_tests_singleton() {
-  // clang-format off
-  static std::vector<test> tests = {
-    TEST("Float32 to Signed Int8, fill, 1 channel", (test_float32_to_int8_fill<true, 1>), exec_float32_to_int8<true>),
-    TEST("Float32 to Signed Int8, fill, 2 channel", (test_float32_to_int8_fill<true, 2>), exec_float32_to_int8<true>),
-    TEST("Float32 to Signed Int8, fill, 3 channel", (test_float32_to_int8_fill<true, 3>), exec_float32_to_int8<true>),
-    TEST("Float32 to Signed Int8, fill, 4 channel", (test_float32_to_int8_fill<true, 4>), exec_float32_to_int8<true>),
-
-    TEST("Float32 to Unsigned Int8, fill, 1 channel", (test_float32_to_int8_fill<false, 1>), exec_float32_to_int8<false>),
-    TEST("Float32 to Unsigned Int8, fill, 2 channel", (test_float32_to_int8_fill<false, 2>), exec_float32_to_int8<false>),
-    TEST("Float32 to Unsigned Int8, fill, 3 channel", (test_float32_to_int8_fill<false, 3>), exec_float32_to_int8<false>),
-    TEST("Float32 to Unsigned Int8, fill, 4 channel", (test_float32_to_int8_fill<false, 4>), exec_float32_to_int8<false>),
-
-    TEST("Float32 to Signed Int8, custom (special)", test_float32_to_int8_custom<true>, exec_float32_to_int8<true>),
-    TEST("Float32 to Unsigned Int8, custom (special)", test_float32_to_int8_custom<false>, exec_float32_to_int8<false>),
-  };
-  // clang-format on
-  return tests;
-}
diff --git a/conformity/opencv/test_float_to_int.h b/conformity/opencv/test_float_to_int.h
deleted file mode 100644
index 0fb92611c..000000000
--- a/conformity/opencv/test_float_to_int.h
+++ /dev/null
@@ -1,14 +0,0 @@
-// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
-//
-// SPDX-License-Identifier: Apache-2.0
-
-#ifndef INTRINSICCV_OPENCV_CONFORMITY_TEST_FLOAT_TO_INT_H_
-#define INTRINSICCV_OPENCV_CONFORMITY_TEST_FLOAT_TO_INT_H_
-
-#include <vector>
-
-#include "tests.h"
-
-std::vector<test>& float_to_int_tests_singleton();
-
-#endif  // INTRINSICCV_OPENCV_CONFORMITY_TEST_FLOAT_TO_INT_H_
diff --git a/conformity/opencv/tests.cpp b/conformity/opencv/tests.cpp
index b3db6055f..596282bfb 100644
--- a/conformity/opencv/tests.cpp
+++ b/conformity/opencv/tests.cpp
@@ -10,7 +10,7 @@
 
 #include "opencv2/core.hpp"
 #include "opencv2/imgproc.hpp"
-#include "test_float_to_int.h"
+#include "test_float_conv.h"
 #include "test_gaussian_blur.h"
 #include "test_sobel.h"
 
@@ -44,10 +44,10 @@ cv::Mat get_expected_from_subordinate(int index,
 
 template <typename T>
 static std::vector<T> merge_tests(
-    std::initializer_list<std::vector<test>& (*)()> test_groups) {
+    std::initializer_list<std::vector<T>& (*)()> test_groups) {
   std::vector<T> all_tests;
   for (auto singleton : test_groups) {
-    std::vector<test>& group = singleton();
+    std::vector<T>& group = singleton();
     all_tests.insert(all_tests.cend(), group.cbegin(), group.cend());
   }
   return all_tests;
@@ -56,7 +56,7 @@ static std::vector<T> merge_tests(
 std::vector<test> all_tests = merge_tests<test>({
     sobel_tests_singleton,
     gaussian_blur_tests_singleton,
-    float_to_int_tests_singleton,
+    float_conversion_tests_singleton,
 });
 
 #if MANAGER
diff --git a/intrinsiccv/include/intrinsiccv/config.h.in b/intrinsiccv/include/intrinsiccv/config.h.in
index dd521b82a..068c88b66 100644
--- a/intrinsiccv/include/intrinsiccv/config.h.in
+++ b/intrinsiccv/include/intrinsiccv/config.h.in
@@ -90,15 +90,4 @@
 #define INTRINSICCV_NODISCARD
 #endif
 
-// GCC and clang
-#ifdef __GNUC__
-#define INTRINSICCV_NO_STRICT_ALIASING_BEGIN \
-  _Pragma("GCC diagnostic push")             \
-      _Pragma("GCC diagnostic ignored \"-Wstrict-aliasing\"")
-#define INTRINSICCV_NO_STRICT_ALIASING_END _Pragma("GCC diagnostic pop")
-#else
-#define INTRINSICCV_NO_STRICT_ALIASING_BEGIN
-#define INTRINSICCV_NO_STRICT_ALIASING_END
-#endif
-
 #endif  // INTRINSICCV_CONFIG_H
diff --git a/intrinsiccv/include/intrinsiccv/dispatch.h b/intrinsiccv/include/intrinsiccv/dispatch.h
index 486f934ac..c85b14198 100644
--- a/intrinsiccv/include/intrinsiccv/dispatch.h
+++ b/intrinsiccv/include/intrinsiccv/dispatch.h
@@ -60,7 +60,7 @@ static inline bool hwcaps_has_sme2(HwCaps hwcaps) {
 
 #define INTRINSICCV_MULTIVERSION_C_API(api_name, neon_impl, sve2_impl, \
                                        sme2_impl)                      \
-  static decltype(neon_impl) *api_name##_resolver() {                  \
+  static decltype(neon_impl) api_name##_resolver() {                   \
     [[maybe_unused]] INTRINSICCV_TARGET_NAMESPACE::HwCaps hwcaps =     \
         INTRINSICCV_TARGET_NAMESPACE::get_hwcaps();                    \
     INTRINSICCV_SME2_RESOLVE(sme2_impl);                               \
@@ -68,7 +68,7 @@ static inline bool hwcaps_has_sme2(HwCaps hwcaps) {
     return neon_impl;                                                  \
   }                                                                    \
   extern "C" {                                                         \
-  decltype(neon_impl) *api_name = api_name##_resolver();               \
+  decltype(neon_impl) api_name = api_name##_resolver();                \
   }
 
 #else  // INTRINSICCV_HAVE_SVE2 || INTRINSICCV_HAVE_SME2
@@ -77,7 +77,7 @@ static inline bool hwcaps_has_sme2(HwCaps hwcaps) {
                                        sme2_impl)                      \
                                                                        \
   extern "C" {                                                         \
-  decltype(neon_impl) *api_name = neon_impl;                           \
+  decltype(neon_impl) api_name = neon_impl;                            \
   }
 
 #endif  // INTRINSICCV_HAVE_SVE2 || INTRINSICCV_HAVE_SME2
diff --git a/intrinsiccv/include/intrinsiccv/intrinsiccv.h b/intrinsiccv/include/intrinsiccv/intrinsiccv.h
index 63d969df3..bc8a02fd6 100644
--- a/intrinsiccv/include/intrinsiccv/intrinsiccv.h
+++ b/intrinsiccv/include/intrinsiccv/intrinsiccv.h
@@ -1272,10 +1272,11 @@ INTRINSICCV_API_DECLARATION(intrinsiccv_scale_u8, const uint8_t *src,
 /// type, then stores the result in `dst`.
 ///
 /// Each resulting element is saturated, i.e. it is the smallest/largest
-/// number of the type of the element if the result would underflow/overflow.
-/// In case of special values, such as the different variations of `NaN`, the
-/// result is `0`. Source and destination data length is `width` * `height`.
-/// Number of elements is limited to @ref INTRINSICCV_MAX_IMAGE_PIXELS.
+/// number of the type of the element if the `src` value cannot be
+/// represented in the `dst` type. In case of special values, such as the
+/// different variations of `NaN`, the result is `0`. Source and destination
+/// data length is `width` * `height`. Number of elements is limited to @ref
+/// INTRINSICCV_MAX_IMAGE_PIXELS.
 ///
 /// @param src          Pointer to the source data. Must be non-null.
 /// @param src_stride   Distance in bytes from the start of one row to the
@@ -1288,14 +1289,44 @@ INTRINSICCV_API_DECLARATION(intrinsiccv_scale_u8, const uint8_t *src,
 /// @param width        Number of elements in a row.
 /// @param height       Number of rows in the data.
 ///
-INTRINSICCV_API_DECLARATION(intrinsiccv_type_conversion_f32_s8,
+INTRINSICCV_API_DECLARATION(intrinsiccv_float_conversion_f32_s8,
                             const float *src, size_t src_stride, int8_t *dst,
                             size_t dst_stride, size_t width, size_t height);
-/// @copydoc intrinsiccv_type_conversion_f32_s8
-INTRINSICCV_API_DECLARATION(intrinsiccv_type_conversion_f32_u8,
+/// @copydoc intrinsiccv_float_conversion_f32_s8
+INTRINSICCV_API_DECLARATION(intrinsiccv_float_conversion_f32_u8,
                             const float *src, size_t src_stride, uint8_t *dst,
                             size_t dst_stride, size_t width, size_t height);
 
+/// Converts the elements in `src` from an integer type to a floating-point
+/// type, then stores the result in `dst`.
+///
+/// Each resulting element is saturated, i.e. it is the smallest/largest
+/// number of the type of the element if the `src` value cannot be
+/// represented in the `dst` type. Source and destination data length is
+/// `width` * `height`. Number of elements is limited to @ref
+/// INTRINSICCV_MAX_IMAGE_PIXELS.
+///
+/// @param src          Pointer to the source data. Must be non-null.
+/// @param src_stride   Distance in bytes from the start of one row to the
+///                     start of the next row for the source data. Must
+///                     not be less than width * sizeof(type).
+///                     Must be a multiple of sizeof(type).
+/// @param dst          Pointer to the destination data. Must be non-null.
+/// @param dst_stride   Distance in bytes from the start of one row to the
+///                     start of the next row for the destination data. Must
+///                     not be less than width * sizeof(type).
+///                     Must be a multiple of sizeof(type).
+/// @param width        Number of elements in a row.
+/// @param height       Number of rows in the data.
+///
+INTRINSICCV_API_DECLARATION(intrinsiccv_float_conversion_s8_f32,
+                            const int8_t *src, size_t src_stride, float *dst,
+                            size_t dst_stride, size_t width, size_t height);
+/// @copydoc intrinsiccv_float_conversion_s8_f32
+INTRINSICCV_API_DECLARATION(intrinsiccv_float_conversion_u8_f32,
+                            const uint8_t *src, size_t src_stride, float *dst,
+                            size_t dst_stride, size_t width, size_t height);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif  // __cplusplus
diff --git a/intrinsiccv/src/analysis/min_max_api.cpp b/intrinsiccv/src/analysis/min_max_api.cpp
index 093380d7b..513fe4916 100644
--- a/intrinsiccv/src/analysis/min_max_api.cpp
+++ b/intrinsiccv/src/analysis/min_max_api.cpp
@@ -27,8 +27,8 @@ namespace sme2 {}  // namespace sme2
 
 }  // namespace intrinsiccv
 
-#define INTRINSICCV_DEFINE_MINMAX_API(name, type)                        \
-  INTRINSICCV_MULTIVERSION_C_API(name, intrinsiccv::neon::min_max<type>, \
+#define INTRINSICCV_DEFINE_MINMAX_API(name, type)                         \
+  INTRINSICCV_MULTIVERSION_C_API(name, &intrinsiccv::neon::min_max<type>, \
                                  nullptr, nullptr)
 
 INTRINSICCV_DEFINE_MINMAX_API(intrinsiccv_min_max_u8, uint8_t);
@@ -37,8 +37,8 @@ INTRINSICCV_DEFINE_MINMAX_API(intrinsiccv_min_max_u16, uint16_t);
 INTRINSICCV_DEFINE_MINMAX_API(intrinsiccv_min_max_s16, int16_t);
 INTRINSICCV_DEFINE_MINMAX_API(intrinsiccv_min_max_s32, int32_t);
 
-#define INTRINSICCV_DEFINE_MINMAXLOC_API(name, type)                         \
-  INTRINSICCV_MULTIVERSION_C_API(name, intrinsiccv::neon::min_max_loc<type>, \
+#define INTRINSICCV_DEFINE_MINMAXLOC_API(name, type)                          \
+  INTRINSICCV_MULTIVERSION_C_API(name, &intrinsiccv::neon::min_max_loc<type>, \
                                  nullptr, nullptr)
 
 INTRINSICCV_DEFINE_MINMAXLOC_API(intrinsiccv_min_max_loc_u8, uint8_t);
diff --git a/intrinsiccv/src/arithmetics/absdiff_api.cpp b/intrinsiccv/src/arithmetics/absdiff_api.cpp
index f8e94809f..86fa56fc4 100644
--- a/intrinsiccv/src/arithmetics/absdiff_api.cpp
+++ b/intrinsiccv/src/arithmetics/absdiff_api.cpp
@@ -39,11 +39,11 @@ intrinsiccv_error_t saturating_absdiff(const T *src_a, size_t src_a_stride,
 
 }  // namespace intrinsiccv
 
-#define INTRINSICCV_DEFINE_C_API(name, type)                                 \
-  INTRINSICCV_MULTIVERSION_C_API(                                            \
-      name, intrinsiccv::neon::saturating_absdiff<type>,                     \
-      INTRINSICCV_SVE2_IMPL_IF(intrinsiccv::sve2::saturating_absdiff<type>), \
-      intrinsiccv::sme2::saturating_absdiff<type>)
+#define INTRINSICCV_DEFINE_C_API(name, type)                                  \
+  INTRINSICCV_MULTIVERSION_C_API(                                             \
+      name, &intrinsiccv::neon::saturating_absdiff<type>,                     \
+      INTRINSICCV_SVE2_IMPL_IF(&intrinsiccv::sve2::saturating_absdiff<type>), \
+      &intrinsiccv::sme2::saturating_absdiff<type>)
 
 INTRINSICCV_DEFINE_C_API(intrinsiccv_saturating_absdiff_u8, uint8_t);
 INTRINSICCV_DEFINE_C_API(intrinsiccv_saturating_absdiff_s8, int8_t);
diff --git a/intrinsiccv/src/arithmetics/add_abs_with_threshold_api.cpp b/intrinsiccv/src/arithmetics/add_abs_with_threshold_api.cpp
index 1a60b7956..d9bc53e77 100644
--- a/intrinsiccv/src/arithmetics/add_abs_with_threshold_api.cpp
+++ b/intrinsiccv/src/arithmetics/add_abs_with_threshold_api.cpp
@@ -34,12 +34,12 @@ intrinsiccv_error_t saturating_add_abs_with_threshold(
 
 }  // namespace intrinsiccv
 
-#define INTRINSICCV_DEFINE_C_API(name, type)                            \
-  INTRINSICCV_MULTIVERSION_C_API(                                       \
-      name, intrinsiccv::neon::saturating_add_abs_with_threshold<type>, \
-      INTRINSICCV_SVE2_IMPL_IF(                                         \
-          intrinsiccv::sve2::saturating_add_abs_with_threshold<type>),  \
-      intrinsiccv::sme2::saturating_add_abs_with_threshold<type>)
+#define INTRINSICCV_DEFINE_C_API(name, type)                             \
+  INTRINSICCV_MULTIVERSION_C_API(                                        \
+      name, &intrinsiccv::neon::saturating_add_abs_with_threshold<type>, \
+      INTRINSICCV_SVE2_IMPL_IF(                                          \
+          &intrinsiccv::sve2::saturating_add_abs_with_threshold<type>),  \
+      &intrinsiccv::sme2::saturating_add_abs_with_threshold<type>)
 
 INTRINSICCV_DEFINE_C_API(intrinsiccv_saturating_add_abs_with_threshold_s16,
                          int16_t);
diff --git a/intrinsiccv/src/arithmetics/add_api.cpp b/intrinsiccv/src/arithmetics/add_api.cpp
index adf06cd04..355e065b0 100644
--- a/intrinsiccv/src/arithmetics/add_api.cpp
+++ b/intrinsiccv/src/arithmetics/add_api.cpp
@@ -39,11 +39,11 @@ intrinsiccv_error_t saturating_add(const T *src_a, size_t src_a_stride,
 
 }  // namespace intrinsiccv
 
-#define INTRINSICCV_DEFINE_C_API(name, type)                             \
-  INTRINSICCV_MULTIVERSION_C_API(                                        \
-      name, intrinsiccv::neon::saturating_add<type>,                     \
-      INTRINSICCV_SVE2_IMPL_IF(intrinsiccv::sve2::saturating_add<type>), \
-      intrinsiccv::sme2::saturating_add<type>)
+#define INTRINSICCV_DEFINE_C_API(name, type)                              \
+  INTRINSICCV_MULTIVERSION_C_API(                                         \
+      name, &intrinsiccv::neon::saturating_add<type>,                     \
+      INTRINSICCV_SVE2_IMPL_IF(&intrinsiccv::sve2::saturating_add<type>), \
+      &intrinsiccv::sme2::saturating_add<type>)
 
 INTRINSICCV_DEFINE_C_API(intrinsiccv_saturating_add_s8, int8_t);
 INTRINSICCV_DEFINE_C_API(intrinsiccv_saturating_add_u8, uint8_t);
diff --git a/intrinsiccv/src/arithmetics/multiply_api.cpp b/intrinsiccv/src/arithmetics/multiply_api.cpp
index 92ea6da21..d68b18afb 100644
--- a/intrinsiccv/src/arithmetics/multiply_api.cpp
+++ b/intrinsiccv/src/arithmetics/multiply_api.cpp
@@ -40,10 +40,10 @@ intrinsiccv_error_t saturating_multiply(const T *src_a, size_t src_a_stride,
 
 }  // namespace intrinsiccv
 
-#define INTRINSICCV_DEFINE_C_API(name, type)                                  \
-  INTRINSICCV_MULTIVERSION_C_API(                                             \
-      name, intrinsiccv::neon::saturating_multiply<type>,                     \
-      INTRINSICCV_SVE2_IMPL_IF(intrinsiccv::sve2::saturating_multiply<type>), \
+#define INTRINSICCV_DEFINE_C_API(name, type)                                   \
+  INTRINSICCV_MULTIVERSION_C_API(                                              \
+      name, &intrinsiccv::neon::saturating_multiply<type>,                     \
+      INTRINSICCV_SVE2_IMPL_IF(&intrinsiccv::sve2::saturating_multiply<type>), \
       nullptr)
 
 INTRINSICCV_DEFINE_C_API(intrinsiccv_saturating_multiply_u8, uint8_t);
diff --git a/intrinsiccv/src/arithmetics/scale_api.cpp b/intrinsiccv/src/arithmetics/scale_api.cpp
index e3c1775b5..0af9b97be 100644
--- a/intrinsiccv/src/arithmetics/scale_api.cpp
+++ b/intrinsiccv/src/arithmetics/scale_api.cpp
@@ -22,8 +22,8 @@ namespace sme2 {}  // namespace sme2
 
 }  // namespace intrinsiccv
 
-#define INTRINSICCV_DEFINE_SCALE_API(name, type)                       \
-  INTRINSICCV_MULTIVERSION_C_API(name, intrinsiccv::neon::scale<type>, \
+#define INTRINSICCV_DEFINE_SCALE_API(name, type)                        \
+  INTRINSICCV_MULTIVERSION_C_API(name, &intrinsiccv::neon::scale<type>, \
                                  nullptr, nullptr)
 
 INTRINSICCV_DEFINE_SCALE_API(intrinsiccv_scale_u8, uint8_t);
diff --git a/intrinsiccv/src/arithmetics/sub_api.cpp b/intrinsiccv/src/arithmetics/sub_api.cpp
index 691d6ca4b..9b451636d 100644
--- a/intrinsiccv/src/arithmetics/sub_api.cpp
+++ b/intrinsiccv/src/arithmetics/sub_api.cpp
@@ -38,11 +38,11 @@ intrinsiccv_error_t saturating_sub(const T *src_a, size_t src_a_stride,
 
 }  // namespace intrinsiccv
 
-#define INTRINSICCV_DEFINE_C_API(name, type)                             \
-  INTRINSICCV_MULTIVERSION_C_API(                                        \
-      name, intrinsiccv::neon::saturating_sub<type>,                     \
-      INTRINSICCV_SVE2_IMPL_IF(intrinsiccv::sve2::saturating_sub<type>), \
-      intrinsiccv::sme2::saturating_sub<type>)
+#define INTRINSICCV_DEFINE_C_API(name, type)                              \
+  INTRINSICCV_MULTIVERSION_C_API(                                         \
+      name, &intrinsiccv::neon::saturating_sub<type>,                     \
+      INTRINSICCV_SVE2_IMPL_IF(&intrinsiccv::sve2::saturating_sub<type>), \
+      &intrinsiccv::sme2::saturating_sub<type>)
 
 INTRINSICCV_DEFINE_C_API(intrinsiccv_saturating_sub_s8, int8_t);
 INTRINSICCV_DEFINE_C_API(intrinsiccv_saturating_sub_u8, uint8_t);
diff --git a/intrinsiccv/src/arithmetics/threshold_api.cpp b/intrinsiccv/src/arithmetics/threshold_api.cpp
index 8338f3a72..f7e70fa68 100644
--- a/intrinsiccv/src/arithmetics/threshold_api.cpp
+++ b/intrinsiccv/src/arithmetics/threshold_api.cpp
@@ -30,10 +30,10 @@ intrinsiccv_error_t threshold_binary(const T *src, size_t src_stride, T *dst,
 
 }  // namespace intrinsiccv
 
-#define INTRINSICCV_DEFINE_C_API(name, type)                               \
-  INTRINSICCV_MULTIVERSION_C_API(                                          \
-      name, intrinsiccv::neon::threshold_binary<type>,                     \
-      INTRINSICCV_SVE2_IMPL_IF(intrinsiccv::sve2::threshold_binary<type>), \
-      intrinsiccv::sme2::threshold_binary<type>)
+#define INTRINSICCV_DEFINE_C_API(name, type)                                \
+  INTRINSICCV_MULTIVERSION_C_API(                                           \
+      name, &intrinsiccv::neon::threshold_binary<type>,                     \
+      INTRINSICCV_SVE2_IMPL_IF(&intrinsiccv::sve2::threshold_binary<type>), \
+      &intrinsiccv::sme2::threshold_binary<type>)
 
 INTRINSICCV_DEFINE_C_API(intrinsiccv_threshold_binary_u8, uint8_t);
diff --git a/intrinsiccv/src/arithmetics/transpose_api.cpp b/intrinsiccv/src/arithmetics/transpose_api.cpp
index 86f7d3ced..7fab64663 100644
--- a/intrinsiccv/src/arithmetics/transpose_api.cpp
+++ b/intrinsiccv/src/arithmetics/transpose_api.cpp
@@ -7,4 +7,4 @@
 #include "intrinsiccv/intrinsiccv.h"
 
 INTRINSICCV_MULTIVERSION_C_API(intrinsiccv_transpose,
-                               intrinsiccv::neon::transpose, nullptr, nullptr);
+                               &intrinsiccv::neon::transpose, nullptr, nullptr);
diff --git a/intrinsiccv/src/conversions/float_conv_api.cpp b/intrinsiccv/src/conversions/float_conv_api.cpp
new file mode 100644
index 000000000..165fa72c3
--- /dev/null
+++ b/intrinsiccv/src/conversions/float_conv_api.cpp
@@ -0,0 +1,71 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "intrinsiccv/dispatch.h"
+#include "intrinsiccv/intrinsiccv.h"
+#include "intrinsiccv/types.h"
+
+namespace intrinsiccv {
+
+namespace neon {
+
+template <typename InputType, typename OutputType>
+intrinsiccv_error_t float_conversion(const InputType* src, size_t src_stride,
+                                     OutputType* dst, size_t dst_stride,
+                                     size_t width, size_t height);
+
+}  // namespace neon
+
+namespace sve2 {
+
+template <typename InputType, typename OutputType>
+intrinsiccv_error_t float_conversion(const InputType* src, size_t src_stride,
+                                     OutputType* dst, size_t dst_stride,
+                                     size_t width, size_t height);
+
+}  // namespace sve2
+
+namespace sme2 {
+
+template <typename InputType, typename OutputType>
+intrinsiccv_error_t float_conversion(const InputType* src, size_t src_stride,
+                                     OutputType* dst, size_t dst_stride,
+                                     size_t width, size_t height);
+
+}  // namespace sme2
+
+#ifdef INTRINSICCV_HAVE_SVE2
+#define SVE2_FUNC_POINTER(name, itype, otype)                \
+  [[maybe_unused]] static auto sve2_func_##itype##_##otype = \
+      intrinsiccv::sve2::float_conversion<itype, otype>;
+#else
+#define SVE2_FUNC_POINTER(name, itype, otype)
+#endif  // INTRINSICCV_HAVE_SVE2
+
+#ifdef INTRINSICCV_HAVE_SME2
+#define SME2_FUNC_POINTER(name, itype, otype) \
+  static auto sme2_func_##itype##_##otype =   \
+      intrinsiccv::sme2::float_conversion<itype, otype>;
+#else
+#define SME2_FUNC_POINTER(name, itype, otype)
+#endif  // INTRINSICCV_HAVE_SME2
+
+// NOLINTBEGIN(cppcoreguidelines-avoid-non-const-global-variables)
+#define INTRINSICCV_DEFINE_C_API(name, itype, otype)         \
+  static auto neon_func_##itype##_##otype =                  \
+      intrinsiccv::neon::float_conversion<itype, otype>;     \
+  SVE2_FUNC_POINTER(name, itype, otype);                     \
+  SME2_FUNC_POINTER(name, itype, otype);                     \
+  INTRINSICCV_MULTIVERSION_C_API(                            \
+      name, neon_func_##itype##_##otype,                     \
+      INTRINSICCV_SVE2_IMPL_IF(sve2_func_##itype##_##otype), \
+      sme2_func_##itype##_##otype)
+// NOLINTEND(cppcoreguidelines-avoid-non-const-global-variables)
+
+INTRINSICCV_DEFINE_C_API(intrinsiccv_float_conversion_f32_s8, float, int8_t);
+INTRINSICCV_DEFINE_C_API(intrinsiccv_float_conversion_f32_u8, float, uint8_t);
+INTRINSICCV_DEFINE_C_API(intrinsiccv_float_conversion_s8_f32, int8_t, float);
+INTRINSICCV_DEFINE_C_API(intrinsiccv_float_conversion_u8_f32, uint8_t, float);
+
+}  // namespace intrinsiccv
diff --git a/intrinsiccv/src/conversions/float_conv_neon.cpp b/intrinsiccv/src/conversions/float_conv_neon.cpp
new file mode 100644
index 000000000..d500d8334
--- /dev/null
+++ b/intrinsiccv/src/conversions/float_conv_neon.cpp
@@ -0,0 +1,27 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "intrinsiccv/intrinsiccv.h"
+#include "intrinsiccv/neon.h"
+
+namespace intrinsiccv::neon {
+
+template <typename InputType, typename OutputType>
+intrinsiccv_error_t float_conversion(const InputType*, size_t, OutputType*,
+                                     size_t, size_t, size_t) {
+  return INTRINSICCV_ERROR_NOT_IMPLEMENTED;
+}
+
+#define INTRINSICCV_INSTANTIATE_TEMPLATE(itype, otype)                        \
+  template INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t                    \
+  float_conversion<itype, otype>(const itype* src, size_t src_stride,         \
+                                 otype* dst, size_t dst_stride, size_t width, \
+                                 size_t height)
+
+INTRINSICCV_INSTANTIATE_TEMPLATE(float, int8_t);
+INTRINSICCV_INSTANTIATE_TEMPLATE(float, uint8_t);
+INTRINSICCV_INSTANTIATE_TEMPLATE(int8_t, float);
+INTRINSICCV_INSTANTIATE_TEMPLATE(uint8_t, float);
+
+}  // namespace intrinsiccv::neon
diff --git a/intrinsiccv/src/conversions/float_conv_sc.h b/intrinsiccv/src/conversions/float_conv_sc.h
new file mode 100644
index 000000000..b2190b0a2
--- /dev/null
+++ b/intrinsiccv/src/conversions/float_conv_sc.h
@@ -0,0 +1,166 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef INTRINSICCV_FLOAT_CONV_SC_H
+#define INTRINSICCV_FLOAT_CONV_SC_H
+
+#include <limits>
+#include <type_traits>
+
+#include "intrinsiccv/intrinsiccv.h"
+#include "intrinsiccv/sve2.h"
+
+namespace INTRINSICCV_TARGET_NAMESPACE {
+
+template <typename InputType, typename OutputType>
+class float_conversion_operation;
+
+template <typename OutputType>
+class float_conversion_operation<float, OutputType> {
+ public:
+  using SrcVecTraits = INTRINSICCV_TARGET_NAMESPACE::VecTraits<float>;
+  using SrcVectorType = typename SrcVecTraits::VectorType;
+  using IntermediateVecTraits = INTRINSICCV_TARGET_NAMESPACE::VecTraits<
+      std::conditional_t<std::is_signed_v<OutputType>, int32_t, uint32_t>>;
+  using IntermediateVectorType = typename IntermediateVecTraits::VectorType;
+
+  void process_row(size_t width, Columns<const float> src,
+                   Columns<OutputType> dst) INTRINSICCV_STREAMING_COMPATIBLE {
+    LoopUnroll{width, SrcVecTraits::num_lanes()}
+        .unroll_twice([&](size_t step) INTRINSICCV_STREAMING_COMPATIBLE {
+          svbool_t pg = SrcVecTraits::svptrue();
+          SrcVectorType src_vector1 = svld1(pg, &src[0]);
+          SrcVectorType src_vector2 = svld1_vnum(pg, &src[0], 1);
+          IntermediateVectorType result_vector1 =
+              vector_path<OutputType>(pg, src_vector1);
+          IntermediateVectorType result_vector2 =
+              vector_path<OutputType>(pg, src_vector2);
+          svst1b(pg, &dst[0], result_vector1);
+          svst1b_vnum(pg, &dst[0], 1, result_vector2);
+          src += ptrdiff_t(step);
+          dst += ptrdiff_t(step);
+        })
+        .remaining([&](size_t length, size_t) INTRINSICCV_STREAMING_COMPATIBLE {
+          size_t index = 0;
+          svbool_t pg = SrcVecTraits::svwhilelt(index, length);
+          while (svptest_first(SrcVecTraits::svptrue(), pg)) {
+            SrcVectorType src_vector = svld1(pg, &src[ptrdiff_t(index)]);
+            IntermediateVectorType result_vector =
+                vector_path<OutputType>(pg, src_vector);
+            svst1b(pg, &dst[ptrdiff_t(index)], result_vector);
+            // Update loop counter and calculate the next governing predicate.
+            index += SrcVecTraits::num_lanes();
+            pg = SrcVecTraits::svwhilelt(index, length);
+          }
+        });
+  }
+
+ private:
+  template <
+      typename O,
+      std::enable_if_t<std::is_integral_v<O> && std::is_signed_v<O>, int> = 0>
+  IntermediateVectorType vector_path(svbool_t& pg, SrcVectorType src)
+      INTRINSICCV_STREAMING_COMPATIBLE {
+    constexpr float min_val = std::numeric_limits<O>::min();
+    constexpr float max_val = std::numeric_limits<O>::max();
+
+    src = svrinti_f32_x(pg, src);
+
+    svbool_t less = svcmplt_n_f32(pg, src, min_val);
+    src = svdup_n_f32_m(src, less, min_val);
+
+    svbool_t greater = svcmpgt_n_f32(pg, src, max_val);
+    src = svdup_n_f32_m(src, greater, max_val);
+
+    return svcvt_s32_f32_x(pg, src);
+  }
+
+  template <
+      typename O,
+      std::enable_if_t<std::is_integral_v<O> && !std::is_signed_v<O>, int> = 0>
+  IntermediateVectorType vector_path(svbool_t& pg, SrcVectorType src)
+      INTRINSICCV_STREAMING_COMPATIBLE {
+    constexpr float max_val = std::numeric_limits<O>::max();
+
+    src = svrinti_f32_x(pg, src);
+
+    svbool_t greater = svcmpgt_n_f32(pg, src, max_val);
+    src = svdup_n_f32_m(src, greater, max_val);
+
+    return svcvt_u32_f32_x(pg, src);
+  }
+};  // end of class float_conversion_operation<float, OutputType>
+
+template <typename InputType>
+class float_conversion_operation<InputType, float> {
+ public:
+  using VecTraits = INTRINSICCV_TARGET_NAMESPACE::VecTraits<float>;
+  using VectorType = typename VecTraits::VectorType;
+  void process_row(size_t width, Columns<const InputType> src,
+                   Columns<float> dst) INTRINSICCV_STREAMING_COMPATIBLE {
+    LoopUnroll{width, VecTraits::num_lanes()}
+        .unroll_twice([&](size_t step) INTRINSICCV_STREAMING_COMPATIBLE {
+          svbool_t pg = VecTraits::svptrue();
+          VectorType dst_vector1 = vector_path<InputType>(pg, &src[0]);
+          VectorType dst_vector2 = vector_path<InputType>(
+              pg, &src.at(ptrdiff_t(VecTraits::num_lanes()))[0]);
+          svst1(pg, &dst[0], dst_vector1);
+          svst1_vnum(pg, &dst[0], 1, dst_vector2);  // One vector past dst[0].
+          src += ptrdiff_t(step);
+          dst += ptrdiff_t(step);
+        })
+        .remaining([&](size_t length, size_t) INTRINSICCV_STREAMING_COMPATIBLE {
+          size_t index = 0;  // Offset of the next unprocessed element.
+          svbool_t pg = VecTraits::svwhilelt(index, length);
+          while (svptest_first(VecTraits::svptrue(), pg)) {
+            VectorType dst_vector =
+                vector_path<InputType>(pg, &src[ptrdiff_t(index)]);
+            svst1(pg, &dst[ptrdiff_t(index)], dst_vector);
+            // Update loop counter and calculate the next governing predicate.
+            index += VecTraits::num_lanes();
+            pg = VecTraits::svwhilelt(index, length);
+          }
+        });
+  }
+
+ private:
+  template <
+      typename I,
+      std::enable_if_t<std::is_integral_v<I> && std::is_signed_v<I>, int> = 0>
+  VectorType vector_path(svbool_t& pg,
+                         const I* src) INTRINSICCV_STREAMING_COMPATIBLE {
+    svint32_t src_vector = svld1sb_s32(pg, src);
+    return svcvt_f32_s32_x(pg, src_vector);
+  }
+
+  template <
+      typename I,
+      std::enable_if_t<std::is_integral_v<I> && !std::is_signed_v<I>, int> = 0>
+  VectorType vector_path(svbool_t& pg,
+                         const I* src) INTRINSICCV_STREAMING_COMPATIBLE {
+    svuint32_t src_vector = svld1ub_u32(pg, src);
+    return svcvt_f32_u32_x(pg, src_vector);
+  }
+};
+
+template <typename I, typename O>
+static intrinsiccv_error_t float_conversion_sc(
+    const I* src, size_t src_stride, O* dst, size_t dst_stride, size_t width,
+    size_t height) INTRINSICCV_STREAMING_COMPATIBLE {
+  CHECK_POINTER_AND_STRIDE(src, src_stride);
+  CHECK_POINTER_AND_STRIDE(dst, dst_stride);
+  CHECK_IMAGE_SIZE(width, height);
+
+  float_conversion_operation<I, O> operation;
+  Rectangle rect{width, height};
+  Rows<const I> src_rows{src, src_stride};
+  Rows<O> dst_rows{dst, dst_stride};
+  zip_rows(operation, rect, src_rows, dst_rows);
+
+  return INTRINSICCV_OK;
+}
+
+}  // namespace INTRINSICCV_TARGET_NAMESPACE
+
+#endif  // INTRINSICCV_FLOAT_CONV_SC_H
diff --git a/intrinsiccv/src/conversions/float_conv_sme2.cpp b/intrinsiccv/src/conversions/float_conv_sme2.cpp
new file mode 100644
index 000000000..c44fd8d5c
--- /dev/null
+++ b/intrinsiccv/src/conversions/float_conv_sme2.cpp
@@ -0,0 +1,28 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "float_conv_sc.h"
+
+namespace intrinsiccv::sme2 {
+
+template <typename InputType, typename OutputType>
+INTRINSICCV_LOCALLY_STREAMING INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t
+float_conversion(const InputType* src, size_t src_stride, OutputType* dst,
+                 size_t dst_stride, size_t width, size_t height) {
+  return float_conversion_sc<InputType, OutputType>(src, src_stride, dst,
+                                                    dst_stride, width, height);
+}
+
+#define INTRINSICCV_INSTANTIATE_TEMPLATE(itype, otype)                        \
+  template INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t                    \
+  float_conversion<itype, otype>(const itype* src, size_t src_stride,         \
+                                 otype* dst, size_t dst_stride, size_t width, \
+                                 size_t height)
+
+INTRINSICCV_INSTANTIATE_TEMPLATE(float, int8_t);
+INTRINSICCV_INSTANTIATE_TEMPLATE(float, uint8_t);
+INTRINSICCV_INSTANTIATE_TEMPLATE(int8_t, float);
+INTRINSICCV_INSTANTIATE_TEMPLATE(uint8_t, float);
+
+}  // namespace intrinsiccv::sme2
diff --git a/intrinsiccv/src/conversions/float_conv_sve2.cpp b/intrinsiccv/src/conversions/float_conv_sve2.cpp
new file mode 100644
index 000000000..7005f18a5
--- /dev/null
+++ b/intrinsiccv/src/conversions/float_conv_sve2.cpp
@@ -0,0 +1,28 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "float_conv_sc.h"
+
+namespace intrinsiccv::sve2 {
+
+template <typename InputType, typename OutputType>
+INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t
+float_conversion(const InputType* src, size_t src_stride, OutputType* dst,
+                 size_t dst_stride, size_t width, size_t height) {
+  return float_conversion_sc<InputType, OutputType>(src, src_stride, dst,
+                                                    dst_stride, width, height);
+}
+
+#define INTRINSICCV_INSTANTIATE_TEMPLATE(itype, otype)                        \
+  template INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t                    \
+  float_conversion<itype, otype>(const itype* src, size_t src_stride,         \
+                                 otype* dst, size_t dst_stride, size_t width, \
+                                 size_t height)
+
+INTRINSICCV_INSTANTIATE_TEMPLATE(float, int8_t);
+INTRINSICCV_INSTANTIATE_TEMPLATE(float, uint8_t);
+INTRINSICCV_INSTANTIATE_TEMPLATE(int8_t, float);
+INTRINSICCV_INSTANTIATE_TEMPLATE(uint8_t, float);
+
+}  // namespace intrinsiccv::sve2
diff --git a/intrinsiccv/src/conversions/float_to_int_api.cpp b/intrinsiccv/src/conversions/float_to_int_api.cpp
deleted file mode 100644
index 5503ecb8d..000000000
--- a/intrinsiccv/src/conversions/float_to_int_api.cpp
+++ /dev/null
@@ -1,51 +0,0 @@
-// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
-//
-// SPDX-License-Identifier: Apache-2.0
-
-#include "intrinsiccv/dispatch.h"
-#include "intrinsiccv/intrinsiccv.h"
-#include "intrinsiccv/types.h"
-
-namespace intrinsiccv {
-
-namespace neon {
-
-template <typename T>
-intrinsiccv_error_t type_conversion_float_to_int(const float* src,
-                                                 size_t src_stride, T* dst,
-                                                 size_t dst_stride,
-                                                 size_t width, size_t height);
-
-}  // namespace neon
-
-namespace sve2 {
-
-template <typename T>
-intrinsiccv_error_t type_conversion_float_to_int(const float* src,
-                                                 size_t src_stride, T* dst,
-                                                 size_t dst_stride,
-                                                 size_t width, size_t height);
-
-}  // namespace sve2
-
-namespace sme2 {
-
-template <typename T>
-intrinsiccv_error_t type_conversion_float_to_int(const float* src,
-                                                 size_t src_stride, T* dst,
-                                                 size_t dst_stride,
-                                                 size_t width, size_t height);
-
-}  // namespace sme2
-
-#define INTRINSICCV_DEFINE_C_API(name, type)                       \
-  INTRINSICCV_MULTIVERSION_C_API(                                  \
-      name, intrinsiccv::neon::type_conversion_float_to_int<type>, \
-      INTRINSICCV_SVE2_IMPL_IF(                                    \
-          intrinsiccv::sve2::type_conversion_float_to_int<type>),  \
-      intrinsiccv::sme2::type_conversion_float_to_int<type>)
-
-INTRINSICCV_DEFINE_C_API(intrinsiccv_type_conversion_f32_s8, int8_t);
-INTRINSICCV_DEFINE_C_API(intrinsiccv_type_conversion_f32_u8, uint8_t);
-
-}  // namespace intrinsiccv
diff --git a/intrinsiccv/src/conversions/float_to_int_neon.cpp b/intrinsiccv/src/conversions/float_to_int_neon.cpp
deleted file mode 100644
index abb3614c4..000000000
--- a/intrinsiccv/src/conversions/float_to_int_neon.cpp
+++ /dev/null
@@ -1,25 +0,0 @@
-// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
-//
-// SPDX-License-Identifier: Apache-2.0
-
-#include "intrinsiccv/intrinsiccv.h"
-#include "intrinsiccv/neon.h"
-
-namespace intrinsiccv::neon {
-
-template <typename T>
-intrinsiccv_error_t type_conversion_float_to_int(const float*, size_t, T*,
-                                                 size_t, size_t, size_t) {
-  return INTRINSICCV_ERROR_NOT_IMPLEMENTED;
-}
-
-#define INTRINSICCV_INSTANTIATE_TEMPLATE(type)                            \
-  template INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t                \
-  type_conversion_float_to_int<type>(const float* src, size_t src_stride, \
-                                     type* dst, size_t dst_stride,        \
-                                     size_t width, size_t height)
-
-INTRINSICCV_INSTANTIATE_TEMPLATE(int8_t);
-INTRINSICCV_INSTANTIATE_TEMPLATE(uint8_t);
-
-}  // namespace intrinsiccv::neon
diff --git a/intrinsiccv/src/conversions/float_to_int_sc.h b/intrinsiccv/src/conversions/float_to_int_sc.h
deleted file mode 100644
index ea43a59cb..000000000
--- a/intrinsiccv/src/conversions/float_to_int_sc.h
+++ /dev/null
@@ -1,103 +0,0 @@
-// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
-//
-// SPDX-License-Identifier: Apache-2.0
-
-#ifndef INTRINSICCV_FLOAT_TO_INT_SC_H
-#define INTRINSICCV_FLOAT_TO_INT_SC_H
-
-#include <type_traits>
-
-#include "intrinsiccv/intrinsiccv.h"
-#include "intrinsiccv/sve2.h"
-
-namespace INTRINSICCV_TARGET_NAMESPACE {
-
-template <typename OutputType>
-class float_to_int_operation final {
- public:
-  using SrcVecTraits = INTRINSICCV_TARGET_NAMESPACE::VecTraits<float>;
-  using SrcVectorType = typename SrcVecTraits::VectorType;
-  using IntermediateVecTraits = INTRINSICCV_TARGET_NAMESPACE::VecTraits<
-      std::conditional_t<std::is_signed_v<OutputType>, int32_t, uint32_t>>;
-  using IntermediateVectorType = typename IntermediateVecTraits::VectorType;
-
-  using VecTraits = SrcVecTraits;
-
-  void process_row(size_t width, Columns<const float> src,
-                   Columns<OutputType> dst) INTRINSICCV_STREAMING_COMPATIBLE {
-    LoopUnroll{width, VecTraits::num_lanes()}
-        .unroll_twice([&](size_t step) INTRINSICCV_STREAMING_COMPATIBLE {
-          svbool_t pg = VecTraits::svptrue();
-          SrcVectorType src_vector1 = svld1(pg, &src[0]);
-          SrcVectorType src_vector2 = svld1_vnum(pg, &src[0], 1);
-          IntermediateVectorType result_vector1 =
-              vector_path<OutputType>(pg, src_vector1);
-          IntermediateVectorType result_vector2 =
-              vector_path<OutputType>(pg, src_vector2);
-          svst1b(pg, &dst[0], result_vector1);
-          svst1b_vnum(pg, &dst[0], 1, result_vector2);
-          src += ptrdiff_t(step);
-          dst += ptrdiff_t(step);
-        })
-        .remaining([&](size_t length, size_t) INTRINSICCV_STREAMING_COMPATIBLE {
-          size_t index = 0;
-          svbool_t pg = VecTraits::svwhilelt(index, length);
-          while (svptest_first(VecTraits::svptrue(), pg)) {
-            SrcVectorType src_vector = svld1(pg, &src[ptrdiff_t(index)]);
-            IntermediateVectorType result_vector =
-                vector_path<OutputType>(pg, src_vector);
-            svst1b(pg, &dst[ptrdiff_t(index)], result_vector);
-            // Update loop counter and calculate the next governing predicate.
-            index += VecTraits::num_lanes();
-            pg = VecTraits::svwhilelt(index, length);
-          }
-        });
-  }
-
- private:
-  template <typename T, std::enable_if_t<std::is_same_v<int8_t, T>, int> = 0>
-  IntermediateVectorType vector_path(svbool_t& pg, SrcVectorType src)
-      INTRINSICCV_STREAMING_COMPATIBLE {
-    src = svrinti_f32_x(pg, src);
-
-    svbool_t less = svcmplt_n_f32(pg, src, -128.0);
-    src = svdup_n_f32_m(src, less, -128.0);
-
-    svbool_t greater = svcmpgt_n_f32(pg, src, 127.0);
-    src = svdup_n_f32_m(src, greater, 127.0);
-
-    return svcvt_s32_f32_x(pg, src);
-  }
-
-  template <typename T, std::enable_if_t<std::is_same_v<uint8_t, T>, int> = 0>
-  IntermediateVectorType vector_path(svbool_t& pg, SrcVectorType src)
-      INTRINSICCV_STREAMING_COMPATIBLE {
-    src = svrinti_f32_x(pg, src);
-
-    svbool_t greater = svcmpgt_n_f32(pg, src, 255.0);
-    src = svdup_n_f32_m(src, greater, 255.0);
-
-    return svcvt_u32_f32_x(pg, src);
-  }
-};  // end of class float_to_int_operation<OutputType>
-
-template <typename T>
-static intrinsiccv_error_t type_conversion_float_to_int_sc(
-    const float* src, size_t src_stride, T* dst, size_t dst_stride,
-    size_t width, size_t height) INTRINSICCV_STREAMING_COMPATIBLE {
-  CHECK_POINTER_AND_STRIDE(src, src_stride);
-  CHECK_POINTER_AND_STRIDE(dst, dst_stride);
-  CHECK_IMAGE_SIZE(width, height);
-
-  float_to_int_operation<T> operation;
-  Rectangle rect{width, height};
-  Rows<const float> src_rows{src, src_stride};
-  Rows<T> dst_rows{dst, dst_stride};
-  zip_rows(operation, rect, src_rows, dst_rows);
-
-  return INTRINSICCV_OK;
-}
-
-}  // namespace INTRINSICCV_TARGET_NAMESPACE
-
-#endif  // INTRINSICCV_FLOAT_TO_INT_SC_H
diff --git a/intrinsiccv/src/conversions/float_to_int_sme2.cpp b/intrinsiccv/src/conversions/float_to_int_sme2.cpp
deleted file mode 100644
index 64c704e58..000000000
--- a/intrinsiccv/src/conversions/float_to_int_sme2.cpp
+++ /dev/null
@@ -1,26 +0,0 @@
-// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
-//
-// SPDX-License-Identifier: Apache-2.0
-
-#include "float_to_int_sc.h"
-
-namespace intrinsiccv::sme2 {
-
-template <typename T>
-INTRINSICCV_LOCALLY_STREAMING INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t
-type_conversion_float_to_int(const float* src, size_t src_stride, T* dst,
-                             size_t dst_stride, size_t width, size_t height) {
-  return type_conversion_float_to_int_sc<T>(src, src_stride, dst, dst_stride,
-                                            width, height);
-}
-
-#define INTRINSICCV_INSTANTIATE_TEMPLATE(type)                            \
-  template INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t                \
-  type_conversion_float_to_int<type>(const float* src, size_t src_stride, \
-                                     type* dst, size_t dst_stride,        \
-                                     size_t width, size_t height)
-
-INTRINSICCV_INSTANTIATE_TEMPLATE(int8_t);
-INTRINSICCV_INSTANTIATE_TEMPLATE(uint8_t);
-
-}  // namespace intrinsiccv::sme2
diff --git a/intrinsiccv/src/conversions/float_to_int_sve2.cpp b/intrinsiccv/src/conversions/float_to_int_sve2.cpp
deleted file mode 100644
index a9eb25eef..000000000
--- a/intrinsiccv/src/conversions/float_to_int_sve2.cpp
+++ /dev/null
@@ -1,26 +0,0 @@
-// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
-//
-// SPDX-License-Identifier: Apache-2.0
-
-#include "float_to_int_sc.h"
-
-namespace intrinsiccv::sve2 {
-
-template <typename T>
-INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t
-type_conversion_float_to_int(const float* src, size_t src_stride, T* dst,
-                             size_t dst_stride, size_t width, size_t height) {
-  return type_conversion_float_to_int_sc<T>(src, src_stride, dst, dst_stride,
-                                            width, height);
-}
-
-#define INTRINSICCV_INSTANTIATE_TEMPLATE(type)                            \
-  template INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t                \
-  type_conversion_float_to_int<type>(const float* src, size_t src_stride, \
-                                     type* dst, size_t dst_stride,        \
-                                     size_t width, size_t height)
-
-INTRINSICCV_INSTANTIATE_TEMPLATE(int8_t);
-INTRINSICCV_INSTANTIATE_TEMPLATE(uint8_t);
-
-}  // namespace intrinsiccv::sve2
diff --git a/intrinsiccv/src/conversions/gray_to_rgb_api.cpp b/intrinsiccv/src/conversions/gray_to_rgb_api.cpp
index ce33a9e2d..14e49476b 100644
--- a/intrinsiccv/src/conversions/gray_to_rgb_api.cpp
+++ b/intrinsiccv/src/conversions/gray_to_rgb_api.cpp
@@ -6,11 +6,11 @@
 #include "intrinsiccv/dispatch.h"
 #include "intrinsiccv/intrinsiccv.h"
 
-#define INTRINSICCV_DEFINE_C_API(name, partialname)             \
-  INTRINSICCV_MULTIVERSION_C_API(                               \
-      name, intrinsiccv::neon::partialname,                     \
-      INTRINSICCV_SVE2_IMPL_IF(intrinsiccv::sve2::partialname), \
-      intrinsiccv::sme2::partialname)
+#define INTRINSICCV_DEFINE_C_API(name, partialname)              \
+  INTRINSICCV_MULTIVERSION_C_API(                                \
+      name, &intrinsiccv::neon::partialname,                     \
+      INTRINSICCV_SVE2_IMPL_IF(&intrinsiccv::sve2::partialname), \
+      &intrinsiccv::sme2::partialname)
 
 INTRINSICCV_DEFINE_C_API(intrinsiccv_gray_to_rgb_u8, gray_to_rgb_u8);
 INTRINSICCV_DEFINE_C_API(intrinsiccv_gray_to_rgba_u8, gray_to_rgba_u8);
diff --git a/intrinsiccv/src/conversions/merge_api.cpp b/intrinsiccv/src/conversions/merge_api.cpp
index ec47a818b..d84ce62c9 100644
--- a/intrinsiccv/src/conversions/merge_api.cpp
+++ b/intrinsiccv/src/conversions/merge_api.cpp
@@ -6,5 +6,5 @@
 #include "intrinsiccv/dispatch.h"
 #include "intrinsiccv/intrinsiccv.h"
 
-INTRINSICCV_MULTIVERSION_C_API(intrinsiccv_merge, intrinsiccv::neon::merge,
+INTRINSICCV_MULTIVERSION_C_API(intrinsiccv_merge, &intrinsiccv::neon::merge,
                                nullptr, nullptr);
diff --git a/intrinsiccv/src/conversions/rgb_to_rgb_api.cpp b/intrinsiccv/src/conversions/rgb_to_rgb_api.cpp
index 4563975a0..434ea29b3 100644
--- a/intrinsiccv/src/conversions/rgb_to_rgb_api.cpp
+++ b/intrinsiccv/src/conversions/rgb_to_rgb_api.cpp
@@ -7,11 +7,11 @@
 #include "intrinsiccv/intrinsiccv.h"
 #include "intrinsiccv/types.h"
 
-#define INTRINSICCV_DEFINE_C_API(name, partialname)             \
-  INTRINSICCV_MULTIVERSION_C_API(                               \
-      name, intrinsiccv::neon::partialname,                     \
-      INTRINSICCV_SVE2_IMPL_IF(intrinsiccv::sve2::partialname), \
-      intrinsiccv::sme2::partialname)
+#define INTRINSICCV_DEFINE_C_API(name, partialname)              \
+  INTRINSICCV_MULTIVERSION_C_API(                                \
+      name, &intrinsiccv::neon::partialname,                     \
+      INTRINSICCV_SVE2_IMPL_IF(&intrinsiccv::sve2::partialname), \
+      &intrinsiccv::sme2::partialname)
 
 INTRINSICCV_DEFINE_C_API(intrinsiccv_rgb_to_bgr_u8, rgb_to_bgr_u8);
 INTRINSICCV_DEFINE_C_API(intrinsiccv_rgba_to_bgra_u8, rgba_to_bgra_u8);
diff --git a/intrinsiccv/src/conversions/split_api.cpp b/intrinsiccv/src/conversions/split_api.cpp
index 2794da8ea..80286c9eb 100644
--- a/intrinsiccv/src/conversions/split_api.cpp
+++ b/intrinsiccv/src/conversions/split_api.cpp
@@ -6,5 +6,5 @@
 #include "intrinsiccv/dispatch.h"
 #include "intrinsiccv/intrinsiccv.h"
 
-INTRINSICCV_MULTIVERSION_C_API(intrinsiccv_split, intrinsiccv::neon::split,
+INTRINSICCV_MULTIVERSION_C_API(intrinsiccv_split, &intrinsiccv::neon::split,
                                nullptr, nullptr);
diff --git a/intrinsiccv/src/conversions/yuv_to_rgb_api.cpp b/intrinsiccv/src/conversions/yuv_to_rgb_api.cpp
index 24499bce5..4f051230f 100644
--- a/intrinsiccv/src/conversions/yuv_to_rgb_api.cpp
+++ b/intrinsiccv/src/conversions/yuv_to_rgb_api.cpp
@@ -6,10 +6,10 @@
 #include "intrinsiccv/dispatch.h"
 #include "intrinsiccv/intrinsiccv.h"
 
-#define INTRINSICCV_DEFINE_C_API(name, partialname)                    \
-  INTRINSICCV_MULTIVERSION_C_API(name, intrinsiccv::neon::partialname, \
-                                 intrinsiccv::sve2::partialname,       \
-                                 intrinsiccv::sme2::partialname)
+#define INTRINSICCV_DEFINE_C_API(name, partialname)                     \
+  INTRINSICCV_MULTIVERSION_C_API(name, &intrinsiccv::neon::partialname, \
+                                 &intrinsiccv::sve2::partialname,       \
+                                 &intrinsiccv::sme2::partialname)
 
 INTRINSICCV_DEFINE_C_API(intrinsiccv_yuv_sp_to_rgb_u8, yuv_sp_to_rgb_u8);
 INTRINSICCV_DEFINE_C_API(intrinsiccv_yuv_sp_to_bgr_u8, yuv_sp_to_bgr_u8);
diff --git a/intrinsiccv/src/filters/gaussian_blur_api.cpp b/intrinsiccv/src/filters/gaussian_blur_api.cpp
index 29abc9585..39982b6a8 100644
--- a/intrinsiccv/src/filters/gaussian_blur_api.cpp
+++ b/intrinsiccv/src/filters/gaussian_blur_api.cpp
@@ -54,10 +54,10 @@ intrinsiccv_error_t intrinsiccv_filter_release(
 }  // extern "C"
 
 INTRINSICCV_MULTIVERSION_C_API(intrinsiccv_gaussian_blur_3x3_u8,
-                               intrinsiccv::neon::gaussian_blur_3x3_u8, nullptr,
-                               nullptr);
+                               &intrinsiccv::neon::gaussian_blur_3x3_u8,
+                               nullptr, nullptr);
 
 INTRINSICCV_MULTIVERSION_C_API(
-    intrinsiccv_gaussian_blur_5x5_u8, intrinsiccv::neon::gaussian_blur_5x5_u8,
-    INTRINSICCV_SVE2_IMPL_IF(intrinsiccv::sve2::gaussian_blur_5x5_u8),
-    intrinsiccv::sme2::gaussian_blur_5x5_u8);
+    intrinsiccv_gaussian_blur_5x5_u8, &intrinsiccv::neon::gaussian_blur_5x5_u8,
+    INTRINSICCV_SVE2_IMPL_IF(&intrinsiccv::sve2::gaussian_blur_5x5_u8),
+    &intrinsiccv::sme2::gaussian_blur_5x5_u8);
diff --git a/intrinsiccv/src/filters/sobel_api.cpp b/intrinsiccv/src/filters/sobel_api.cpp
index 7154dfdba..b7663ca0f 100644
--- a/intrinsiccv/src/filters/sobel_api.cpp
+++ b/intrinsiccv/src/filters/sobel_api.cpp
@@ -6,11 +6,11 @@
 #include "intrinsiccv/filters/sobel.h"
 #include "intrinsiccv/intrinsiccv.h"
 
-#define INTRINSICCV_DEFINE_C_API(name, partialname)             \
-  INTRINSICCV_MULTIVERSION_C_API(                               \
-      name, intrinsiccv::neon::partialname,                     \
-      INTRINSICCV_SVE2_IMPL_IF(intrinsiccv::sve2::partialname), \
-      intrinsiccv::sme2::partialname)
+#define INTRINSICCV_DEFINE_C_API(name, partialname)              \
+  INTRINSICCV_MULTIVERSION_C_API(                                \
+      name, &intrinsiccv::neon::partialname,                     \
+      INTRINSICCV_SVE2_IMPL_IF(&intrinsiccv::sve2::partialname), \
+      &intrinsiccv::sme2::partialname)
 
 INTRINSICCV_DEFINE_C_API(intrinsiccv_sobel_3x3_horizontal_s16_u8,
                          sobel_3x3_horizontal_s16_u8);
diff --git a/intrinsiccv/src/morphology/morphology_api.cpp b/intrinsiccv/src/morphology/morphology_api.cpp
index 6c53f01d8..5fc168e8c 100644
--- a/intrinsiccv/src/morphology/morphology_api.cpp
+++ b/intrinsiccv/src/morphology/morphology_api.cpp
@@ -108,11 +108,11 @@ intrinsiccv_error_t intrinsiccv_morphology_release(
 
 }  // extern "C"
 
-#define INTRINSICCV_DEFINE_C_API(name, tname, type)             \
-  INTRINSICCV_MULTIVERSION_C_API(                               \
-      name, intrinsiccv::neon::tname<type>,                     \
-      INTRINSICCV_SVE2_IMPL_IF(intrinsiccv::sve2::tname<type>), \
-      intrinsiccv::sme2::tname<type>)
+#define INTRINSICCV_DEFINE_C_API(name, tname, type)              \
+  INTRINSICCV_MULTIVERSION_C_API(                                \
+      name, &intrinsiccv::neon::tname<type>,                     \
+      INTRINSICCV_SVE2_IMPL_IF(&intrinsiccv::sve2::tname<type>), \
+      &intrinsiccv::sme2::tname<type>)
 
 INTRINSICCV_DEFINE_C_API(intrinsiccv_dilate_u8, dilate, uint8_t);
 INTRINSICCV_DEFINE_C_API(intrinsiccv_erode_u8, erode, uint8_t);
diff --git a/intrinsiccv/src/resize/resize_api.cpp b/intrinsiccv/src/resize/resize_api.cpp
index c259626ea..00a8d9a44 100644
--- a/intrinsiccv/src/resize/resize_api.cpp
+++ b/intrinsiccv/src/resize/resize_api.cpp
@@ -7,6 +7,6 @@
 #include "intrinsiccv/resize/resize.h"
 
 INTRINSICCV_MULTIVERSION_C_API(
-    intrinsiccv_resize_to_quarter_u8, intrinsiccv::neon::resize_to_quarter_u8,
-    INTRINSICCV_SVE2_IMPL_IF(intrinsiccv::sve2::resize_to_quarter_u8),
-    intrinsiccv::sme2::resize_to_quarter_u8);
+    intrinsiccv_resize_to_quarter_u8, &intrinsiccv::neon::resize_to_quarter_u8,
+    INTRINSICCV_SVE2_IMPL_IF(&intrinsiccv::sve2::resize_to_quarter_u8),
+    &intrinsiccv::sme2::resize_to_quarter_u8);
diff --git a/intrinsiccv/src/resize/resize_linear_api.cpp b/intrinsiccv/src/resize/resize_linear_api.cpp
index 17c87fb59..d8c1ae8e6 100644
--- a/intrinsiccv/src/resize/resize_linear_api.cpp
+++ b/intrinsiccv/src/resize/resize_linear_api.cpp
@@ -7,6 +7,6 @@
 #include "intrinsiccv/resize/resize_linear.h"
 
 INTRINSICCV_MULTIVERSION_C_API(
-    intrinsiccv_resize_linear_u8, intrinsiccv::neon::resize_linear_u8,
-    INTRINSICCV_SVE2_IMPL_IF(intrinsiccv::sve2::resize_linear_u8),
-    intrinsiccv::sme2::resize_linear_u8);
+    intrinsiccv_resize_linear_u8, &intrinsiccv::neon::resize_linear_u8,
+    INTRINSICCV_SVE2_IMPL_IF(&intrinsiccv::sve2::resize_linear_u8),
+    &intrinsiccv::sme2::resize_linear_u8);
diff --git a/scripts/ci.sh b/scripts/ci.sh
index fd194d7b1..8555681ab 100755
--- a/scripts/ci.sh
+++ b/scripts/ci.sh
@@ -51,7 +51,7 @@ ninja -C build/gcc
 
 # Run tests
 LONG_VECTOR_TESTS="GRAY2.*:RGB*"
-EXCLUDE_FLOAT_CONVERSION_TESTS="-FloatToIntTest*"
+EXCLUDE_FLOAT_CONVERSION_TESTS="-FloatConversion*"
 TESTRESULT=0
 qemu-aarch64     build/test/framework/intrinsiccv-framework-test --gtest_output=xml:build/test-results/ || TESTRESULT=1
 qemu-aarch64 -cpu cortex-a35 build/test/api/intrinsiccv-api-test --gtest_filter="${EXCLUDE_FLOAT_CONVERSION_TESTS}" --gtest_output=xml:build/test-results/clang-neon/ || TESTRESULT=1
diff --git a/test/api/test_float_conv.cpp b/test/api/test_float_conv.cpp
new file mode 100644
index 000000000..2f7a26137
--- /dev/null
+++ b/test/api/test_float_conv.cpp
@@ -0,0 +1,476 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <gtest/gtest.h>
+
+#include "framework/array.h"
+#include "framework/generator.h"
+#include "framework/operation.h"
+#include "framework/utils.h"
+#include "intrinsiccv/intrinsiccv.h"
+#include "test_config.h"
+
+#define INTRINSICCV_float_conversion(I, input_type_name, O, output_type_name) \
+  INTRINSICCV_DIFF_IO_API(                                                    \
+      float_conversion,                                                       \
+      intrinsiccv_float_conversion_##input_type_name##_##output_type_name, I, \
+      O)
+
+INTRINSICCV_float_conversion(float, f32, int8_t, s8);
+INTRINSICCV_float_conversion(float, f32, uint8_t, u8);
+INTRINSICCV_float_conversion(int8_t, s8, float, f32);
+INTRINSICCV_float_conversion(uint8_t, u8, float, f32);
+
+template <typename InputType, typename OutputType>
+class FloatConversionTest final {
+ private:
+  template <typename T>
+  static constexpr T min() {
+    return std::numeric_limits<T>::min();
+  }
+
+  template <typename T>
+  static constexpr T max() {
+    return std::numeric_limits<T>::max();
+  }
+
+  struct Elements {
+    size_t width;
+    size_t height;
+
+    std::vector<std::vector<InputType>> source_rows;
+    std::vector<std::vector<OutputType>> expected_rows;
+
+    Elements(size_t _width, size_t _height,
+             std::vector<std::vector<InputType>>&& _source_rows,
+             std::vector<std::vector<OutputType>>&& _expected_rows)
+        : width(_width),
+          height(_height),
+          source_rows(std::move(_source_rows)),
+          expected_rows(std::move(_expected_rows)) {}
+  };
+
+  struct Values {
+    InputType source;
+    OutputType expected;
+  };
+
+  static constexpr uint32_t quietNaN = 0x7FC00000;
+  static constexpr uint32_t signalingNaN = 0x7FA00000;
+  static constexpr uint32_t posInfinity = 0x7F800000;
+  static constexpr uint32_t negInfinity = 0xFF800000;
+
+  static constexpr uint32_t minusNaN = 0xFF800001;
+  static constexpr uint32_t plusNaN = 0x7F800001;
+  static constexpr uint32_t plusZero = 0x00000000;
+  static constexpr uint32_t minusZero = 0x80000000;
+
+  static constexpr uint32_t oneNaN = 0x7FC00001;
+  static constexpr uint32_t zeroDivZero = 0xFFC00000;
+  static constexpr uint32_t floatMin = 0x00800000;
+  static constexpr uint32_t floatMax = 0x7F7FFFFF;
+
+  static constexpr uint32_t posSubnormalMin = 0x00000001;
+  static constexpr uint32_t posSubnormalMax = 0x007FFFFF;
+  static constexpr uint32_t negSubnormalMin = 0x80000001;
+  static constexpr uint32_t negSubnormalMax = 0x807FFFFF;
+
+  static float _floatval(uint32_t v) {  // non-constexpr: reinterpret_cast
+    static_assert(sizeof(float) == 4);
+    INTRINSICCV_NO_STRICT_ALIASING_BEGIN
+    return *reinterpret_cast<float*>(&v);
+    INTRINSICCV_NO_STRICT_ALIASING_END
+  }
+
+  template <typename I, typename O,
+            std::enable_if_t<std::is_same_v<float, I>, bool> = true,
+            std::enable_if_t<std::is_same_v<int8_t, O>, bool> = true>
+  const Elements& get_custom_elements() {
+    static const Elements kTestElements = {
+        // clang-format off
+      4, 8,
+      {{
+        { _floatval(quietNaN), _floatval(signalingNaN), _floatval(posInfinity), _floatval(negInfinity) },
+        { _floatval(minusNaN), _floatval(plusNaN), _floatval(plusZero), _floatval(minusZero) },
+        { _floatval(oneNaN), _floatval(zeroDivZero), _floatval(floatMin), _floatval(floatMax) },
+        { _floatval(posSubnormalMin), _floatval(posSubnormalMax), _floatval(negSubnormalMin), _floatval(negSubnormalMax) },
+        { 1111.11, -1112.22, 113.33, 114.44 },
+        { 111.51, 112.62, 113.73, 114.84 },
+        { 126.66, 127.11, 128.66, 129.11 },
+        { 11.5, 12.5, -11.5, -12.5 }
+      }},
+      {{
+        { 0, 0, 127, -128 },
+        { 0, 0, 0, 0 },
+        { 0, 0, 0, 127 },
+        { 0, 0, 0, 0 },
+        { 127, -128, 113, 114 },
+        { 112, 113, 114, 115 },
+        { 127, 127, 127, 127 },
+        { 12, 12, -12, -12 }
+      }}
+        // clang-format on
+    };
+    return kTestElements;
+  }
+
+  template <typename I, typename O,
+            std::enable_if_t<std::is_same_v<float, I>, bool> = true,
+            std::enable_if_t<std::is_same_v<uint8_t, O>, bool> = true>
+  const Elements& get_custom_elements() {
+    static const Elements kTestElements = {
+        // clang-format off
+      4, 8,
+      {{
+        { _floatval(quietNaN), _floatval(signalingNaN), _floatval(posInfinity), _floatval(negInfinity) },
+        { _floatval(minusNaN), _floatval(plusNaN), _floatval(plusZero), _floatval(minusZero) },
+        { _floatval(oneNaN), _floatval(zeroDivZero), _floatval(floatMin), _floatval(floatMax) },
+        { _floatval(posSubnormalMin), _floatval(posSubnormalMax), _floatval(negSubnormalMin), _floatval(negSubnormalMax) },
+        { 1111.11, -1112.22, 113.33, 114.44 },
+        { 111.51, 112.62, 113.73, 114.84 },
+        { 126.66, 127.11, 128.66, 129.11 },
+        { 11.5, 12.5, -11.5, -12.5 }
+      }},
+      {{
+        { 0, 0, 255, 0 },
+        { 0, 0, 0, 0 },
+        { 0, 0, 0, 255 },
+        { 0, 0, 0, 0 },
+        { 255, 0, 113, 114 },
+        { 112, 113, 114, 115 },
+        { 127, 127, 129, 129 },
+        { 12, 12, 0, 0 }
+      }}
+        // clang-format on
+    };
+    return kTestElements;
+  }
+
+  template <typename I, typename O,
+            std::enable_if_t<std::is_same_v<float, O>, bool> = true>
+  const Elements& get_custom_elements() {
+    static const Elements kTestElements = {
+        // clang-format off
+      4, 6,
+      {{
+        { min<I>(), min<I>(), max<I>() - 1, max<I>() },
+        { min<I>(), min<I>(), min<I>(), min<I>() },
+        { min<I>(), min<I>(), min<I>(), max<I>() - 1 },
+        { max<I>() - 1, max<I>(), 113, 114 },
+        { 112, 113, 114, 115 },
+        { 12, 12, 12, 12 }
+      }},
+      {{
+        { min<I>(), min<I>(), max<I>() - 1.0, max<I>() },
+        { min<I>(), min<I>(), min<I>(), min<I>() },
+        { min<I>(), min<I>(), min<I>(), max<I>() - 1.0 },
+        { max<I>() - 1.0, max<I>(), 113.0, 114.0 },
+        { 112.0, 113.0, 114.0, 115.0 },
+        { 12.0, 12.0, 12.0, 12.0 }
+      }}
+        // clang-format on
+    };
+    return kTestElements;
+  }
+
+  template <typename I, typename O,
+            std::enable_if_t<std::is_same_v<float, I>, bool> = true>
+  const Values& get_values() {
+    static const Values kTestValues = {
+        // clang-format off
+        10.67F, 11
+        // clang-format on
+    };
+    return kTestValues;
+  }
+
+  template <typename I, typename O,
+            std::enable_if_t<std::is_same_v<float, O>, bool> = true>
+  const Values& get_values() {
+    static const Values kTestValues = {
+        // clang-format off
+        11, 11.0
+        // clang-format on
+    };
+    return kTestValues;
+  }
+
+  template <typename I, typename O,
+            std::enable_if_t<std::is_same_v<float, I>, bool> = true,
+            std::enable_if_t<std::is_integral_v<O>, bool> = true>
+  void calculate_expected(const test::Array2D<I>& source,
+                          test::Array2D<O>& expected) {
+    for (size_t row = 0; row < source.height(); ++row) {
+      for (size_t column = 0; column < source.width(); ++column) {
+        // NOLINTBEGIN(clang-analyzer-core.uninitialized.Assign)
+        const I value = *source.at(row, column);
+        // NOLINTEND(clang-analyzer-core.uninitialized.Assign)
+        O saturated = 0;
+        if (value > max<O>()) {
+          saturated = max<O>();
+        } else if (value < min<O>()) {
+          saturated = min<O>();
+        } else {
+          saturated = value;
+        }
+        *expected.at(row, column) = saturated;
+      }
+    }
+  }
+
+  template <typename I, typename O,
+            std::enable_if_t<std::is_integral_v<I>, bool> = true,
+            std::enable_if_t<std::is_same_v<float, O>, bool> = true>
+  void calculate_expected(const test::Array2D<I>& source,
+                          test::Array2D<O>& expected) {
+    for (size_t row = 0; row < source.height(); ++row) {
+      for (size_t column = 0; column < source.width(); ++column) {
+        // NOLINTBEGIN(clang-analyzer-core.uninitialized.Assign)
+        *expected.at(row, column) = *source.at(row, column);
+        // NOLINTEND(clang-analyzer-core.uninitialized.Assign)
+      }
+    }
+  }
+
+  template <typename T>
+  size_t get_linear_height(size_t width, size_t minimum_size) {
+    // Element count is the larger of minimum_size and max<T>() - min<T>();
+    // the height is that count divided by width, plus one row.
+    const auto value_span = static_cast<size_t>(max<T>() - min<T>());
+    const size_t element_count = std::max(minimum_size, value_span);
+    return element_count / width + 1;
+  }
+
+  template <typename I, typename O,
+            std::enable_if_t<std::is_same_v<float, I>, bool> = true,
+            std::enable_if_t<std::is_integral_v<O>, bool> = true>
+  std::tuple<test::Array2D<I>, test::Array2D<O>, test::Array2D<O>>
+  get_linear_arrays(size_t width, size_t height) {
+    test::Array2D<I> source(width, height, 1, 1);
+    test::Array2D<O> expected(width, height, 1, 1);
+    test::Array2D<O> actual(width, height, 1, 1);
+
+    test::GenerateLinearSeries<I> generator(min<O>());
+
+    source.fill(generator);
+
+    calculate_expected<I, O>(source, expected);
+
+    return {source, expected, actual};
+  }
+
+  template <typename I, typename O,
+            std::enable_if_t<std::is_integral_v<I>, bool> = true,
+            std::enable_if_t<std::is_same_v<float, O>, bool> = true>
+  std::tuple<test::Array2D<I>, test::Array2D<O>, test::Array2D<O>>
+  get_linear_arrays(size_t width, size_t height) {
+    test::Array2D<I> source(width, height, 1, 1);
+    test::Array2D<O> expected(width, height, 1, 1);
+    test::Array2D<O> actual(width, height, 1, 1);
+
+    test::GenerateLinearSeries<I> generator(min<I>());
+
+    source.fill(generator);
+
+    calculate_expected<I, O>(source, expected);
+
+    return {source, expected, actual};
+  }
+
+ public:
+  // minimum_size set by caller to trigger the 'big' conversion path.
+  template <typename I, typename O,
+            std::enable_if_t<std::is_same_v<float, I>, bool> = true,
+            std::enable_if_t<std::is_integral_v<O>, bool> = true>
+  void test_linear(size_t width, size_t minimum_size = 1) {
+    size_t height = get_linear_height<O>(width, minimum_size);
+
+    auto arrays = get_linear_arrays<I, O>(width, height);
+
+    test::Array2D<I>& source = std::get<0>(arrays);
+    test::Array2D<O>& expected = std::get<1>(arrays);
+    test::Array2D<O>& actual = std::get<2>(arrays);
+
+    ASSERT_EQ(INTRINSICCV_OK, (float_conversion<I, O>()(
+                                  source.data(), source.stride(), actual.data(),
+                                  actual.stride(), width, height)));
+
+    EXPECT_EQ_ARRAY2D(expected, actual);
+  }
+
+  template <typename I, typename O,
+            std::enable_if_t<std::is_integral_v<I>, bool> = true,
+            std::enable_if_t<std::is_same_v<float, O>, bool> = true>
+  void test_linear(size_t width, size_t minimum_size = 1) {
+    size_t height = get_linear_height<I>(width, minimum_size);
+
+    auto arrays = get_linear_arrays<I, O>(width, height);
+
+    test::Array2D<I>& source = std::get<0>(arrays);
+    test::Array2D<O>& expected = std::get<1>(arrays);
+    test::Array2D<O>& actual = std::get<2>(arrays);
+
+    ASSERT_EQ(INTRINSICCV_OK, (float_conversion<I, O>()(
+                                  source.data(), source.stride(), actual.data(),
+                                  actual.stride(), width, height)));
+
+    EXPECT_EQ_ARRAY2D(expected, actual);
+  }
+
+  // Loads the rows from get_custom_elements<InputType, OutputType>() into the
+  // source and expected arrays, converts, and compares actual with expected.
+  void test_custom() {
+    auto cases = get_custom_elements<InputType, OutputType>();
+    const size_t width = cases.width;
+    const size_t height = cases.height;
+    test::Array2D<InputType> source(width, height);
+    test::Array2D<OutputType> expected(width, height);
+    test::Array2D<OutputType> actual(width, height);
+
+    for (size_t row = 0; row < height; ++row) {
+      source.set(row, 0, cases.source_rows[row]);
+      expected.set(row, 0, cases.expected_rows[row]);
+    }
+
+    ASSERT_EQ(INTRINSICCV_OK, (float_conversion<InputType, OutputType>()(
+                                  source.data(), source.stride(), actual.data(),
+                                  actual.stride(), width, height)));
+    EXPECT_EQ_ARRAY2D(expected, actual);
+  }
+
+  // Fills a width x height source and expected array with the values from
+  // get_values<InputType, OutputType>(), converts, and compares the output.
+  void test_sizes(const size_t width, const size_t height) {
+    auto fill_values = get_values<InputType, OutputType>();
+
+    test::Array2D<InputType> source(width, height, 1, 1);
+    source.fill(fill_values.source);
+
+    test::Array2D<OutputType> expected(width, height, 1, 1);
+    expected.fill(fill_values.expected);
+
+    // The output starts zeroed before the conversion writes to it.
+    test::Array2D<OutputType> actual(width, height, 1, 1);
+    actual.fill(0);
+
+    ASSERT_EQ(INTRINSICCV_OK, (float_conversion<InputType, OutputType>()(
+                                  source.data(), source.stride(), actual.data(),
+                                  actual.stride(), width, height)));
+    EXPECT_EQ_ARRAY2D(expected, actual);
+  }
+};  // end of class FloatConversionTest
+
+template <typename TypePair>
+class FloatConversion : public testing::Test {};
+
+using ElementTypes =
+    ::testing::Types<std::pair<float, int8_t>, std::pair<float, uint8_t>,
+                     std::pair<int8_t, float>, std::pair<uint8_t, float>>;
+
+// Tests intrinsiccv_float_conversion API.
+TYPED_TEST_SUITE(FloatConversion, ElementTypes);
+
+TYPED_TEST(FloatConversion, NullPointer) {
+  using InputType = typename TypeParam::first_type;
+  using OutputType = typename TypeParam::second_type;
+  InputType input[1] = {};
+  OutputType output[1];
+  test::test_null_args(float_conversion<InputType, OutputType>(), input,
+                       sizeof(InputType), output, sizeof(OutputType), 1, 1);
+}
+
+TYPED_TEST(FloatConversion, OversizeImage) {
+  using InputType = typename TypeParam::first_type;
+  using OutputType = typename TypeParam::second_type;
+  InputType src[1] = {};
+  OutputType dst[1];
+  // Oversize width, height, then both must each yield INTRINSICCV_ERROR_RANGE.
+  // Strides are element sizes (TypeParam is the std::pair, not an element).
+  const auto oversize = INTRINSICCV_MAX_IMAGE_PIXELS + 1;
+  EXPECT_EQ(INTRINSICCV_ERROR_RANGE,
+            (float_conversion<InputType, OutputType>()(
+                src, sizeof(InputType), dst, sizeof(OutputType), oversize, 1)));
+  EXPECT_EQ(INTRINSICCV_ERROR_RANGE,
+            (float_conversion<InputType, OutputType>()(
+                src, sizeof(InputType), dst, sizeof(OutputType), 1, oversize)));
+  EXPECT_EQ(INTRINSICCV_ERROR_RANGE,
+            (float_conversion<InputType, OutputType>()(
+                src, sizeof(InputType), dst, sizeof(OutputType), oversize,
+                oversize)));
+}
+
+TYPED_TEST(FloatConversion, Scalar) {
+  using InputType = typename TypeParam::first_type;
+  using OutputType = typename TypeParam::second_type;
+  FloatConversionTest<InputType, OutputType> tester{};
+  tester.template test_linear<InputType, OutputType>(
+      test::Options::vector_length() - 1);
+}
+TYPED_TEST(FloatConversion, Vector) {
+  using InputType = typename TypeParam::first_type;
+  using OutputType = typename TypeParam::second_type;
+  FloatConversionTest<InputType, OutputType> tester{};
+  tester.template test_linear<InputType, OutputType>(
+      test::Options::vector_length() * 2);
+}
+TYPED_TEST(FloatConversion, Custom) {
+  using Tester = FloatConversionTest<typename TypeParam::first_type,
+                                     typename TypeParam::second_type>;
+  Tester{}.test_custom();
+}
+TYPED_TEST(FloatConversion, CustomFits128VectorSize) {
+  using Tester = FloatConversionTest<typename TypeParam::first_type,
+                                     typename TypeParam::second_type>;
+  Tester{}.test_sizes(4, 1);
+}
+TYPED_TEST(FloatConversion, CustomFits128VectorSize2x) {
+  using Tester = FloatConversionTest<typename TypeParam::first_type,
+                                     typename TypeParam::second_type>;
+  Tester{}.test_sizes(4, 2);
+}
+TYPED_TEST(FloatConversion, CustomFits128VectorSize3x) {
+  using Tester = FloatConversionTest<typename TypeParam::first_type,
+                                     typename TypeParam::second_type>;
+  Tester{}.test_sizes(4, 3);
+}
+TYPED_TEST(FloatConversion, CustomFits512VectorSize) {
+  using Tester = FloatConversionTest<typename TypeParam::first_type,
+                                     typename TypeParam::second_type>;
+  Tester{}.test_sizes(4, 4);
+}
+TYPED_TEST(FloatConversion, CustomFits512VectorSize2x) {
+  using Tester = FloatConversionTest<typename TypeParam::first_type,
+                                     typename TypeParam::second_type>;
+  Tester{}.test_sizes(4, 8);
+}
+TYPED_TEST(FloatConversion, CustomFits512VectorSize3x) {
+  using Tester = FloatConversionTest<typename TypeParam::first_type,
+                                     typename TypeParam::second_type>;
+  Tester{}.test_sizes(6, 8);
+}
+TYPED_TEST(FloatConversion, Custom128OneRemaining) {
+  using Tester = FloatConversionTest<typename TypeParam::first_type,
+                                     typename TypeParam::second_type>;
+  Tester{}.test_sizes(1, 17);
+}
+TYPED_TEST(FloatConversion, Custom128AllButOneRemaining) {
+  using Tester = FloatConversionTest<typename TypeParam::first_type,
+                                     typename TypeParam::second_type>;
+  Tester{}.test_sizes(5, 3);
+}
+TYPED_TEST(FloatConversion, CustomAboutHalfRemaining) {
+  using Tester = FloatConversionTest<typename TypeParam::first_type,
+                                     typename TypeParam::second_type>;
+  Tester{}.test_sizes(19, 2);
+}
+TYPED_TEST(FloatConversion, CustomEmpty) {
+  using Tester = FloatConversionTest<typename TypeParam::first_type,
+                                     typename TypeParam::second_type>;
+  Tester{}.test_sizes(0, 0);
+}
+TYPED_TEST(FloatConversion, CustomOne) {
+  using Tester = FloatConversionTest<typename TypeParam::first_type,
+                                     typename TypeParam::second_type>;
+  Tester{}.test_sizes(1, 1);
+}
diff --git a/test/api/test_float_to_int.cpp b/test/api/test_float_to_int.cpp
deleted file mode 100644
index 4c7aea619..000000000
--- a/test/api/test_float_to_int.cpp
+++ /dev/null
@@ -1,341 +0,0 @@
-// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
-//
-// SPDX-License-Identifier: Apache-2.0
-
-#include <gtest/gtest.h>
-
-#include "framework/array.h"
-#include "framework/generator.h"
-#include "framework/operation.h"
-#include "framework/special_floats.h"
-#include "framework/utils.h"
-#include "intrinsiccv/intrinsiccv.h"
-#include "test_config.h"
-
-template <typename ElementType>
-class FloatToIntTestBase {
- private:
-  template <typename T>
-  static constexpr T min() {
-    return std::numeric_limits<T>::min();
-  }
-
-  template <typename T>
-  static constexpr T max() {
-    return std::numeric_limits<T>::max();
-  }
-
-  template <typename OutputType>
-  struct Elements {
-    size_t width;
-    size_t height;
-
-    std::vector<std::vector<ElementType>> source_rows;
-    std::vector<std::vector<OutputType>> expected_rows;
-
-    Elements(size_t _width, size_t _height,
-             std::vector<std::vector<ElementType>>&& _source_rows,
-             std::vector<std::vector<OutputType>>&& _expected_rows)
-        : width(_width),
-          height(_height),
-          source_rows(std::move(_source_rows)),
-          expected_rows(std::move(_expected_rows)) {}
-  };
-
-  static constexpr uint32_t quietNaN = 0x7FC00000;
-  static constexpr uint32_t signalingNaN = 0x7FA00000;
-  static constexpr uint32_t posInfinity = 0x7F800000;
-  static constexpr uint32_t negInfinity = 0xFF800000;
-
-  static constexpr uint32_t minusNaN = 0xFF800001;
-  static constexpr uint32_t plusNaN = 0x7F800001;
-  static constexpr uint32_t plusZero = 0x00000000;
-  static constexpr uint32_t minusZero = 0x80000000;
-
-  static constexpr uint32_t oneNaN = 0x7FC00001;
-  static constexpr uint32_t zeroDivZero = 0xFFC00000;
-  static constexpr uint32_t floatMin = 0x00800000;
-  static constexpr uint32_t floatMax = 0x7F7FFFFF;
-
-  static constexpr uint32_t posSubnormalMin = 0x00000001;
-  static constexpr uint32_t posSubnormalMax = 0x007FFFFF;
-  static constexpr uint32_t negSubnormalMin = 0x80000001;
-  static constexpr uint32_t negSubnormalMax = 0x807FFFFF;
-
-  static constexpr float _floatval(uint32_t v) {
-    static_assert(sizeof(float) == 4);
-    INTRINSICCV_NO_STRICT_ALIASING_BEGIN
-    return *reinterpret_cast<float*>(&v);
-    INTRINSICCV_NO_STRICT_ALIASING_END
-  }
-
-  const Elements<int8_t> test_case_custom_f32_s8 = {
-      // clang-format off
-    4, 8,
-    {{
-      { _floatval(quietNaN), _floatval(signalingNaN), _floatval(posInfinity), _floatval(negInfinity) },
-      { _floatval(minusNaN), _floatval(plusNaN), _floatval(plusZero), _floatval(minusZero) },
-      { _floatval(oneNaN), _floatval(zeroDivZero), _floatval(floatMin), _floatval(floatMax) },
-      { _floatval(posSubnormalMin), _floatval(posSubnormalMax), _floatval(negSubnormalMin), _floatval(negSubnormalMax) },
-      { 1111.11, -1112.22, 113.33, 114.44 },
-      { 111.51, 112.62, 113.73, 114.84 },
-      { 126.66, 127.11, 128.66, 129.11 },
-      { 11.5, 12.5, -11.5, -12.5 }
-    }},
-    {{
-      { 0, 0, 127, -128 },
-      { 0, 0, 0, 0 },
-      { 0, 0, 0, 127 },
-      { 0, 0, 0, 0 },
-      { 127, -128, 113, 114 },
-      { 112, 113, 114, 115 },
-      { 127, 127, 127, 127 },
-      { 12, 12, -12, -12 }
-    }}
-      // clang-format on
-  };
-
-  const Elements<uint8_t> test_case_custom_f32_u8 = {
-      // clang-format off
-    4, 8,
-    {{
-      { _floatval(quietNaN), _floatval(signalingNaN), _floatval(posInfinity), _floatval(negInfinity) },
-      { _floatval(minusNaN), _floatval(plusNaN), _floatval(plusZero), _floatval(minusZero) },
-      { _floatval(oneNaN), _floatval(zeroDivZero), _floatval(floatMin), _floatval(floatMax) },
-      { _floatval(posSubnormalMin), _floatval(posSubnormalMax), _floatval(negSubnormalMin), _floatval(negSubnormalMax) },
-      { 1111.11, -1112.22, 113.33, 114.44 },
-      { 111.51, 112.62, 113.73, 114.84 },
-      { 126.66, 127.11, 128.66, 129.11 },
-      { 11.5, 12.5, -11.5, -12.5 }
-    }},
-    {{
-      { 0, 0, 255, 0 },
-      { 0, 0, 0, 0 },
-      { 0, 0, 0, 255 },
-      { 0, 0, 0, 0 },
-      { 255, 0, 113, 114 },
-      { 112, 113, 114, 115 },
-      { 127, 127, 129, 129 },
-      { 12, 12, 0, 0 }
-    }}
-      // clang-format on
-  };
-
-  template <typename OutputType>
-  void calculate_expected(const test::Array2D<ElementType>& source,
-                          test::Array2D<OutputType>& expected) {
-    for (size_t hindex = 0; hindex < source.height(); ++hindex) {
-      for (size_t vindex = 0; vindex < source.width(); ++vindex) {
-        OutputType calculated = 0;
-        // NOLINTBEGIN(clang-analyzer-core.uninitialized.Assign)
-        ElementType result = *source.at(hindex, vindex);
-        // NOLINTEND(clang-analyzer-core.uninitialized.Assign)
-        if (result > max<OutputType>()) {
-          calculated = max<OutputType>();
-        } else if (result < min<OutputType>()) {
-          calculated = min<OutputType>();
-        } else {
-          calculated = result;
-        }
-        *expected.at(hindex, vindex) = calculated;
-      }
-    }
-  }
-
-  class GenerateLinearSeries : public test::Generator<ElementType> {
-   public:
-    explicit GenerateLinearSeries(ElementType start_from)
-        : counter_{start_from} {}
-
-    std::optional<ElementType> next() override { return counter_++; }
-
-   private:
-    ElementType counter_;
-  };  // end of class GenerateLinearSeries
-
-  template <typename T>
-  size_t get_linear_height(size_t width, size_t minimum_size) {
-    size_t image_size =
-        std::max(minimum_size, static_cast<size_t>(max<T>() - min<T>()));
-    size_t height = image_size / width + 1;
-
-    return height;
-  }
-
-  template <typename OutputType>
-  std::tuple<test::Array2D<ElementType>, test::Array2D<OutputType>,
-             test::Array2D<OutputType>>
-  get_linear_arrays(size_t width, size_t height) {
-    test::Array2D<ElementType> source(width, height, 1, 1);
-    test::Array2D<OutputType> expected(width, height, 1, 1);
-    test::Array2D<OutputType> actual(width, height, 1, 1);
-
-    GenerateLinearSeries generator(min<OutputType>());
-
-    source.fill(generator);
-
-    calculate_expected<OutputType>(source, expected);
-
-    return {source, expected, actual};
-  }
-
- public:
-  // minimum_size set by caller to trigger the 'big' conversion path.
-  void test_linear(size_t width, size_t minimum_size = 1) {
-    size_t height = get_linear_height<int8_t>(width, minimum_size);
-
-    auto arrays_s8 = get_linear_arrays<int8_t>(width, height);
-
-    test::Array2D<ElementType>& source_s8 = std::get<0>(arrays_s8);
-    test::Array2D<int8_t>& expected_s8 = std::get<1>(arrays_s8);
-    test::Array2D<int8_t>& actual_s8 = std::get<2>(arrays_s8);
-
-    ASSERT_EQ(INTRINSICCV_OK,
-              intrinsiccv_type_conversion_f32_s8(
-                  source_s8.data(), source_s8.stride(), actual_s8.data(),
-                  actual_s8.stride(), width, height));
-
-    EXPECT_EQ_ARRAY2D(expected_s8, actual_s8);
-
-    auto arrays_u8 = get_linear_arrays<uint8_t>(width, height);
-
-    test::Array2D<ElementType>& source_u8 = std::get<0>(arrays_u8);
-    test::Array2D<uint8_t>& expected_u8 = std::get<1>(arrays_u8);
-    test::Array2D<uint8_t>& actual_u8 = std::get<2>(arrays_u8);
-
-    ASSERT_EQ(INTRINSICCV_OK,
-              intrinsiccv_type_conversion_f32_u8(
-                  source_u8.data(), source_u8.stride(), actual_u8.data(),
-                  actual_u8.stride(), width, height));
-
-    EXPECT_EQ_ARRAY2D(expected_u8, actual_u8);
-  }
-
-  void test_custom_f32_s8() {
-    const size_t& width = test_case_custom_f32_s8.width;
-    const size_t& height = test_case_custom_f32_s8.height;
-
-    test::Array2D<ElementType> source(width, height);
-    test::Array2D<int8_t> expected(width, height);
-    test::Array2D<int8_t> actual(width, height);
-
-    for (size_t i = 0; i < height; i++) {
-      source.set(i, 0, test_case_custom_f32_s8.source_rows[i]);
-      expected.set(i, 0, test_case_custom_f32_s8.expected_rows[i]);
-    }
-
-    ASSERT_EQ(INTRINSICCV_OK, intrinsiccv_type_conversion_f32_s8(
-                                  source.data(), source.stride(), actual.data(),
-                                  actual.stride(), width, height));
-
-    EXPECT_EQ_ARRAY2D(expected, actual);
-  }
-
-  void test_custom_f32_u8() {
-    const size_t& width = test_case_custom_f32_u8.width;
-    const size_t& height = test_case_custom_f32_u8.height;
-
-    test::Array2D<ElementType> source(width, height);
-    test::Array2D<uint8_t> expected(width, height);
-    test::Array2D<uint8_t> actual(width, height);
-
-    for (size_t i = 0; i < height; i++) {
-      source.set(i, 0, test_case_custom_f32_u8.source_rows[i]);
-      expected.set(i, 0, test_case_custom_f32_u8.expected_rows[i]);
-    }
-
-    ASSERT_EQ(INTRINSICCV_OK, intrinsiccv_type_conversion_f32_u8(
-                                  source.data(), source.stride(), actual.data(),
-                                  actual.stride(), width, height));
-
-    EXPECT_EQ_ARRAY2D(expected, actual);
-  }
-
-  void test_fill(const size_t width, const size_t height) {
-    test::Array2D<ElementType> source(width, height, 1, 1);
-
-    test::Array2D<int8_t> expected_s8(width, height, 1, 1);
-    test::Array2D<uint8_t> expected_u8(width, height, 1, 1);
-
-    test::Array2D<int8_t> actual_s8(width, height, 1, 1);
-    test::Array2D<uint8_t> actual_u8(width, height, 1, 1);
-
-    source.fill(10.67F);
-
-    expected_s8.fill(11);
-    expected_u8.fill(11);
-
-    actual_s8.fill(0);
-    actual_u8.fill(0);
-
-    ASSERT_EQ(INTRINSICCV_OK,
-              intrinsiccv_type_conversion_f32_s8(
-                  source.data(), source.stride(), actual_s8.data(),
-                  actual_s8.stride(), width, height));
-
-    EXPECT_EQ_ARRAY2D(expected_s8, actual_s8);
-
-    ASSERT_EQ(INTRINSICCV_OK,
-              intrinsiccv_type_conversion_f32_u8(
-                  source.data(), source.stride(), actual_u8.data(),
-                  actual_u8.stride(), width, height));
-
-    EXPECT_EQ_ARRAY2D(expected_u8, actual_u8);
-  }
-};  // end of class FloatToIntTestBase
-
-template <typename TypeParam>
-class FloatToIntTest : public testing::Test {};
-
-using ElementTypes = ::testing::Types<float>;
-
-// Tests intrinsiccv_float_to_int API.
-TYPED_TEST_SUITE(FloatToIntTest, ElementTypes);
-
-TYPED_TEST(FloatToIntTest, TestScalar) {
-  FloatToIntTestBase<TypeParam>{}.test_linear(test::Options::vector_length() -
-                                              1);
-}
-TYPED_TEST(FloatToIntTest, TestVector) {
-  FloatToIntTestBase<TypeParam>{}.test_linear(test::Options::vector_length() *
-                                              2);
-}
-TYPED_TEST(FloatToIntTest, TestCustomValuesFloat32ToInt8) {
-  FloatToIntTestBase<TypeParam>{}.test_custom_f32_s8();
-}
-TYPED_TEST(FloatToIntTest, TestCustomValuesFloat32ToUInt8) {
-  FloatToIntTestBase<TypeParam>{}.test_custom_f32_u8();
-}
-TYPED_TEST(FloatToIntTest, TestCustomFits128VectorSize) {
-  FloatToIntTestBase<TypeParam>{}.test_fill(4, 1);
-}
-TYPED_TEST(FloatToIntTest, TestCustomFits128VectorSize2x) {
-  FloatToIntTestBase<TypeParam>{}.test_fill(4, 2);
-}
-TYPED_TEST(FloatToIntTest, TestCustomFits128VectorSize3x) {
-  FloatToIntTestBase<TypeParam>{}.test_fill(4, 3);
-}
-TYPED_TEST(FloatToIntTest, TestCustomFits512VectorSize) {
-  FloatToIntTestBase<TypeParam>{}.test_fill(4, 4);
-}
-TYPED_TEST(FloatToIntTest, TestCustomFits512VectorSize2x) {
-  FloatToIntTestBase<TypeParam>{}.test_fill(4, 8);
-}
-TYPED_TEST(FloatToIntTest, TestCustomFits512VectorSize3x) {
-  FloatToIntTestBase<TypeParam>{}.test_fill(6, 8);
-}
-TYPED_TEST(FloatToIntTest, TestCustom128OneRemaining) {
-  FloatToIntTestBase<TypeParam>{}.test_fill(1, 17);
-}
-TYPED_TEST(FloatToIntTest, TestCustom128AllButOneRemaining) {
-  FloatToIntTestBase<TypeParam>{}.test_fill(5, 3);
-}
-TYPED_TEST(FloatToIntTest, TestCustomAboutHalfRemaining) {
-  FloatToIntTestBase<TypeParam>{}.test_fill(19, 2);
-}
-TYPED_TEST(FloatToIntTest, TestCustomEmpty) {
-  FloatToIntTestBase<TypeParam>{}.test_fill(0, 0);
-}
-TYPED_TEST(FloatToIntTest, TestCustomOne) {
-  FloatToIntTestBase<TypeParam>{}.test_fill(1, 1);
-}
diff --git a/test/framework/generator.h b/test/framework/generator.h
index 23126630f..c8321ad78 100644
--- a/test/framework/generator.h
+++ b/test/framework/generator.h
@@ -12,6 +12,20 @@
 
 namespace test {
 
+// Generates a linear series: every call to next() returns the current value
+// and then increments it by one. The first value returned is start_from.
+template <typename ElementType>
+class GenerateLinearSeries : public Generator<ElementType> {
+ public:
+  explicit GenerateLinearSeries(ElementType start_from)
+      : counter_{start_from} {}
+
+  std::optional<ElementType> next() override { return counter_++; }
+
+ private:
+  ElementType counter_;
+};  // end of class GenerateLinearSeries
+
 // Generates pseudo-random numbers of a given type.
 template <typename ElementType>
 class PseudoRandomNumberGenerator : public Generator<ElementType> {
diff --git a/test/framework/special_floats.h b/test/framework/special_floats.h
deleted file mode 100644
index 780f7129d..000000000
--- a/test/framework/special_floats.h
+++ /dev/null
@@ -1,8 +0,0 @@
-// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
-//
-// SPDX-License-Identifier: Apache-2.0
-
-#ifndef INTRINSICCV_TEST_FRAMEWORK_SPECIAL_FLOATS_H_
-#define INTRINSICCV_TEST_FRAMEWORK_SPECIAL_FLOATS_H_
-
-#endif  // INTRINSICCV_TEST_FRAMEWORK_SPECIAL_FLOATS_H_
diff --git a/test/framework/utils.h b/test/framework/utils.h
index 35e41e62e..83943d0d0 100644
--- a/test/framework/utils.h
+++ b/test/framework/utils.h
@@ -24,6 +24,17 @@
     return impl;                                                              \
   }
 
+// Defines a function template name<InputType, OutputType>() that returns
+// impl. It only takes part in overload resolution when InputType is itype
+// and OutputType is otype.
+#define INTRINSICCV_DIFF_IO_API(name, impl, itype, otype)                     \
+  template <typename InputType, typename OutputType,                          \
+            std::enable_if_t<std::is_same_v<InputType, itype>, bool> = true,  \
+            std::enable_if_t<std::is_same_v<OutputType, otype>, bool> = true> \
+  static decltype(auto) name() {                                              \
+    return impl;                                                              \
+  }
+
 // Generates a fatal failure with a generic message, and returns with a given
 // value.
 #define TEST_FAIL_WITH(return_value, message)                          \
-- 
GitLab