diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp
index 04b672a081a92ea49e17737bcf3af11b5de9d10d..627fb545d1af976c558545610688ab18559641bb 100644
--- a/adapters/opencv/kleidicv_hal.cpp
+++ b/adapters/opencv/kleidicv_hal.cpp
@@ -707,6 +707,33 @@ int convertTo(const uchar *src_data, size_t src_step, int src_depth,
     }
   }
 
+  // type conversion only
+  if (scale == 1.0 && shift == 0.0) {
+    // float32 to int8
+    if (src_depth == CV_32F && dst_depth == CV_8S) {
+      return convert_error(kleidicv_float_conversion_f32_s8(
+          reinterpret_cast<const float *>(src_data), src_step,
+          reinterpret_cast<int8_t *>(dst_data), dst_step, width, height));
+    }
+    // float32 to uint8
+    if (src_depth == CV_32F && dst_depth == CV_8U) {
+      return convert_error(kleidicv_float_conversion_f32_u8(
+          reinterpret_cast<const float *>(src_data), src_step,
+          reinterpret_cast<uint8_t *>(dst_data), dst_step, width, height));
+    }
+    // int8 to float32
+    if (src_depth == CV_8S && dst_depth == CV_32F) {
+      return convert_error(kleidicv_float_conversion_s8_f32(
+          reinterpret_cast<const int8_t *>(src_data), src_step,
+          reinterpret_cast<float *>(dst_data), dst_step, width, height));
+    }
+    // uint8 to float32
+    if (src_depth == CV_8U && dst_depth == CV_32F) {
+      return convert_error(kleidicv_float_conversion_u8_f32(
+          reinterpret_cast<const uint8_t *>(src_data), src_step,
+          reinterpret_cast<float *>(dst_data), dst_step, width, height));
+    }
+  }
   return CV_HAL_ERROR_NOT_IMPLEMENTED;
 }
 
diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp
index e4772357e20909e97d946be93649e91e49081c1a..780400ec9e64080a03fcf3cad5c1a2a1928c2041 100644
--- a/benchmark/benchmark.cpp
+++ b/benchmark/benchmark.cpp
@@ -76,6 +76,36 @@ BENCH_UNARY_OP(rgba_to_yuv_u8, 4, uint8_t);
 BENCH_UNARY_OP(bgr_to_yuv_u8, 3, uint8_t);
 BENCH_UNARY_OP(bgra_to_yuv_u8, 4, uint8_t);
 
+template <typename I, typename O, typename Function>
+static void bench_unary_op(Function f, benchmark::State& state) {
+  // Setup
+  std::vector<I> src;
+  std::vector<O> dst;
+  src.resize(image_width * image_height);
+  dst.resize(image_width * image_height);
+
+  std::mt19937 generator;
+  std::generate(src.begin(), src.end(), generator);
+
+  for (auto _ : state) {
+    // This code gets benchmarked
+    auto unused = f(src.data(), image_width, dst.data(), image_width,
+                    image_width, image_height);
+    (void)unused;
+  }
+}
+
+#define BENCH_UNARY_OP_DIFFERENT_IO_TYPES(name, itype, otype) \
+  static void name(benchmark::State& state) {                 \
+    bench_unary_op<itype, otype>(kleidicv_##name, state);     \
+  }                                                           \
+  BENCHMARK(name)
+
+BENCH_UNARY_OP_DIFFERENT_IO_TYPES(float_conversion_f32_s8, float, int8_t);
+BENCH_UNARY_OP_DIFFERENT_IO_TYPES(float_conversion_f32_u8, float, uint8_t);
+BENCH_UNARY_OP_DIFFERENT_IO_TYPES(float_conversion_s8_f32, int8_t, float);
+BENCH_UNARY_OP_DIFFERENT_IO_TYPES(float_conversion_u8_f32, uint8_t, float);
+
 static void min_max_loc_u8(benchmark::State& state) {
   // Setup
   std::vector<uint8_t> src;
diff --git a/conformity/opencv/CMakeLists.txt b/conformity/opencv/CMakeLists.txt
index 4fc695794d84c71fd7dc685035d19aecfc01be2c..7bb122fd65e206a05b9cea6b5662a44370490fc1 100644
--- a/conformity/opencv/CMakeLists.txt
+++ b/conformity/opencv/CMakeLists.txt
@@ -36,6 +36,7 @@ add_executable(
   test_rgb2yuv.cpp
   test_sobel.cpp
   test_exp.cpp
+  test_float_conv.cpp
 )
 
 target_link_libraries(
@@ -76,6 +77,8 @@ add_executable(
   test_rgb2yuv.cpp
   test_sobel.cpp
   test_exp.cpp
+  test_float_conv.cpp
+
 )
 
 target_link_libraries(
diff --git a/conformity/opencv/test_float_conv.cpp b/conformity/opencv/test_float_conv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..13e8c88121c238ec941cfc07c70d8ef605224349
--- /dev/null
+++ b/conformity/opencv/test_float_conv.cpp
@@ -0,0 +1,209 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "test_float_conv.h"
+
+#include <limits>
+#include <vector>
+
+float floatval(uint32_t v) {
+  float result;
+  static_assert(sizeof(result) == sizeof(v));
+  memcpy(&result, &v, sizeof(result));
+  return result;
+}
+
+float quietNaN = std::numeric_limits<float>::quiet_NaN();
+float signalingNaN = std::numeric_limits<float>::signaling_NaN();
+float posInfinity = std::numeric_limits<float>::infinity();
+float negInfinity = -std::numeric_limits<float>::infinity();
+
+float minusNaN = floatval(0xFF800001);
+float plusNaN = floatval(0x7F800001);
+float plusZero = 0.0F;
+float minusZero = -0.0F;
+
+float oneNaN = floatval(0x7FC00001);
+float zeroDivZero = -std::numeric_limits<float>::quiet_NaN();
+float floatMin = std::numeric_limits<float>::min();
+float floatMax = std::numeric_limits<float>::max();
+
+float posSubnormalMin = std::numeric_limits<float>::denorm_min();
+float posSubnormalMax = floatval(0x007FFFFF);
+float negSubnormalMin = -std::numeric_limits<float>::denorm_min();
+float negSubnormalMax = floatval(0x807FFFFF);
+
+template <bool Signed>
+cv::Mat exec_float32_to_int8(cv::Mat& input) {
+  cv::Mat result;
+  input.convertTo(result, Signed ? CV_8SC1 : CV_8UC1);
+  return result;
+}
+
+cv::Mat exec_int8_to_float32(cv::Mat& input) {
+  cv::Mat result;
+  input.convertTo(result, CV_32FC1);
+  return result;
+}
+
+#if MANAGER
+template <bool Signed, size_t Channels>
+bool test_float32_to_int8_random(int index,
+                                 RecreatedMessageQueue& request_queue,
+                                 RecreatedMessageQueue& reply_queue) {
+  cv::RNG rng(0);
+
+  for (size_t x = 5; x <= 16; ++x) {
+    for (size_t y = 5; y <= 16; ++y) {
+      cv::Mat input(x, y, CV_32FC(Channels));
+      rng.fill(input, cv::RNG::UNIFORM, Signed ? -1000 : 0, 1000);
+
+      cv::Mat actual = exec_float32_to_int8<Signed>(input);
+      cv::Mat expected = get_expected_from_subordinate(index, request_queue,
+                                                       reply_queue, input);
+
+      if (are_matrices_different<uint8_t>(0, actual, expected)) {
+        fail_print_matrices(x, y, input, actual, expected);
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+template <bool Signed, size_t Channels>
+bool test_int8_to_float32_random(int index,
+                                 RecreatedMessageQueue& request_queue,
+                                 RecreatedMessageQueue& reply_queue) {
+  cv::RNG rng(0);
+
+  for (size_t x = 5; x <= 16; ++x) {
+    for (size_t y = 5; y <= 16; ++y) {
+      cv::Mat input(x, y, Signed ? CV_8SC(Channels) : CV_8UC(Channels));
+      rng.fill(input, cv::RNG::UNIFORM, Signed ? -1000 : 0, 1000);
+
+      cv::Mat actual = exec_int8_to_float32(input);
+      cv::Mat expected = get_expected_from_subordinate(index, request_queue,
+                                                       reply_queue, input);
+
+      if (are_matrices_different<float>(0, actual, expected)) {
+        fail_print_matrices(x, y, input, actual, expected);
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+static constexpr int custom_data_float_height = 8;
+static constexpr int custom_data_float_width = 4;
+
+static float
+    custom_data_float[custom_data_float_height * custom_data_float_width] = {
+        // clang-format off
+        quietNaN, signalingNaN, posInfinity, negInfinity,
+        minusNaN, plusNaN, plusZero, minusZero,
+        oneNaN, zeroDivZero, floatMin, floatMax,
+        posSubnormalMin, posSubnormalMax, negSubnormalMin, negSubnormalMax,
+        1111.11, -1112.22, 113.33, 114.44,
+        111.51, 112.62, 113.73, 114.84,
+        126.66, 127.11, 128.66, 129.11,
+        11.5, 12.5, -11.5, -12.5,
+        // clang-format on
+};
+
+static constexpr int custom_data_int8_height = 1;
+static constexpr int custom_data_int8_width = 7;
+
+static int8_t
+    custom_data_int8[custom_data_int8_height * custom_data_int8_width] = {
+        // clang-format off
+        -128, -127, -1, 0, 1, 126, 127
+        // clang-format on
+};
+
+static uint8_t
+    custom_data_uint8[custom_data_int8_height * custom_data_int8_width] = {
+        // clang-format off
+        0, 1, 126, 127, 128, 254, 255
+        // clang-format on
+};
+
+template <bool Signed>
+bool test_float32_to_int8_custom(int index,
+                                 RecreatedMessageQueue& request_queue,
+                                 RecreatedMessageQueue& reply_queue) {
+  cv::Mat input(custom_data_float_height, custom_data_float_width, CV_32FC1,
+                custom_data_float);
+
+  cv::Mat actual = exec_float32_to_int8<Signed>(input);
+  cv::Mat expected =
+      get_expected_from_subordinate(index, request_queue, reply_queue, input);
+
+  if (are_matrices_different<uint8_t>(0, actual, expected)) {
+    fail_print_matrices(custom_data_float_height, custom_data_float_width,
+                        input, actual, expected);
+    return true;
+  }
+
+  return false;
+}
+
+template <bool Signed>
+bool test_int8_to_float32_custom(int index,
+                                 RecreatedMessageQueue& request_queue,
+                                 RecreatedMessageQueue& reply_queue) {
+  cv::Mat input(custom_data_int8_height, custom_data_int8_width,
+                Signed ? CV_8SC1 : CV_8UC1,
+                Signed ? static_cast<void*>(custom_data_int8)
+                       : static_cast<void*>(custom_data_uint8));
+
+  cv::Mat actual = exec_int8_to_float32(input);
+  cv::Mat expected =
+      get_expected_from_subordinate(index, request_queue, reply_queue, input);
+
+  if (are_matrices_different<float>(0, actual, expected)) {
+    fail_print_matrices(custom_data_int8_height, custom_data_int8_width, input,
+                        actual, expected);
+    return true;
+  }
+
+  return false;
+}
+#endif
+
+std::vector<test>& float_conversion_tests_get() {
+  // clang-format off
+  static std::vector<test> tests = {
+    TEST("Float32 to Signed Int8, random, 1 channel", (test_float32_to_int8_random<true, 1>), exec_float32_to_int8<true>),
+    TEST("Float32 to Signed Int8, random, 2 channel", (test_float32_to_int8_random<true, 2>), exec_float32_to_int8<true>),
+    TEST("Float32 to Signed Int8, random, 3 channel", (test_float32_to_int8_random<true, 3>), exec_float32_to_int8<true>),
+    TEST("Float32 to Signed Int8, random, 4 channel", (test_float32_to_int8_random<true, 4>), exec_float32_to_int8<true>),
+
+    TEST("Float32 to Unsigned Int8, random, 1 channel", (test_float32_to_int8_random<false, 1>), exec_float32_to_int8<false>),
+    TEST("Float32 to Unsigned Int8, random, 2 channel", (test_float32_to_int8_random<false, 2>), exec_float32_to_int8<false>),
+    TEST("Float32 to Unsigned Int8, random, 3 channel", (test_float32_to_int8_random<false, 3>), exec_float32_to_int8<false>),
+    TEST("Float32 to Unsigned Int8, random, 4 channel", (test_float32_to_int8_random<false, 4>), exec_float32_to_int8<false>),
+
+    TEST("Float32 to Signed Int8, custom (special)", test_float32_to_int8_custom<true>, exec_float32_to_int8<true>),
+    TEST("Float32 to Unsigned Int8, custom (special)", test_float32_to_int8_custom<false>, exec_float32_to_int8<false>),
+
+    TEST("Signed Int8 to Float32, random, 1 channel", (test_int8_to_float32_random<true, 1>), exec_int8_to_float32),
+    TEST("Signed Int8 to Float32, random, 2 channel", (test_int8_to_float32_random<true, 2>), exec_int8_to_float32),
+    TEST("Signed Int8 to Float32, random, 3 channel", (test_int8_to_float32_random<true, 3>), exec_int8_to_float32),
+    TEST("Signed Int8 to Float32, random, 4 channel", (test_int8_to_float32_random<true, 4>), exec_int8_to_float32),
+
+    TEST("Unsigned Int8 to Float32, random, 1 channel", (test_int8_to_float32_random<false, 1>), exec_int8_to_float32),
+    TEST("Unsigned Int8 to Float32, random, 2 channel", (test_int8_to_float32_random<false, 2>), exec_int8_to_float32),
+    TEST("Unsigned Int8 to Float32, random, 3 channel", (test_int8_to_float32_random<false, 3>), exec_int8_to_float32),
+    TEST("Unsigned Int8 to Float32, random, 4 channel", (test_int8_to_float32_random<false, 4>), exec_int8_to_float32),
+
+    TEST("Signed Int8 to Float32, custom", test_int8_to_float32_custom<true>, exec_int8_to_float32),
+    TEST("Unigned Int8 to Float32, custom", test_int8_to_float32_custom<false>, exec_int8_to_float32),
+  };
+  // clang-format on
+  return tests;
+}
diff --git a/conformity/opencv/test_float_conv.h b/conformity/opencv/test_float_conv.h
new file mode 100644
index 0000000000000000000000000000000000000000..9e4c40be6f28f9d49e2ca3e525ce3fd6c395b094
--- /dev/null
+++ b/conformity/opencv/test_float_conv.h
@@ -0,0 +1,14 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef KLEIDICV_OPENCV_CONFORMITY_TEST_FLOAT_CONV_H_
+#define KLEIDICV_OPENCV_CONFORMITY_TEST_FLOAT_CONV_H_
+
+#include <vector>
+
+#include "tests.h"
+
+std::vector<test>& float_conversion_tests_get();
+
+#endif  // KLEIDICV_OPENCV_CONFORMITY_TEST_FLOAT_CONV_H_
diff --git a/conformity/opencv/tests.cpp b/conformity/opencv/tests.cpp
index cb5fe944c3f92105461ce6548b90a4db8c0171f7..8064a6565ec987aa949cc235ed83f686fceede89 100644
--- a/conformity/opencv/tests.cpp
+++ b/conformity/opencv/tests.cpp
@@ -12,6 +12,7 @@
 #include "opencv2/imgproc.hpp"
 #include "test_binary_op.h"
 #include "test_exp.h"
+#include "test_float_conv.h"
 #include "test_gaussian_blur.h"
 #include "test_min_max.h"
 #include "test_rgb2yuv.h"
@@ -34,6 +35,7 @@ std::vector<test> all_tests = merge_tests({
     rgb2yuv_tests_get,
     sobel_tests_get,
     exp_tests_get,
+    float_conversion_tests_get,
 });
 
 #if MANAGER
diff --git a/doc/opencv.md b/doc/opencv.md
index 0aa0bb525c201b8c4e70ecec436afd60538051a6..57f7d7e32a228d4d9df5a178b1564d5b6cde50c6 100644
--- a/doc/opencv.md
+++ b/doc/opencv.md
@@ -167,7 +167,16 @@ Notes on parameters:
   + `CV_32S`
 
 ### `convertTo`
-Currently converting to different data types is not supported. This function scales given input of `src_depth == CV_8U` using `scale` and `shift`.
+This function will scale given input using `scale` and `shift` if they are significant enough, and if `src_depth` and `dst_depth` are equal to `CV_8U`.
+
+Additionally, it is able to convert between data types as follows:
+
+| src_depth | dst_depth |
+|-----------|-----------|
+|   CV_32F  |   CV_8S   |
+|   CV_32F  |   CV_8U   |
+|   CV_8S   |   CV_32F  |
+|   CV_8U   |   CV_32F  |
 
 ### `exp`
 Exponential function. Currently only `CV_32F` type is supported.
@@ -181,4 +190,4 @@ Notes on parameters:
 * `operation` - flag specifying correspondence between the arrays.
 Supported [OpenCV cmp types](https://docs.opencv.org/5.x/d2/de8/group__core__array.html#ga0cc47ff833d40b58ecbe1d609a53d784) are:
   + `cv::CMP_EQ `
-  + `cv::CMP_GT`
+  + `cv::CMP_GT`
\ No newline at end of file
diff --git a/kleidicv/CMakeLists.txt b/kleidicv/CMakeLists.txt
index b06555250b62d3c7c1e6544e283606f9e02092dc..8ece23cd4956acfacc9a3a0a5130c9645e2705a3 100644
--- a/kleidicv/CMakeLists.txt
+++ b/kleidicv/CMakeLists.txt
@@ -166,7 +166,7 @@ if(KLEIDICV_BUILD_SME2)
   set_target_properties(kleidicv_sme2 PROPERTIES CXX_STANDARD 17)
   target_compile_options(kleidicv_sme2 PRIVATE
     ${KLEIDICV_CXX_FLAGS}
-    "-march=armv9-a+sve2+sme2"
+    "-march=armv9-a+sve2+sme2+nosimd"
     "-DKLEIDICV_TARGET_SME2=1"
   )
 endif()
diff --git a/kleidicv/include/kleidicv/kleidicv.h b/kleidicv/include/kleidicv/kleidicv.h
index 88dfdc6bd2e6ee91d96a29e6bf1948464e45010d..b27f585826f59b5ce5d1aa7cb6cf0e7ccd0c4dcf 100644
--- a/kleidicv/include/kleidicv/kleidicv.h
+++ b/kleidicv/include/kleidicv/kleidicv.h
@@ -1487,6 +1487,65 @@ KLEIDICV_API_DECLARATION(kleidicv_exp_f32, const float *src, size_t src_stride,
                          float *dst, size_t dst_stride, size_t width,
                          size_t height);
 
+/// Converts the elements in `src` from a floating-point type to an integer
+/// type, then stores the result in `dst`.
+///
+/// Each resulting element is saturated, i.e. it is the smallest/largest
+/// number of the type of the element if the `src` data type cannot be
+/// represented as the `dst` type. In case of some special values, such as the
+/// different variations of `NaN`, the result is `0`. Source and destination
+/// data length is `width` * `height`. Number of elements is limited to @ref
+/// KLEIDICV_MAX_IMAGE_PIXELS.
+///
+/// @param src          Pointer to the source data. Must be non-null.
+/// @param src_stride   Distance in bytes from the start of one row to the
+///                     start of the next row for the source data.
+///                     Must not be less than width * sizeof(type).
+/// @param dst          Pointer to the destination data. Must be non-null.
+/// @param dst_stride   Distance in bytes from the start of one row to the
+///                     start of the next row for the destination data.
+///                     Must not be less than width * sizeof(type).
+/// @param width        Number of elements in a row.
+/// @param height       Number of rows in the data.
+///
+KLEIDICV_API_DECLARATION(kleidicv_float_conversion_f32_s8, const float *src,
+                         size_t src_stride, int8_t *dst, size_t dst_stride,
+                         size_t width, size_t height);
+/// @copydoc kleidicv_float_conversion_f32_s8
+KLEIDICV_API_DECLARATION(kleidicv_float_conversion_f32_u8, const float *src,
+                         size_t src_stride, uint8_t *dst, size_t dst_stride,
+                         size_t width, size_t height);
+
+/// Converts the elements in `src` from an integer type to a floating-point
+/// type, then stores the result in `dst`.
+///
+/// Each resulting element is saturated, i.e. it is the smallest/largest
+/// number of the type of the element if the `src` data type cannot be
+/// represented as the `dst` type. Source and destination data length is
+/// `width` * `height`. Number of elements is limited to @ref
+/// KLEIDICV_MAX_IMAGE_PIXELS.
+///
+/// @param src          Pointer to the source data. Must be non-null.
+/// @param src_stride   Distance in bytes from the start of one row to the
+///                     start of the next row for the source data. Must
+///                     not be less than width * sizeof(type).
+///                     Must be a multiple of sizeof(type).
+/// @param dst          Pointer to the destination data. Must be non-null.
+/// @param dst_stride   Distance in bytes from the start of one row to the
+///                     start of the next row for the destination data. Must
+///                     not be less than width * sizeof(type).
+///                     Must be a multiple of sizeof(type).
+/// @param width        Number of pixels in a row.
+/// @param height       Number of rows in the data.
+///
+KLEIDICV_API_DECLARATION(kleidicv_float_conversion_s8_f32, const int8_t *src,
+                         size_t src_stride, float *dst, size_t dst_stride,
+                         size_t width, size_t height);
+/// @copydoc kleidicv_float_conversion_s8_f32
+KLEIDICV_API_DECLARATION(kleidicv_float_conversion_u8_f32, const uint8_t *src,
+                         size_t src_stride, float *dst, size_t dst_stride,
+                         size_t width, size_t height);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif  // __cplusplus
diff --git a/kleidicv/include/kleidicv/neon_intrinsics.h b/kleidicv/include/kleidicv/neon_intrinsics.h
index 62053bc8e72418f3dc28f51c009c8e5d46d4bd6d..0add004594adfc5058ecbfcbbced35a7bff182b1 100644
--- a/kleidicv/include/kleidicv/neon_intrinsics.h
+++ b/kleidicv/include/kleidicv/neon_intrinsics.h
@@ -349,7 +349,14 @@ static inline float32x4x4_t vld1q_x4(const float32_t *src) { return vld1q_f32_x4
 // NEON store operations
 // -----------------------------------------------------------------------------
 
-static inline void vst1(uint8_t *dst, uint8x8_t vec) { vst1_u8(dst, vec); }
+static inline void vst1(int8_t   *dst, int8x8_t   vec) {  vst1_s8(dst, vec); }
+static inline void vst1(uint8_t  *dst, uint8x8_t  vec) {  vst1_u8(dst, vec); }
+static inline void vst1(int16_t  *dst, int16x4_t  vec) { vst1_s16(dst, vec); }
+static inline void vst1(uint16_t *dst, uint16x4_t vec) { vst1_u16(dst, vec); }
+static inline void vst1(int32_t  *dst, int32x2_t  vec) { vst1_s32(dst, vec); }
+static inline void vst1(uint32_t *dst, uint32x2_t vec) { vst1_u32(dst, vec); }
+static inline void vst1(int64_t  *dst, int64x1_t  vec) { vst1_s64(dst, vec); }
+static inline void vst1(uint64_t *dst, uint64x1_t vec) { vst1_u64(dst, vec); }
 
 static inline void vst1q(int8_t    *dst, int8x16_t   vec) { vst1q_s8(dst, vec); }
 static inline void vst1q(uint8_t   *dst, uint8x16_t  vec) { vst1q_u8(dst, vec); }
@@ -433,6 +440,19 @@ static inline uint64x2_t vreinterpretq_u64(uint32x4_t vec) { return vreinterpret
 static inline uint64x2_t vreinterpretq_u64(int64x2_t  vec) { return vreinterpretq_u64_s64(vec); }
 static inline uint64x2_t vreinterpretq_u64(uint64x2_t vec) { return vec; }
 
+// -----------------------------------------------------------------------------
+// vcombine*
+// -----------------------------------------------------------------------------
+
+static inline int8x16_t  vcombine(int8x8_t   lhs, int8x8_t   rhs) { return vcombine_s8(lhs, rhs); }
+static inline uint8x16_t vcombine(uint8x8_t  lhs, uint8x8_t  rhs) { return vcombine_u8(lhs, rhs); }
+static inline int16x8_t  vcombine(int16x4_t  lhs, int16x4_t  rhs) { return vcombine_s16(lhs, rhs); }
+static inline uint16x8_t vcombine(uint16x4_t lhs, uint16x4_t rhs) { return vcombine_u16(lhs, rhs); }
+static inline int32x4_t  vcombine(int32x2_t  lhs, int32x2_t  rhs) { return vcombine_s32(lhs, rhs); }
+static inline uint32x4_t vcombine(uint32x2_t lhs, uint32x2_t rhs) { return vcombine_u32(lhs, rhs); }
+static inline int64x2_t  vcombine(int64x1_t  lhs, int64x1_t  rhs) { return vcombine_s64(lhs, rhs); }
+static inline uint64x2_t vcombine(uint64x1_t lhs, uint64x1_t rhs) { return vcombine_u64(lhs, rhs); }
+
 // clang-format on
 
 }  // namespace kleidicv::neon
diff --git a/kleidicv/src/conversions/float_conv_api.cpp b/kleidicv/src/conversions/float_conv_api.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0f3fdea2dcc1a00a80a99eba948063d34ea28315
--- /dev/null
+++ b/kleidicv/src/conversions/float_conv_api.cpp
@@ -0,0 +1,71 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "kleidicv/dispatch.h"
+#include "kleidicv/kleidicv.h"
+#include "kleidicv/types.h"
+
+namespace kleidicv {
+
+namespace neon {
+
+template <typename InputType, typename OutputType>
+kleidicv_error_t float_conversion(const InputType* src, size_t src_stride,
+                                  OutputType* dst, size_t dst_stride,
+                                  size_t width, size_t height);
+
+}  // namespace neon
+
+namespace sve2 {
+
+template <typename InputType, typename OutputType>
+kleidicv_error_t float_conversion(const InputType* src, size_t src_stride,
+                                  OutputType* dst, size_t dst_stride,
+                                  size_t width, size_t height);
+
+}  // namespace sve2
+
+namespace sme2 {
+
+template <typename InputType, typename OutputType>
+kleidicv_error_t float_conversion(const InputType* src, size_t src_stride,
+                                  OutputType* dst, size_t dst_stride,
+                                  size_t width, size_t height);
+
+}  // namespace sme2
+
+#ifdef KLEIDICV_HAVE_SVE2
+#define SVE2_FUNC_POINTER(name, itype, otype)                \
+  [[maybe_unused]] static auto sve2_func_##itype##_##otype = \
+      kleidicv::sve2::float_conversion<itype, otype>;
+#else
+#define SVE2_FUNC_POINTER(name, itype, otype)
+#endif  // KLEIDICV_HAVE_SVE2
+
+#ifdef KLEIDICV_HAVE_SME2
+#define SME2_FUNC_POINTER(name, itype, otype) \
+  static auto sme2_func_##itype##_##otype =   \
+      kleidicv::sme2::float_conversion<itype, otype>;
+#else
+#define SME2_FUNC_POINTER(name, itype, otype)
+#endif  // KLEIDICV_HAVE_SME2
+
+// NOLINTBEGIN(cppcoreguidelines-avoid-non-const-global-variables)
+#define KLEIDICV_DEFINE_C_API(name, itype, otype)         \
+  static auto neon_func_##itype##_##otype =               \
+      kleidicv::neon::float_conversion<itype, otype>;     \
+  SVE2_FUNC_POINTER(name, itype, otype);                  \
+  SME2_FUNC_POINTER(name, itype, otype);                  \
+  KLEIDICV_MULTIVERSION_C_API(                            \
+      name, neon_func_##itype##_##otype,                  \
+      KLEIDICV_SVE2_IMPL_IF(sve2_func_##itype##_##otype), \
+      sme2_func_##itype##_##otype)
+// NOLINTEND(cppcoreguidelines-avoid-non-const-global-variables)
+
+KLEIDICV_DEFINE_C_API(kleidicv_float_conversion_f32_s8, float, int8_t);
+KLEIDICV_DEFINE_C_API(kleidicv_float_conversion_f32_u8, float, uint8_t);
+KLEIDICV_DEFINE_C_API(kleidicv_float_conversion_s8_f32, int8_t, float);
+KLEIDICV_DEFINE_C_API(kleidicv_float_conversion_u8_f32, uint8_t, float);
+
+}  // namespace kleidicv
diff --git a/kleidicv/src/conversions/float_conv_neon.cpp b/kleidicv/src/conversions/float_conv_neon.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ba8c17cb6945b8a607dea5149d29e8f8de14a1bc
--- /dev/null
+++ b/kleidicv/src/conversions/float_conv_neon.cpp
@@ -0,0 +1,169 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <cmath>
+
+#include "kleidicv/kleidicv.h"
+#include "kleidicv/neon.h"
+
+namespace kleidicv::neon {
+
+template <typename InputType, typename OutputType>
+class float_conversion_operation;
+
+template <typename OutputType>
+class float_conversion_operation<float, OutputType> {
+ public:
+  using SrcVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<float>;
+  using SrcVectorType = typename SrcVecTraits::VectorType;
+  using IntermediateVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<
+      std::conditional_t<std::is_signed_v<OutputType>, int32_t, uint32_t>>;
+  using IntermediateVectorType = typename IntermediateVecTraits::VectorType;
+
+  void process_row(size_t width, Columns<const float> src,
+                   Columns<OutputType> dst) {
+    LoopUnroll{width, SrcVecTraits::num_lanes()}
+        .unroll_twice([&](size_t step) {
+          SrcVectorType src_vector1 = vld1q_f32(&src[0]);
+          SrcVectorType src_vector2 =
+              vld1q_f32(&src[SrcVecTraits::num_lanes()]);
+          IntermediateVectorType result_vector1 =
+              vector_path<OutputType>(src_vector1);
+          IntermediateVectorType result_vector2 =
+              vector_path<OutputType>(src_vector2);
+          vst1(&dst[0], vqmovn(vcombine(vqmovn(result_vector1),
+                                        vqmovn(result_vector2))));
+          src += ptrdiff_t(step);
+          dst += ptrdiff_t(step);
+        })
+        .remaining([&](size_t length, size_t) {
+          for (size_t index = 0; index < length; ++index) {
+            disable_loop_vectorization();
+            float f = std::nearbyint(src[ptrdiff_t(index)]);
+            if (f > std::numeric_limits<OutputType>::max()) {
+              f = std::numeric_limits<OutputType>::max();
+            } else if (f < std::numeric_limits<OutputType>::min()) {
+              f = std::numeric_limits<OutputType>::min();
+            }
+            dst[index] = static_cast<OutputType>(f);
+          }
+        });
+  }
+
+ private:
+  template <
+      typename O,
+      std::enable_if_t<std::is_integral_v<O> && std::is_signed_v<O>, int> = 0>
+  IntermediateVectorType vector_path(SrcVectorType src) {
+    IntermediateVectorType result = vcvtnq_s32_f32(src);
+    return result;
+  }
+
+  template <
+      typename O,
+      std::enable_if_t<std::is_integral_v<O> && !std::is_signed_v<O>, int> = 0>
+  IntermediateVectorType vector_path(SrcVectorType src) {
+    IntermediateVectorType result = vcvtnq_u32_f32(src);
+    return result;
+  }
+};  // end of class float_conversion_operation<float, OutputType>
+
+template <typename InputType>
+class float_conversion_operation<InputType, float> {
+ public:
+  using SrcVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<InputType>;
+  using SrcVectorType = typename SrcVecTraits::VectorType;
+  using DstVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<float>;
+  using DstVectorType = typename DstVecTraits::VectorType;
+  using DstVector4Type = typename DstVecTraits::Vector4Type;
+
+  void process_row(size_t width, Columns<const InputType> src,
+                   Columns<float> dst) {
+    LoopUnroll{width, SrcVecTraits::num_lanes()}
+        .unroll_twice([&](size_t step) {
+          DstVector4Type result_vector1 =
+              vector_path<InputType>(vld1q(&src[0]));
+          DstVector4Type result_vector2 =
+              vector_path<InputType>(vld1q(&src[SrcVecTraits::num_lanes()]));
+          vst1q_f32_x4(&dst[0], result_vector1);
+          vst1q_f32_x4(&dst[DstVecTraits::num_lanes() * 4], result_vector2);
+          src += ptrdiff_t(step);
+          dst += ptrdiff_t(step);
+        })
+        .remaining([&](size_t length, size_t) {
+          for (size_t index = 0; index < length; ++index) {
+            disable_loop_vectorization();
+            InputType n = src[ptrdiff_t(index)];
+            dst[ptrdiff_t(index)] = static_cast<float>(n);
+          }
+        });
+  }
+
+ private:
+  template <
+      typename I,
+      std::enable_if_t<std::is_integral_v<I> && std::is_signed_v<I>, int> = 0>
+  DstVector4Type vector_path(const SrcVectorType src) {
+    DstVector4Type dst_vect;
+    int16x8_t low = vmovl_s8(vget_low_s8(src));
+    int16x8_t hi = vmovl_high_s8(src);
+    int32x4_t lowlow = vmovl_s16(vget_low_s16(low));
+    int32x4_t lowhi = vmovl_high_s16(low);
+    int32x4_t hilow = vmovl_s16(vget_low_s16(hi));
+    int32x4_t hihi = vmovl_high_s16(hi);
+    dst_vect.val[0] = vcvtq_f32_s32(lowlow);
+    dst_vect.val[1] = vcvtq_f32_s32(lowhi);
+    dst_vect.val[2] = vcvtq_f32_s32(hilow);
+    dst_vect.val[3] = vcvtq_f32_s32(hihi);
+    return dst_vect;
+  }
+
+  template <
+      typename I,
+      std::enable_if_t<std::is_integral_v<I> && !std::is_signed_v<I>, int> = 0>
+  DstVector4Type vector_path(const SrcVectorType src) {
+    DstVector4Type dst_vect;
+    uint16x8_t low = vmovl_u8(vget_low_u8(src));
+    uint16x8_t hi = vmovl_high_u8(src);
+    uint32x4_t lowlow = vmovl_u16(vget_low_u16(low));
+    uint32x4_t lowhi = vmovl_high_u16(low);
+    uint32x4_t hilow = vmovl_u16(vget_low_u16(hi));
+    uint32x4_t hihi = vmovl_high_u16(hi);
+    dst_vect.val[0] = vcvtq_f32_u32(lowlow);
+    dst_vect.val[1] = vcvtq_f32_u32(lowhi);
+    dst_vect.val[2] = vcvtq_f32_u32(hilow);
+    dst_vect.val[3] = vcvtq_f32_u32(hihi);
+    return dst_vect;
+  }
+};  // end of class float_conversion_operation<InputType, float>
+
+template <typename InputType, typename OutputType>
+kleidicv_error_t float_conversion(const InputType* src, size_t src_stride,
+                                  OutputType* dst, size_t dst_stride,
+                                  size_t width, size_t height) {
+  CHECK_POINTER_AND_STRIDE(src, src_stride, height);
+  CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
+  CHECK_IMAGE_SIZE(width, height);
+
+  float_conversion_operation<InputType, OutputType> operation;
+  Rectangle rect{width, height};
+  Rows<const InputType> src_rows{src, src_stride};
+  Rows<OutputType> dst_rows{dst, dst_stride};
+  zip_rows(operation, rect, src_rows, dst_rows);
+
+  return KLEIDICV_OK;
+}
+
+#define KLEIDICV_INSTANTIATE_TEMPLATE(itype, otype)                           \
+  template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t                          \
+  float_conversion<itype, otype>(const itype* src, size_t src_stride,         \
+                                 otype* dst, size_t dst_stride, size_t width, \
+                                 size_t height)
+
+KLEIDICV_INSTANTIATE_TEMPLATE(float, int8_t);
+KLEIDICV_INSTANTIATE_TEMPLATE(float, uint8_t);
+KLEIDICV_INSTANTIATE_TEMPLATE(int8_t, float);
+KLEIDICV_INSTANTIATE_TEMPLATE(uint8_t, float);
+
+}  // namespace kleidicv::neon
diff --git a/kleidicv/src/conversions/float_conv_sc.h b/kleidicv/src/conversions/float_conv_sc.h
new file mode 100644
index 0000000000000000000000000000000000000000..d603d0c31bf2d5e879b3e80c90c5bfa83d1fd162
--- /dev/null
+++ b/kleidicv/src/conversions/float_conv_sc.h
@@ -0,0 +1,180 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef KLEIDICV_FLOAT_CONV_SC_H
+#define KLEIDICV_FLOAT_CONV_SC_H
+
+#include <limits>
+#include <type_traits>
+
+#include "kleidicv/kleidicv.h"
+#include "kleidicv/sve2.h"
+
+namespace KLEIDICV_TARGET_NAMESPACE {
+
+template <typename InputType, typename OutputType>
+class float_conversion_operation;
+
+template <typename OutputType>
+class float_conversion_operation<float, OutputType> {
+ public:
+  using SrcVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<float>;
+  using SrcVectorType = typename SrcVecTraits::VectorType;
+  using IntermediateVecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<
+      std::conditional_t<std::is_signed_v<OutputType>, int32_t, uint32_t>>;
+  using IntermediateVectorType = typename IntermediateVecTraits::VectorType;
+
+  void process_row(size_t width, Columns<const float> src,
+                   Columns<OutputType> dst) KLEIDICV_STREAMING_COMPATIBLE {
+    LoopUnroll{width, SrcVecTraits::num_lanes()}
+        .unroll_twice([&](size_t step) KLEIDICV_STREAMING_COMPATIBLE {
+          svbool_t pg = SrcVecTraits::svptrue();
+          SrcVectorType src_vector1 = svld1(pg, &src[0]);
+          SrcVectorType src_vector2 = svld1_vnum(pg, &src[0], 1);
+          IntermediateVectorType result_vector1 =
+              vector_path<OutputType>(pg, src_vector1);
+          IntermediateVectorType result_vector2 =
+              vector_path<OutputType>(pg, src_vector2);
+          svst1b(pg, &dst[0], result_vector1);
+          svst1b_vnum(pg, &dst[0], 1, result_vector2);
+          src += ptrdiff_t(step);
+          dst += ptrdiff_t(step);
+        })
+        .remaining([&](size_t length, size_t) KLEIDICV_STREAMING_COMPATIBLE {
+          size_t index = 0;
+          svbool_t pg = SrcVecTraits::svwhilelt(index, length);
+          while (svptest_first(SrcVecTraits::svptrue(), pg)) {
+            SrcVectorType src_vector = svld1(pg, &src[ptrdiff_t(index)]);
+            IntermediateVectorType result_vector =
+                vector_path<OutputType>(pg, src_vector);
+            svst1b(pg, &dst[ptrdiff_t(index)], result_vector);
+            // Update loop counter and calculate the next governing predicate.
+            index += SrcVecTraits::num_lanes();
+            pg = SrcVecTraits::svwhilelt(index, length);
+          }
+        });
+  }
+
+ private:
+  template <
+      typename O,
+      std::enable_if_t<std::is_integral_v<O> && std::is_signed_v<O>, int> = 0>
+  IntermediateVectorType vector_path(svbool_t& pg, SrcVectorType src)
+      KLEIDICV_STREAMING_COMPATIBLE {
+    constexpr float min_val = std::numeric_limits<O>::min();
+    constexpr float max_val = std::numeric_limits<O>::max();
+
+    src = svrinti_f32_x(pg, src);
+
+    svbool_t less = svcmplt_n_f32(pg, src, min_val);
+    src = svdup_n_f32_m(src, less, min_val);
+
+    svbool_t greater = svcmpgt_n_f32(pg, src, max_val);
+    src = svdup_n_f32_m(src, greater, max_val);
+
+    return svcvt_s32_f32_x(pg, src);
+  }
+
+  template <
+      typename O,
+      std::enable_if_t<std::is_integral_v<O> && !std::is_signed_v<O>, int> = 0>
+  IntermediateVectorType vector_path(svbool_t& pg, SrcVectorType src)
+      KLEIDICV_STREAMING_COMPATIBLE {
+    constexpr float max_val = std::numeric_limits<O>::max();
+
+    src = svrinti_f32_x(pg, src);
+
+    svbool_t greater = svcmpgt_n_f32(pg, src, max_val);
+    src = svdup_n_f32_m(src, greater, max_val);
+
+    return svcvt_u32_f32_x(pg, src);
+  }
+};  // end of class float_conversion_operation<float, OutputType>
+
+template <typename InputType>
+class float_conversion_operation<InputType, float> {
+ public:
+  using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<float>;
+  using VectorType = typename VecTraits::VectorType;
+  void process_row(size_t width, Columns<const InputType> src,
+                   Columns<float> dst) {
+    LoopUnroll{width, VecTraits::num_lanes()}
+        .unroll_twice([&](size_t step) KLEIDICV_STREAMING_COMPATIBLE {
+          svbool_t pg = VecTraits::svptrue();
+          auto src_vect1 = load_src(pg, &src[0], 0);
+          auto src_vect2 = load_src(pg, &src[0], 1);
+
+          VectorType dst_vector1 = vector_path(pg, src_vect1);
+          VectorType dst_vector2 = vector_path(pg, src_vect2);
+          svst1(pg, &dst[0], dst_vector1);
+          svst1_vnum(pg, &dst[0], 1, dst_vector2);
+          src += ptrdiff_t(step);
+          dst += ptrdiff_t(step);
+        })
+        .remaining([&](size_t length, size_t) KLEIDICV_STREAMING_COMPATIBLE {
+          size_t index = 0;
+          svbool_t pg = VecTraits::svwhilelt(index, length);
+          while (svptest_first(VecTraits::svptrue(), pg)) {
+            auto src_vect = load_src(pg, &src[ptrdiff_t(index)], 0);
+            VectorType dst_vector = vector_path(pg, src_vect);
+            svst1(pg, &dst[ptrdiff_t(index)], dst_vector);
+            // Update loop counter and calculate the next governing predicate.
+            index += VecTraits::num_lanes();
+            pg = VecTraits::svwhilelt(index, length);
+          }
+        });
+  }
+
+ private:
+  template <typename I, std::enable_if_t<std::is_same_v<I, svint32_t>, int> = 0>
+  VectorType vector_path(svbool_t& pg,
+                         I src_vector) KLEIDICV_STREAMING_COMPATIBLE {
+    return svcvt_f32_s32_x(pg, src_vector);
+  }
+  template <typename I,
+            std::enable_if_t<std::is_same_v<I, svuint32_t>, int> = 0>
+  VectorType vector_path(svbool_t& pg,
+                         I src_vector) KLEIDICV_STREAMING_COMPATIBLE {
+    return svcvt_f32_u32_x(pg, src_vector);
+  }
+
+  template <
+      typename I,
+      std::enable_if_t<std::is_integral_v<I> && std::is_signed_v<I>, int> = 0>
+  svint32_t load_src(svbool_t& pg, const I* src,
+                     size_t vnum) KLEIDICV_STREAMING_COMPATIBLE {
+    svint32_t src_vect = svld1sb_vnum_s32(pg, src, vnum);
+    return src_vect;
+  }
+
+  template <
+      typename I,
+      std::enable_if_t<std::is_integral_v<I> && !std::is_signed_v<I>, int> = 0>
+  svuint32_t load_src(svbool_t& pg, const I* src,
+                      size_t vnum) KLEIDICV_STREAMING_COMPATIBLE {
+    svuint32_t src_vect = svld1ub_vnum_u32(pg, src, vnum);
+    return src_vect;
+  }
+};  // end of class float_conversion_operation<InputType, float>
+
+template <typename InputType, typename OutputType>
+static kleidicv_error_t float_conversion_sc(
+    const InputType* src, size_t src_stride, OutputType* dst, size_t dst_stride,
+    size_t width, size_t height) KLEIDICV_STREAMING_COMPATIBLE {
+  CHECK_POINTER_AND_STRIDE(src, src_stride, height);
+  CHECK_POINTER_AND_STRIDE(dst, dst_stride, height);
+  CHECK_IMAGE_SIZE(width, height);
+
+  float_conversion_operation<InputType, OutputType> operation;
+  Rectangle rect{width, height};
+  Rows<const InputType> src_rows{src, src_stride};
+  Rows<OutputType> dst_rows{dst, dst_stride};
+  zip_rows(operation, rect, src_rows, dst_rows);
+
+  return KLEIDICV_OK;
+}
+
+}  // namespace KLEIDICV_TARGET_NAMESPACE
+
+#endif  // KLEIDICV_FLOAT_CONV_SC_H
diff --git a/kleidicv/src/conversions/float_conv_sme2.cpp b/kleidicv/src/conversions/float_conv_sme2.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..49832008a06f080bdd47d0972b883f618fd801a2
--- /dev/null
+++ b/kleidicv/src/conversions/float_conv_sme2.cpp
@@ -0,0 +1,28 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "float_conv_sc.h"
+
+namespace kleidicv::sme2 {
+
+template <typename InputType, typename OutputType>
+KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t
+float_conversion(const InputType* src, size_t src_stride, OutputType* dst,
+                 size_t dst_stride, size_t width, size_t height) {
+  return float_conversion_sc<InputType, OutputType>(src, src_stride, dst,
+                                                    dst_stride, width, height);
+}
+
+#define KLEIDICV_INSTANTIATE_TEMPLATE(itype, otype)                           \
+  template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t                          \
+  float_conversion<itype, otype>(const itype* src, size_t src_stride,         \
+                                 otype* dst, size_t dst_stride, size_t width, \
+                                 size_t height)
+
+KLEIDICV_INSTANTIATE_TEMPLATE(float, int8_t);
+KLEIDICV_INSTANTIATE_TEMPLATE(float, uint8_t);
+KLEIDICV_INSTANTIATE_TEMPLATE(int8_t, float);
+KLEIDICV_INSTANTIATE_TEMPLATE(uint8_t, float);
+
+}  // namespace kleidicv::sme2
diff --git a/kleidicv/src/conversions/float_conv_sve2.cpp b/kleidicv/src/conversions/float_conv_sve2.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6bbd5d72e12ff1118dafcbe756834df90c388121
--- /dev/null
+++ b/kleidicv/src/conversions/float_conv_sve2.cpp
@@ -0,0 +1,28 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "float_conv_sc.h"
+
+namespace kleidicv::sve2 {
+
+template <typename InputType, typename OutputType>
+KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t
+float_conversion(const InputType* src, size_t src_stride, OutputType* dst,
+                 size_t dst_stride, size_t width, size_t height) {
+  return float_conversion_sc<InputType, OutputType>(src, src_stride, dst,
+                                                    dst_stride, width, height);
+}
+
+#define KLEIDICV_INSTANTIATE_TEMPLATE(itype, otype)                           \
+  template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t                          \
+  float_conversion<itype, otype>(const itype* src, size_t src_stride,         \
+                                 otype* dst, size_t dst_stride, size_t width, \
+                                 size_t height)
+
+KLEIDICV_INSTANTIATE_TEMPLATE(float, int8_t);
+KLEIDICV_INSTANTIATE_TEMPLATE(float, uint8_t);
+KLEIDICV_INSTANTIATE_TEMPLATE(int8_t, float);
+KLEIDICV_INSTANTIATE_TEMPLATE(uint8_t, float);
+
+}  // namespace kleidicv::sve2
diff --git a/test/api/test_float_conv.cpp b/test/api/test_float_conv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5f3b0dda38e7f59bb6937f74cdedfdac373bcca5
--- /dev/null
+++ b/test/api/test_float_conv.cpp
@@ -0,0 +1,462 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <gtest/gtest.h>
+
+#include "framework/array.h"
+#include "framework/generator.h"
+#include "framework/operation.h"
+#include "framework/utils.h"
+#include "kleidicv/kleidicv.h"
+#include "test_config.h"
+
+#define KLEIDICV_FLOAT_CONVERSION(itype, itype_name, otype, otype_name)        \
+  KLEIDICV_API_DIFFERENT_IO_TYPES(                                             \
+      float_conversion, kleidicv_float_conversion_##itype_name##_##otype_name, \
+      itype, otype)
+
+KLEIDICV_FLOAT_CONVERSION(float, f32, int8_t, s8);
+KLEIDICV_FLOAT_CONVERSION(float, f32, uint8_t, u8);
+KLEIDICV_FLOAT_CONVERSION(int8_t, s8, float, f32);
+KLEIDICV_FLOAT_CONVERSION(uint8_t, u8, float, f32);
+
+template <typename InputType, typename OutputType>
+class FloatConversionTest final {
+ private:
+  template <typename T>
+  static constexpr T min() {
+    return std::numeric_limits<T>::min();
+  }
+
+  template <typename T>
+  static constexpr T max() {
+    return std::numeric_limits<T>::max();
+  }
+
+  struct Elements {
+    size_t width;
+    size_t height;
+
+    std::vector<std::vector<InputType>> source_rows;
+    std::vector<std::vector<OutputType>> expected_rows;
+
+    Elements(size_t _width, size_t _height,
+             std::vector<std::vector<InputType>>&& _source_rows,
+             std::vector<std::vector<OutputType>>&& _expected_rows)
+        : width(_width),
+          height(_height),
+          source_rows(std::move(_source_rows)),
+          expected_rows(std::move(_expected_rows)) {}
+  };
+
+  struct Values {
+    InputType source;
+    OutputType expected;
+  };
+
+  static float floatval(uint32_t v) {
+    float result;  // Avoid cppcoreguidelines-init-variables. NOLINT
+    static_assert(sizeof(result) == sizeof(v));
+    memcpy(&result, &v, sizeof(result));
+    return result;
+  }
+
+  const float quietNaN = std::numeric_limits<float>::quiet_NaN();
+  const float signalingNaN = std::numeric_limits<float>::signaling_NaN();
+  const float posInfinity = std::numeric_limits<float>::infinity();
+  const float negInfinity = -std::numeric_limits<float>::infinity();
+
+  const float minusNaN = floatval(0xFF800001);
+  const float plusNaN = floatval(0x7F800001);
+  const float plusZero = 0.0F;
+  const float minusZero = -0.0F;
+
+  const float oneNaN = floatval(0x7FC00001);
+  const float zeroDivZero = -std::numeric_limits<float>::quiet_NaN();
+  const float floatMin = std::numeric_limits<float>::min();
+  const float floatMax = std::numeric_limits<float>::max();
+
+  const float posSubnormalMin = std::numeric_limits<float>::denorm_min();
+  const float posSubnormalMax = floatval(0x007FFFFF);
+  const float negSubnormalMin = -std::numeric_limits<float>::denorm_min();
+  const float negSubnormalMax = floatval(0x807FFFFF);
+
+  template <typename I, typename O,
+            std::enable_if_t<std::is_same_v<float, I>, bool> = true,
+            std::enable_if_t<std::is_same_v<int8_t, O>, bool> = true>
+  const Elements& get_custom_elements() {
+    static const Elements kTestElements = {
+        // clang-format off
+        4, 8,
+        {{
+          { quietNaN, signalingNaN, posInfinity, negInfinity },
+          { minusNaN, plusNaN, plusZero, minusZero },
+          { oneNaN, zeroDivZero, floatMin, floatMax },
+          { posSubnormalMin, posSubnormalMax, negSubnormalMin, negSubnormalMax },
+          { 1111.11, -1112.22, 113.33, 114.44 },
+          { 111.51, 112.62, 113.73, 114.84 },
+          { 126.66, 127.11, 128.66, 129.11 },
+          { 11.5, 12.5, -11.5, -12.5 }
+        }},
+        {{
+          { 0, 0, 127, -128 },
+          { 0, 0, 0, 0 },
+          { 0, 0, 0, 127 },
+          { 0, 0, 0, 0 },
+          { 127, -128, 113, 114 },
+          { 112, 113, 114, 115 },
+          { 127, 127, 127, 127 },
+          { 12, 12, -12, -12 }
+        }}
+        // clang-format on
+    };
+    return kTestElements;
+  }
+
+  template <typename I, typename O,
+            std::enable_if_t<std::is_same_v<float, I>, bool> = true,
+            std::enable_if_t<std::is_same_v<uint8_t, O>, bool> = true>
+  const Elements& get_custom_elements() {
+    static const Elements kTestElements = {
+        // clang-format off
+        4, 8,
+        {{
+          { quietNaN, signalingNaN, posInfinity, negInfinity },
+          { minusNaN, plusNaN, plusZero, minusZero },
+          { oneNaN, zeroDivZero, floatMin, floatMax },
+          { posSubnormalMin, posSubnormalMax, negSubnormalMin, negSubnormalMax },
+          { 1111.11, -1112.22, 113.33, 114.44 },
+          { 111.51, 112.62, 113.73, 114.84 },
+          { 126.66, 127.11, 128.66, 129.11 },
+          { 11.5, 12.5, -11.5, -12.5 }
+        }},
+        {{
+          { 0, 0, 255, 0 },
+          { 0, 0, 0, 0 },
+          { 0, 0, 0, 255 },
+          { 0, 0, 0, 0 },
+          { 255, 0, 113, 114 },
+          { 112, 113, 114, 115 },
+          { 127, 127, 129, 129 },
+          { 12, 12, 0, 0 }
+        }}
+        // clang-format on
+    };
+    return kTestElements;
+  }
+
+  template <typename I, typename O,
+            std::enable_if_t<std::is_integral_v<I>, bool> = true,
+            std::enable_if_t<std::is_same_v<float, O>, bool> = true>
+  const Elements& get_custom_elements() {
+    static const Elements kTestElements = {
+        // clang-format off
+        5, 1,
+        {{
+          { min<I>(), min<I>() + 1, 0, max<I>() - 1, max<I>() }
+        }},
+        {{
+          { static_cast<float>(min<I>()), static_cast<float>(min<I>()) + 1.0, 0,
+            static_cast<float>(max<I>()) - 1.0, static_cast<float>(max<I>()) }
+        }}
+        // clang-format on
+    };
+    return kTestElements;
+  }
+
+  template <typename I, typename O,
+            std::enable_if_t<std::is_same_v<float, I>, bool> = true,
+            std::enable_if_t<std::is_same_v<int8_t, O>, bool> = true>
+  const Values& get_values() {
+    static const Values kTestValues = {
+        // clang-format off
+        -1000.67F, -128
+        // clang-format on
+    };
+    return kTestValues;
+  }
+
+  template <typename I, typename O,
+            std::enable_if_t<std::is_same_v<float, I>, bool> = true,
+            std::enable_if_t<std::is_same_v<uint8_t, O>, bool> = true>
+  const Values& get_values() {
+    static const Values kTestValues = {
+        // clang-format off
+        -1000.67F, 0
+        // clang-format on
+    };
+    return kTestValues;
+  }
+
+  template <typename I, typename O,
+            std::enable_if_t<std::is_same_v<int8_t, I>, bool> = true,
+            std::enable_if_t<std::is_same_v<float, O>, bool> = true>
+  const Values& get_values() {
+    static const Values kTestValues = {
+        // clang-format off
+        -127, -127.0
+        // clang-format on
+    };
+    return kTestValues;
+  }
+
+  template <typename I, typename O,
+            std::enable_if_t<std::is_same_v<uint8_t, I>, bool> = true,
+            std::enable_if_t<std::is_same_v<float, O>, bool> = true>
+  const Values& get_values() {
+    static const Values kTestValues = {
+        // clang-format off
+        255, 255.0
+        // clang-format on
+    };
+    return kTestValues;
+  }
+
+  template <typename I, typename O,
+            std::enable_if_t<std::is_same_v<float, I>, bool> = true,
+            std::enable_if_t<std::is_integral_v<O>, bool> = true>
+  void calculate_expected(const test::Array2D<I>& source,
+                          test::Array2D<O>& expected) {
+    for (size_t hindex = 0; hindex < source.height(); ++hindex) {
+      for (size_t vindex = 0; vindex < source.width(); ++vindex) {
+        O calculated = 0;
+        // NOLINTBEGIN(clang-analyzer-core.uninitialized.Assign)
+        I result = *source.at(hindex, vindex);
+        // NOLINTEND(clang-analyzer-core.uninitialized.Assign)
+        if (result > max<O>()) {
+          calculated = max<O>();
+        } else if (result < min<O>()) {
+          calculated = min<O>();
+        } else {
+          calculated = result;
+        }
+        *expected.at(hindex, vindex) = calculated;
+      }
+    }
+  }
+
+  template <typename I, typename O,
+            std::enable_if_t<std::is_integral_v<I>, bool> = true,
+            std::enable_if_t<std::is_same_v<float, O>, bool> = true>
+  void calculate_expected(const test::Array2D<I>& source,
+                          test::Array2D<OutputType>& expected) {
+    for (size_t hindex = 0; hindex < source.height(); ++hindex) {
+      for (size_t vindex = 0; vindex < source.width(); ++vindex) {
+        // NOLINTBEGIN(clang-analyzer-core.uninitialized.Assign)
+        *expected.at(hindex, vindex) = *source.at(hindex, vindex);
+        // NOLINTEND(clang-analyzer-core.uninitialized.Assign)
+      }
+    }
+  }
+
+  template <typename T>
+  size_t get_linear_height(size_t width, size_t minimum_size) {
+    size_t image_size =
+        std::max(minimum_size, static_cast<size_t>(max<T>() - min<T>()));
+    size_t height = image_size / width + 1;
+
+    return height;
+  }
+
+  template <typename I, typename O>
+  std::tuple<test::Array2D<I>, test::Array2D<O>, test::Array2D<O>>
+  get_linear_arrays(size_t width, size_t height) {
+    test::Array2D<I> source(width, height, 1, 1);
+    test::Array2D<O> expected(width, height, 1, 1);
+    test::Array2D<O> actual(width, height, 1, 1);
+
+    if constexpr (std::is_same_v<float, I> && std::is_integral_v<O>) {
+      test::GenerateLinearSeries<I> generator(min<O>());
+      source.fill(generator);
+    } else if constexpr (std::is_integral_v<I> && std::is_same_v<float, O>) {
+      test::GenerateLinearSeries<I> generator(min<I>());
+      source.fill(generator);
+    } else {
+      static_assert(sizeof(I) == 0 && sizeof(O) == 0, "should never happen");
+    }
+
+    calculate_expected<I, O>(source, expected);
+
+    return {source, expected, actual};
+  }
+
+ public:
+  // minimum_size set by caller to trigger the 'big' conversion path.
+  template <typename I, typename O>
+  void test_linear(size_t width, size_t minimum_size = 1) {
+    size_t height = 0;
+    if constexpr (std::is_same_v<float, I> && std::is_integral_v<O>) {
+      height = get_linear_height<O>(width, minimum_size);
+    } else if constexpr (std::is_integral_v<I> && std::is_same_v<float, O>) {
+      height = get_linear_height<I>(width, minimum_size);
+    } else {
+      static_assert(sizeof(I) == 0 && sizeof(O) == 0, "should never happen");
+    }
+
+    auto arrays = get_linear_arrays<I, O>(width, height);
+
+    test::Array2D<I>& source = std::get<0>(arrays);
+    test::Array2D<O>& expected = std::get<1>(arrays);
+    test::Array2D<O>& actual = std::get<2>(arrays);
+
+    ASSERT_EQ(KLEIDICV_OK, (float_conversion<I, O>()(
+                               source.data(), source.stride(), actual.data(),
+                               actual.stride(), width, height)));
+
+    EXPECT_EQ_ARRAY2D(expected, actual);
+  }
+
+  void test_custom() {
+    auto elements_list = get_custom_elements<InputType, OutputType>();
+    const size_t& width = elements_list.width;
+    const size_t& height = elements_list.height;
+
+    test::Array2D<InputType> source(width, height);
+    test::Array2D<OutputType> expected(width, height);
+    test::Array2D<OutputType> actual(width, height);
+
+    for (size_t i = 0; i < height; i++) {
+      source.set(i, 0, elements_list.source_rows[i]);
+      expected.set(i, 0, elements_list.expected_rows[i]);
+    }
+
+    ASSERT_EQ(KLEIDICV_OK, (float_conversion<InputType, OutputType>()(
+                               source.data(), source.stride(), actual.data(),
+                               actual.stride(), width, height)));
+
+    EXPECT_EQ_ARRAY2D(expected, actual);
+  }
+
+  void test_sizing(const size_t width, const size_t height) {
+    auto values_list = get_values<InputType, OutputType>();
+
+    test::Array2D<InputType> source(width, height, 1, 1);
+    test::Array2D<OutputType> expected(width, height, 1, 1);
+    test::Array2D<OutputType> actual(width, height, 1, 1);
+
+    source.fill(values_list.source);
+    expected.fill(values_list.expected);
+
+    actual.fill(0);
+
+    ASSERT_EQ(KLEIDICV_OK, (float_conversion<InputType, OutputType>()(
+                               source.data(), source.stride(), actual.data(),
+                               actual.stride(), width, height)));
+
+    EXPECT_EQ_ARRAY2D(expected, actual);
+  }
+};  // end of class FloatConversionTest
+
+template <typename ElementType>
+class FloatConversion : public testing::Test {};
+
+using ElementTypes =
+    ::testing::Types<std::pair<float, int8_t>, std::pair<float, uint8_t>,
+                     std::pair<int8_t, float>, std::pair<uint8_t, float>>;
+
+// Tests kleidicv_float_conversion API.
+TYPED_TEST_SUITE(FloatConversion, ElementTypes);
+
+TYPED_TEST(FloatConversion, NullPointer) {
+  using InputType = typename TypeParam::first_type;
+  using OutputType = typename TypeParam::second_type;
+  InputType src[1] = {};
+  OutputType dst[1];
+  test::test_null_args(float_conversion<InputType, OutputType>(), src,
+                       sizeof(InputType), dst, sizeof(OutputType), 1, 1);
+}
+
+TYPED_TEST(FloatConversion, OversizeImage) {
+  using InputType = typename TypeParam::first_type;
+  using OutputType = typename TypeParam::second_type;
+  InputType src[1] = {};
+  OutputType dst[1];
+  EXPECT_EQ(KLEIDICV_ERROR_RANGE,
+            (float_conversion<InputType, OutputType>()(
+                src, sizeof(InputType), dst, sizeof(OutputType),
+                KLEIDICV_MAX_IMAGE_PIXELS + 1, 1)));
+  EXPECT_EQ(KLEIDICV_ERROR_RANGE,
+            (float_conversion<InputType, OutputType>()(
+                src, sizeof(InputType), dst, sizeof(OutputType), 1,
+                KLEIDICV_MAX_IMAGE_PIXELS + 1)));
+  EXPECT_EQ(KLEIDICV_ERROR_RANGE,
+            (float_conversion<InputType, OutputType>()(
+                src, sizeof(TypeParam), dst, sizeof(OutputType),
+                KLEIDICV_MAX_IMAGE_PIXELS + 1, KLEIDICV_MAX_IMAGE_PIXELS + 1)));
+}
+
+TYPED_TEST(FloatConversion, Scalar) {
+  using InputType = typename TypeParam::first_type;
+  using OutputType = typename TypeParam::second_type;
+  FloatConversionTest<InputType, OutputType>{}
+      .template test_linear<InputType, OutputType>(
+          test::Options::vector_length() - 1);
+}
+TYPED_TEST(FloatConversion, Vector) {
+  using InputType = typename TypeParam::first_type;
+  using OutputType = typename TypeParam::second_type;
+  FloatConversionTest<InputType, OutputType>{}
+      .template test_linear<InputType, OutputType>(
+          test::Options::vector_length() * 2);
+}
+TYPED_TEST(FloatConversion, Custom) {
+  using InputType = typename TypeParam::first_type;
+  using OutputType = typename TypeParam::second_type;
+  FloatConversionTest<InputType, OutputType>{}.test_custom();
+}
+TYPED_TEST(FloatConversion, SizingFits128VectorSize) {
+  using InputType = typename TypeParam::first_type;
+  using OutputType = typename TypeParam::second_type;
+  FloatConversionTest<InputType, OutputType>{}.test_sizing(4, 1);
+}
+TYPED_TEST(FloatConversion, SizingFits128VectorSize2x) {
+  using InputType = typename TypeParam::first_type;
+  using OutputType = typename TypeParam::second_type;
+  FloatConversionTest<InputType, OutputType>{}.test_sizing(4, 2);
+}
+TYPED_TEST(FloatConversion, SizingFits128VectorSize3x) {
+  using InputType = typename TypeParam::first_type;
+  using OutputType = typename TypeParam::second_type;
+  FloatConversionTest<InputType, OutputType>{}.test_sizing(12, 1);
+}
+TYPED_TEST(FloatConversion, SizingFits512VectorSize) {
+  using InputType = typename TypeParam::first_type;
+  using OutputType = typename TypeParam::second_type;
+  FloatConversionTest<InputType, OutputType>{}.test_sizing(4, 4);
+}
+TYPED_TEST(FloatConversion, SizingFits512VectorSize2x) {
+  using InputType = typename TypeParam::first_type;
+  using OutputType = typename TypeParam::second_type;
+  FloatConversionTest<InputType, OutputType>{}.test_sizing(16, 2);
+}
+TYPED_TEST(FloatConversion, SizingFits512VectorSize3x) {
+  using InputType = typename TypeParam::first_type;
+  using OutputType = typename TypeParam::second_type;
+  FloatConversionTest<InputType, OutputType>{}.test_sizing(6, 8);
+}
+TYPED_TEST(FloatConversion, Sizing128OneRemaining) {
+  using InputType = typename TypeParam::first_type;
+  using OutputType = typename TypeParam::second_type;
+  FloatConversionTest<InputType, OutputType>{}.test_sizing(1, 17);
+}
+TYPED_TEST(FloatConversion, Sizing128AllButOneRemaining) {
+  using InputType = typename TypeParam::first_type;
+  using OutputType = typename TypeParam::second_type;
+  FloatConversionTest<InputType, OutputType>{}.test_sizing(5, 3);
+}
+TYPED_TEST(FloatConversion, SizingAboutHalfRemaining) {
+  using InputType = typename TypeParam::first_type;
+  using OutputType = typename TypeParam::second_type;
+  FloatConversionTest<InputType, OutputType>{}.test_sizing(38, 1);
+}
+TYPED_TEST(FloatConversion, SizingEmpty) {
+  using InputType = typename TypeParam::first_type;
+  using OutputType = typename TypeParam::second_type;
+  FloatConversionTest<InputType, OutputType>{}.test_sizing(0, 0);
+}
+TYPED_TEST(FloatConversion, SizingOne) {
+  using InputType = typename TypeParam::first_type;
+  using OutputType = typename TypeParam::second_type;
+  FloatConversionTest<InputType, OutputType>{}.test_sizing(1, 1);
+}