diff --git a/CHANGELOG.md b/CHANGELOG.md
index cb86f428d22704687b8c7d2cf6d287ff744f9685..1f24b367ab074f5c0fb3ec4efebb223e369638b2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,6 +20,7 @@ This changelog aims to follow the guiding principles of
   - 2-channel s16 and s16+u16 fixed-point coordinates and 1-channel u8 input.
   - 2-channel s16 + 5+5 bits' fractions fixed-point coordinates and 1-channel u8 input.
 - Implementation for cv::pyrDown in the OpenCV HAL.
+- Sum function for 1-channel f32 input.
 
 ## 0.2.0 - 2024-09-30
 
diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp
index 01b33c8b00c5b6918bb1d340d6539c73fd72edaf..cb3f491b54603a731f8d8cce491af3d19b037568 100644
--- a/adapters/opencv/kleidicv_hal.cpp
+++ b/adapters/opencv/kleidicv_hal.cpp
@@ -962,6 +962,27 @@ int transpose(const uchar *src_data, size_t src_step, uchar *dst_data,
       static_cast<size_t>(element_size)));
 }
 
+// Sums a 1-channel CV_32F image; every other type is not implemented.
+int sum(const uchar *src_data, size_t src_step, int src_type, size_t width,
+        size_t height, double *result) {
+  if (CV_MAT_CN(src_type) != 1) {
+    return CV_HAL_ERROR_NOT_IMPLEMENTED;
+  }
+  switch (CV_MAT_DEPTH(src_type)) {
+    case CV_32F: {  // braces scope the locals so more cases can be added
+      float result_float = 0;
+      const int status = convert_error(
+          kleidicv_sum_f32(reinterpret_cast<const float *>(src_data), src_step,
+                           width, height, &result_float));
+      if (status == CV_HAL_ERROR_OK) {  // leave *result untouched on failure
+        *result = result_float;
+      }
+      return status;
+    }
+  }
+  return CV_HAL_ERROR_NOT_IMPLEMENTED;
+}
+
 template <typename T, typename SingleThreadFunc, typename MultithreadFunc>
 kleidicv_error_t call_min_max(SingleThreadFunc min_max_func_st,
                               MultithreadFunc min_max_func_mt,
diff --git a/adapters/opencv/kleidicv_hal.h b/adapters/opencv/kleidicv_hal.h
index 70f92d20f98de4eda01ee8ef5dcab251a429d9b3..e628acd42f824457d77460bce7234b024b64c00b 100644
--- a/adapters/opencv/kleidicv_hal.h
+++ b/adapters/opencv/kleidicv_hal.h
@@ -123,6 +123,9 @@ int pyrdown(const uchar *src_data, size_t src_step, int src_width,
 int transpose(const uchar *src_data, size_t src_step, uchar *dst_data,
               size_t dst_step, int src_width, int src_height, int element_size);
 
+int sum(const uchar *src_data, size_t src_step, int src_type, size_t width,
+        size_t height, double *result);  // 1-channel CV_32F sources only
+
 int min_max_idx(const uchar *src_data, size_t src_stride, int width, int height,
                 int depth, double *min_value, double *max_value, int *min_index,
                 int *max_index, uchar *mask);
@@ -430,6 +433,19 @@ static inline int kleidicv_transpose_with_fallback(
 #undef cv_hal_transpose2d
 #define cv_hal_transpose2d kleidicv_transpose_with_fallback
 
+// sum: only hooked up when cv_hal_sum is already defined as a macro.
+#ifdef cv_hal_sum
+static inline int kleidicv_sum_with_fallback(const uchar *src_data,
+                                             size_t src_step, int src_type,
+                                             size_t width, size_t height,
+                                             double *result) {
+  return KLEIDICV_HAL_FALLBACK_FORWARD(sum, cv_hal_sum, src_data, src_step,
+                                       src_type, width, height, result);
+}
+#undef cv_hal_sum
+#define cv_hal_sum kleidicv_sum_with_fallback
+#endif  // cv_hal_sum
+
 // min_max_idx
 static inline int kleidicv_min_max_idx_with_fallback(
     const uchar *src_data, size_t src_stride, int width, int height, int depth,
diff --git a/adapters/opencv/opencv-4.10.patch b/adapters/opencv/opencv-4.10.patch
index 23d1dd320c4c50f1ef2454d7c16426c2be67251c..211a472ab18c8fbc96178a1d7ff571dde66a5ce7 100644
--- a/adapters/opencv/opencv-4.10.patch
+++ b/adapters/opencv/opencv-4.10.patch
@@ -19,7 +19,7 @@ index 2b4035285f..729cd1dd43 100644
 @@ -281,6 +281,11 @@ void Mat::convertTo(OutputArray dst, int type_, double alpha, double beta) const
      dst.create(dims, size, dtype);
      Mat dstMat = dst.getMat();
-
+ 
 +    if( dims <= 2 ) {
 +        int width_in_elements = src.cols * cn;
 +        CALL_HAL(convertTo, cv_hal_convertTo, src.data, src.step, src.depth(), dstMat.data, dstMat.step, dstMat.depth(), width_in_elements, src.rows, alpha, beta);
@@ -29,13 +29,25 @@ index 2b4035285f..729cd1dd43 100644
      double scale[] = {alpha, beta};
      CV_Assert( func != 0 );
 diff --git a/modules/core/src/hal_replacement.hpp b/modules/core/src/hal_replacement.hpp
-index f78608dbad..299b5e54bd 100644
+index f78608dbad..a9384588ec 100644
 --- a/modules/core/src/hal_replacement.hpp
 +++ b/modules/core/src/hal_replacement.hpp
-@@ -953,6 +953,41 @@ inline int hal_ni_transpose2d(const uchar* src_data, size_t src_step, uchar* dst
+@@ -953,6 +953,53 @@ inline int hal_ni_transpose2d(const uchar* src_data, size_t src_step, uchar* dst
  #define cv_hal_transpose2d hal_ni_transpose2d
  //! @endcond
-
+ 
++/**
++   @brief Calculates the sum of the elements of an image
++   @param src_data,src_step,src_type Source image
++   @param width,height Source image dimensions
++   @param result Pointer to the double that receives the sum.
++*/
++inline int hal_ni_sum(const uchar *src_data, size_t src_step, int src_type, size_t width, size_t height, double *result) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
++
++//! @cond IGNORED
++#define cv_hal_sum hal_ni_sum
++//! @endcond
++
 +/**
 +   @brief convertTo
 +   @param src_data,src_step,src_depth Source image
@@ -72,8 +84,8 @@ index f78608dbad..299b5e54bd 100644
 +//! @endcond
 +
  //! @}
-
-
+ 
+ 
 diff --git a/modules/core/src/minmax.cpp b/modules/core/src/minmax.cpp
 index 8c6d8ad9a9..47eb6fdb66 100644
 --- a/modules/core/src/minmax.cpp
@@ -90,14 +102,29 @@ index 8c6d8ad9a9..47eb6fdb66 100644
              return;
          }
          else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED)
+diff --git a/modules/core/src/sum.dispatch.cpp b/modules/core/src/sum.dispatch.cpp
+index fade948336..17b40ca0e8 100644
+--- a/modules/core/src/sum.dispatch.cpp
++++ b/modules/core/src/sum.dispatch.cpp
+@@ -199,6 +199,10 @@ Scalar sum(InputArray _src)
+     CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_sum(src, _res), _res);
+ 
+     int k, cn = src.channels(), depth = src.depth();
++    if (src.dims <= 2) {  // Mat::rows/cols are -1 for n-dimensional arrays
++        double result = 0;
++        CALL_HAL_RET(sum, cv_hal_sum, result, src.data, src.step, src.type(), src.cols, src.rows);
++    }
+     SumFunc func = getSumFunc(depth);
+     CV_Assert( cn <= 4 && func != 0 );
+ 
 diff --git a/modules/imgproc/src/hal_replacement.hpp b/modules/imgproc/src/hal_replacement.hpp
-index 773fed9b48..145d653f5d 100644
+index 773fed9b48..b74ff70f99 100644
 --- a/modules/imgproc/src/hal_replacement.hpp
 +++ b/modules/imgproc/src/hal_replacement.hpp
 @@ -328,6 +328,60 @@ inline int hal_ni_remap32f(int src_type, const uchar *src_data, size_t src_step,
  #define cv_hal_remap32f hal_ni_remap32f
  //! @endcond
-
+ 
 +/**
 +   @brief hal_remap with a short integer map
 +   @param src_type source and destination image type
@@ -172,7 +199,7 @@ index d7c9c64c3c..348208b72d 100644
 +        CALL_HAL(remap16s16u, cv_hal_remap16s16u, src.type(), src.data, src.step, src.cols, src.rows, dst.data, dst.step, dst.cols, dst.rows,
 +                 map1.ptr<short>(), map1.step, map2.ptr<unsigned short>(), map2.step, borderType, borderValue.val);
      }
-
+ 
      interpolation &= ~WARP_RELATIVE_MAP;
 diff --git a/modules/imgproc/src/smooth.dispatch.cpp b/modules/imgproc/src/smooth.dispatch.cpp
 index d0f50a73bb..1c308887dc 100644
@@ -181,7 +208,7 @@ index d0f50a73bb..1c308887dc 100644
 @@ -654,6 +654,25 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
              ocl_GaussianBlur_8UC1(_src, _dst, ksize, CV_MAT_DEPTH(type), kx, ky, borderType)
      );
-
+ 
 +    {
 +        Mat src = _src.getMat();
 +        Mat dst = _dst.getMat();
diff --git a/conformity/opencv/test_sum.cpp b/conformity/opencv/test_sum.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a90691ee5f904f871a69198d2b4f771d127deb2d
--- /dev/null
+++ b/conformity/opencv/test_sum.cpp
@@ -0,0 +1,49 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <limits>
+#include <vector>
+
+#include "tests.h"
+
+template <typename T, size_t Format>
+cv::Mat exec_sum(cv::Mat& input) {
+  // The 1x1 shape assumes a single-channel sum; revisit these dimensions if
+  // multi-channel sum gets implemented.
+  return cv::Mat(1, 1, Format, cv::sum(input));
+}
+
+#if MANAGER
+template <typename T, size_t Format>
+bool test_sum(int index, RecreatedMessageQueue& request_queue,
+              RecreatedMessageQueue& reply_queue) {
+  // Runs cv::sum on random matrices with power-of-two heights and widths
+  // from 2 to 128; returns true as soon as one result mismatches.
+  cv::RNG generator(0);
+  for (size_t rows = 2; rows <= 128; rows *= 2) {
+    for (size_t cols = 2; cols <= 128; cols *= 2) {
+      cv::Mat input(rows, cols, Format);
+      generator.fill(input, cv::RNG::UNIFORM, -10, 100);
+      cv::Mat actual = exec_sum<T, Format>(input);
+      cv::Mat expected = get_expected_from_subordinate(index, request_queue,
+                                                       reply_queue, input);
+      if (are_float_matrices_different<T>(0.001, actual, expected)) {
+        fail_print_matrices(rows, cols, input, actual, expected);
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+#endif
+
+std::vector<test>& sum_tests_get() {  // list of cv::sum conformity tests
+  // clang-format off
+  static std::vector<test> tests = {
+    TEST("sum_f32", (test_sum<float, CV_32FC1>), (exec_sum<float, CV_32FC1>)),
+  };
+  // clang-format on
+  return tests;
+}
diff --git a/conformity/opencv/tests.cpp b/conformity/opencv/tests.cpp
index 14ccc52c691d59f115cb22a9d822c26f9d9dbf85..f405106a448dd91a7d86527076a3cdaad0926ba0 100644
--- a/conformity/opencv/tests.cpp
+++ b/conformity/opencv/tests.cpp
@@ -37,6 +37,7 @@ std::vector<test> all_tests = merge_tests({
     float_conversion_tests_get,
     resize_tests_get,
     scale_tests_get,
+    sum_tests_get,
     min_max_tests_get,
     in_range_tests_get,
     remap_tests_get,
diff --git a/conformity/opencv/tests.h b/conformity/opencv/tests.h
index c1b3c8a4dc3b9e208b421df485a7341d2a28f34a..6b01dcc26472fabd0e0dfa00e5c2e2aa11f81d3e 100644
--- a/conformity/opencv/tests.h
+++ b/conformity/opencv/tests.h
@@ -16,6 +16,7 @@ std::vector<test>& gaussian_blur_tests_get();
 std::vector<test>& rgb2yuv_tests_get();
 std::vector<test>& yuv2rgb_tests_get();
 std::vector<test>& sobel_tests_get();
+std::vector<test>& sum_tests_get();
 std::vector<test>& exp_tests_get();
 std::vector<test>& float_conversion_tests_get();
 std::vector<test>& resize_tests_get();
diff --git a/doc/opencv.md b/doc/opencv.md
index d56d7dedd6c9b60689d5c9e34119b5c5d5477756..52ca6429cc6c296f10d7f843a2acff0695b2314e 100644
--- a/doc/opencv.md
+++ b/doc/opencv.md
@@ -32,6 +32,13 @@ Notes on parameters:
 ### [`cv::bitwise_and()`](https://docs.opencv.org/4.10.0/d2/de8/group__core__array.html#ga60b4d04b251ba5eb1392c34425497e14)
 Bitwise conjunction of two arrays.
 
+### [`cv::sum()`](https://docs.opencv.org/4.10.0/d2/de8/group__core__array.html#ga716e10a2dd9e228e4d3c95818f106722)
+Calculates the sum of array elements.
+
+Notes on parameters:
+* `src.depth()` - only supports `CV_32F` depth.
+* `src.channels()` - only supports 1 channel.
+
 ### [`cv::cvtColor()`](https://docs.opencv.org/4.10.0/d8/d01/group__imgproc__color__conversions.html#ga397ae87e1288a81d2363b61574eb8cab)
 Converts the color space of an image.
 
diff --git a/scripts/benchmark/benchmarks.txt b/scripts/benchmark/benchmarks.txt
index d40d2dd9c008d784ede509085990aa5d495a28ef..beb49332cb2aacd1e4a94976efe312192d0f262e 100755
--- a/scripts/benchmark/benchmarks.txt
+++ b/scripts/benchmark/benchmarks.txt
@@ -66,6 +66,8 @@ MinMax_F32: opencv_perf_core '*minMaxVals/*' '($PIXEL_FORMAT, 32FC1)'
 
 MinMaxLoc_U8: opencv_perf_core '*minMaxLoc/*' '($PIXEL_FORMAT, 8UC1)'
 
+Sum_F32: opencv_perf_core '*sum/*' '($PIXEL_FORMAT, 32FC1)'
+
 FloatToInt:  opencv_perf_core '*convertTo/*' '($PIXEL_FORMAT, 32FC1, 8SC1,  1, 1, 0)'
 FloatToUint: opencv_perf_core '*convertTo/*' '($PIXEL_FORMAT, 32FC1, 8UC1,  1, 1, 0)'
 IntToFloat:  opencv_perf_core '*convertTo/*' '($PIXEL_FORMAT, 8SC1,  32FC1, 1, 1, 0)'
diff --git a/scripts/ci-opencv.sh b/scripts/ci-opencv.sh
index f46892aedc132ec3d6561493d2e14d5aa4e9c871..931c5c0fa006cba53446b8a712a094996b4941e3 100755
--- a/scripts/ci-opencv.sh
+++ b/scripts/ci-opencv.sh
@@ -82,6 +82,7 @@ CORE_TEST_PATTERNS=(
   '*MinMaxLoc*'
   '*Core_ConvertScale*'
   '*Core_Exp*'
+  '*Core_Sum*'
   '*Core_MinMaxIdx*'
   '*Core_minMaxIdx*'
   '*Core_Array*'