diff --git a/CMakeLists.txt b/CMakeLists.txt
index a7d6185cf9b038648da0897e041b72b53cc8bda1..92c93528dd94741ed63168887f7c2fa59ec83e9b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -420,6 +420,7 @@ if(KLEIDIAI_BUILD_TESTS)
         test/reference/reduce.cpp
         test/reference/reorder.cpp
         test/reference/transpose.cpp
+        test/reference/depthwise_conv.cpp
     )
 
     target_compile_options(kleidiai_test_framework
diff --git a/test/reference/depthwise_conv.cpp b/test/reference/depthwise_conv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7383a3174dc4af338c3792c59063f4217091b217
--- /dev/null
+++ b/test/reference/depthwise_conv.cpp
@@ -0,0 +1,68 @@
+//
+// SPDX-FileCopyrightText: Copyright 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "test/reference/depthwise_conv.hpp"
+
+#include "kai/kai_common.h"
+
+namespace kai::test {
+/// Computes a depthwise convolution (valid padding, unit stride) with bias and clamping, element type T.
+/// Indexing is batch/row/column/channel for the feature map and output, row/column/channel for the weights.
+/// Products are accumulated in FP32 per output element, then the bias is added and the result is clamped.
+/// NOTE(review): defined in a .cpp with no explicit instantiation visible in this file - confirm how callers link.
+template <typename T>
+Buffer depthwise_reference(
+    const size_t batches, const size_t in_height, const size_t in_width, const size_t channels,
+    const size_t filter_height, const size_t filter_width, const void* feature_map, const void* weights,
+    const void* bias, float clamp_min, float clamp_max) {
+    // Output dims (Padding = Valid). A filter that does not fit yields an empty output rather than wrapping size_t.
+    const size_t out_height = (filter_height <= in_height) ? in_height + 1 - filter_height : 0;
+    const size_t out_width = (filter_width <= in_width) ? in_width + 1 - filter_width : 0;
+    const size_t out_size = out_height * out_width * batches * channels;
+
+    // We accumulate in FP32 and clamp and cast to return type later.
+    std::vector<float32_t> acc(out_size, 0.0f);
+    Buffer dst(out_size * size_in_bits<T> / 8);
+
+    for (size_t b = 0; b < batches; ++b) {
+        for (size_t out_h = 0; out_h < out_height; ++out_h) {
+            for (size_t out_w = 0; out_w < out_width; ++out_w) {
+                // Channels of one output pixel are contiguous, starting at this offset.
+                const size_t out_base = ((b * out_height + out_h) * out_width + out_w) * channels;
+
+                for (size_t ic = 0; ic < channels; ++ic) {
+                    float32_t& sum = acc[out_base + ic];
+
+                    // Apply filter to feature map. With valid padding every tap lies inside the input
+                    // (out_h + kernel_h <= in_height - 1, likewise for width), so no bounds checks are needed.
+                    for (size_t kernel_h = 0; kernel_h < filter_height; ++kernel_h) {
+                        const size_t in_row = (b * in_height + (out_h + kernel_h)) * in_width + out_w;
+                        const size_t wei_row = kernel_h * filter_width;
+                        for (size_t kernel_w = 0; kernel_w < filter_width; ++kernel_w) {
+                            const auto in_value = read_array<T>(feature_map, (in_row + kernel_w) * channels + ic);
+                            const auto wei_value = read_array<T>(weights, (wei_row + kernel_w) * channels + ic);
+                            sum += in_value * wei_value;
+                        }
+                    }
+
+                    // Apply bias after the products, preserving the accumulation order.
+                    sum += read_array<T>(bias, ic);
+                }
+            }
+        }
+    }
+
+    // Apply clamping to accumulator, cast to T and store in the output buffer at the same idx.
+    for (size_t i = 0; i < out_size; i++) {
+        acc[i] = (clamp_min > acc[i]) ? clamp_min : acc[i];
+        acc[i] = (clamp_max < acc[i]) ? clamp_max : acc[i];
+        write_array<T>(dst.data(), i, acc[i]);
+    }
+
+    return dst;
+}
+
+}  // namespace kai::test
diff --git a/test/reference/depthwise_conv.hpp b/test/reference/depthwise_conv.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..f3982e86cc6cba94d485ed732c19d90183d0b887
--- /dev/null
+++ b/test/reference/depthwise_conv.hpp
@@ -0,0 +1,39 @@
+//
+// SPDX-FileCopyrightText: Copyright 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+#include <cstddef>
+
+#include "test/common/buffer.hpp"
+#include "test/common/data_type.hpp"
+#include "test/common/memory.hpp"
+
+namespace kai::test {
+
+/// Reference depthwise convolution with valid padding and unit stride, followed by bias addition and clamping.
+///
+/// @tparam T Data type of the feature map, weights, bias and output elements.
+///
+/// @param[in] batches       Batch dimension of the feature map.
+/// @param[in] in_height     Height of the feature map.
+/// @param[in] in_width      Width of the feature map.
+/// @param[in] channels      Number of channels in the feature map, weights and bias.
+/// @param[in] filter_height Height of the convolution filter.
+/// @param[in] filter_width  Width of the convolution filter.
+/// @param[in] feature_map   Pointer to the feature map, indexed as [batch][row][column][channel].
+/// @param[in] weights       Pointer to the filter weights, indexed as [filter row][filter column][channel].
+/// @param[in] bias          Pointer to the bias values, one per channel.
+/// @param[in] clamp_min     Lower bound applied to each output value.
+/// @param[in] clamp_max     Upper bound applied to each output value.
+///
+/// @return Buffer of T holding [batch][in_height + 1 - filter_height][in_width + 1 - filter_width][channel] values.
+template <typename T>
+Buffer depthwise_reference(
+    const size_t batches, const size_t in_height, const size_t in_width, const size_t channels,
+    const size_t filter_height, const size_t filter_width, const void* feature_map, const void* weights,
+    const void* bias, float clamp_min, float clamp_max);
+
+}  // namespace kai::test