diff --git a/kleidicv/include/kleidicv/morphology/workspace.h b/kleidicv/include/kleidicv/morphology/workspace.h index 58695b34d01ffdc25bac2e1baa00a0309f82139e..55529750a12bcf5e75b86ea80a2665fb0153943f 100644 --- a/kleidicv/include/kleidicv/morphology/workspace.h +++ b/kleidicv/include/kleidicv/morphology/workspace.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 @@ -14,6 +14,10 @@ #include "kleidicv/kleidicv.h" #include "kleidicv/types.h" +#if KLEIDICV_TARGET_SME2 +#include +#endif + namespace KLEIDICV_TARGET_NAMESPACE { // Forward declarations. @@ -58,9 +62,15 @@ class MorphologyWorkspace final { constexpr void operator()(Rows src_rows, Rows dst_rows, size_t length) const KLEIDICV_STREAMING_COMPATIBLE { +#if KLEIDICV_TARGET_SME2 + __arm_sc_memcpy(static_cast(&dst_rows[0]), + static_cast(&src_rows[0]), + length * sizeof(T) * dst_rows.channels()); +#else std::memcpy(static_cast(&dst_rows[0]), static_cast(&src_rows[0]), length * sizeof(T) * dst_rows.channels()); +#endif } }; diff --git a/kleidicv/include/kleidicv/types.h b/kleidicv/include/kleidicv/types.h index e1588cc112f0c15a789ce6f97eb1482754b08d54..98ea4151ddca6b542f58be43da5b095be205c439 100644 --- a/kleidicv/include/kleidicv/types.h +++ b/kleidicv/include/kleidicv/types.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 @@ -13,6 +13,10 @@ #include "kleidicv/ctypes.h" #include "kleidicv/utils.h" +#if KLEIDICV_TARGET_SME2 +#include +#endif + namespace KLEIDICV_TARGET_NAMESPACE { // Represents a point on a 2D plane. @@ -524,8 +528,15 @@ class CopyRows final { public: void process_row(size_t length, Columns src, Columns dst) KLEIDICV_STREAMING_COMPATIBLE { - memmove(static_cast(&dst[0]), static_cast(&src[0]), - length * sizeof(T) * dst.channels()); +#if KLEIDICV_TARGET_SME2 + __arm_sc_memmove(static_cast(&dst[0]), + static_cast(&src[0]), + length * sizeof(T) * dst.channels()); +#else + std::memmove(static_cast(&dst[0]), + static_cast(&src[0]), + length * sizeof(T) * dst.channels()); +#endif } template @@ -542,8 +553,15 @@ class CopyNonOverlappingRows final { public: void process_row(size_t length, Columns src, Columns dst) KLEIDICV_STREAMING_COMPATIBLE { - memcpy(static_cast(&dst[0]), static_cast(&src[0]), - length * sizeof(T) * dst.channels()); +#if KLEIDICV_TARGET_SME2 + __arm_sc_memcpy(static_cast(&dst[0]), + static_cast(&src[0]), + length * sizeof(T) * dst.channels()); +#else + std::memcpy(static_cast(&dst[0]), + static_cast(&src[0]), + length * sizeof(T) * dst.channels()); +#endif } static void copy_rows(Rectangle rect, Rows src, @@ -562,7 +580,11 @@ void make_zero_border_border(Rectangle rect, Rows rows, Margin margin) { if (margin.left()) { size_t margin_width_in_bytes = margin.left() * sizeof(T) * rows.channels(); for (size_t index = 0; index < rect.height(); ++index) { - memset(&rows.at(index)[0], 0, margin_width_in_bytes); +#if KLEIDICV_TARGET_SME2 + __arm_sc_memset(&rows.at(index)[0], 0, margin_width_in_bytes); +#else + std::memset(&rows.at(index)[0], 0, margin_width_in_bytes); +#endif } } @@ -570,15 +592,24 @@ void make_zero_border_border(Rectangle rect, Rows rows, Margin margin) { size_t top_width = rect.width() - margin.left() - margin.right(); size_t top_width_in_bytes = top_width * sizeof(T) * rows.channels(); for (size_t index = 0; index < margin.top(); ++index) { - memset(&rows.at(index, margin.left())[0], 0, top_width_in_bytes); +#if KLEIDICV_TARGET_SME2 + __arm_sc_memset(&rows.at(index, margin.left())[0], 0, top_width_in_bytes); +#else + std::memset(&rows.at(index, margin.left())[0], 0, top_width_in_bytes); +#endif } } if (margin.right()) { size_t margin_width_in_bytes = margin.right() * sizeof(T) * rows.channels(); for (size_t index = 0; index < rect.height(); ++index) { - memset(&rows.at(index, rect.width() - margin.right())[0], 0, - margin_width_in_bytes); +#if KLEIDICV_TARGET_SME2 + __arm_sc_memset(&rows.at(index, rect.width() - margin.right())[0], 0, + margin_width_in_bytes); +#else + std::memset(&rows.at(index, rect.width() - margin.right())[0], 0, + margin_width_in_bytes); +#endif } } @@ -587,7 +618,12 @@ void make_zero_border_border(Rectangle rect, Rows rows, Margin margin) { size_t bottom_width_in_bytes = bottom_width * sizeof(T) * rows.channels(); for (size_t index = rect.height() - margin.bottom(); index < rect.height(); ++index) { - memset(&rows.at(index, margin.left())[0], 0, bottom_width_in_bytes); +#if KLEIDICV_TARGET_SME2 + __arm_sc_memset(&rows.at(index, margin.left())[0], 0, + bottom_width_in_bytes); +#else + std::memset(&rows.at(index, margin.left())[0], 0, bottom_width_in_bytes); +#endif } } } diff --git a/kleidicv/src/filters/gaussian_blur_fixed_sc.h b/kleidicv/src/filters/gaussian_blur_fixed_sc.h index db9c4096183aafe5a03a9a2b555bb9ddcc0f5a0f..9420b9ad861b8a180dd8fe3304e42b3a269a26ec 100644 --- a/kleidicv/src/filters/gaussian_blur_fixed_sc.h +++ b/kleidicv/src/filters/gaussian_blur_fixed_sc.h @@ -17,6 +17,10 @@ #include "kleidicv/filters/sigma.h" #include "kleidicv/workspace/separable.h" +#if KLEIDICV_TARGET_SME2 +#include +#endif + namespace KLEIDICV_TARGET_NAMESPACE { // Primary template for Gaussian Blur filters. @@ -360,9 +364,16 @@ static kleidicv_error_t gaussian_blur_fixed_kernel_size( border_type, filter); } else { for (size_t row = y_begin; row < y_end; ++row) { +#if KLEIDICV_TARGET_SME2 + __arm_sc_memcpy( + static_cast(&dst_rows.at(row)[0]), + static_cast(&src_rows.at(row)[0]), + rect.width() * sizeof(ScalarType) * dst_rows.channels()); +#else std::memcpy(static_cast(&dst_rows.at(row)[0]), static_cast(&src_rows.at(row)[0]), rect.width() * sizeof(ScalarType) * dst_rows.channels()); +#endif } } return KLEIDICV_OK;