From 228e1d285c691aaf20707fae03e0e326dbbad4f7 Mon Sep 17 00:00:00 2001 From: Mark Horvath Date: Tue, 8 Jul 2025 14:33:58 +0000 Subject: [PATCH] Directly call streaming compatible standard routines Call the streaming-compatible variants of the standard routines directly, as it is known at compilation time whether the streaming or non-streaming variants should be used. In some environments it causes issues if the non-streaming routines are called from streaming-compatible functions. (The correct behaviour would be for the compiler to link to the streaming variants automatically in such a case.) --- .../include/kleidicv/morphology/workspace.h | 12 +++- kleidicv/include/kleidicv/types.h | 56 +++++++++++++++---- kleidicv/src/filters/gaussian_blur_fixed_sc.h | 11 ++++ 3 files changed, 68 insertions(+), 11 deletions(-) diff --git a/kleidicv/include/kleidicv/morphology/workspace.h b/kleidicv/include/kleidicv/morphology/workspace.h index 58695b34d..55529750a 100644 --- a/kleidicv/include/kleidicv/morphology/workspace.h +++ b/kleidicv/include/kleidicv/morphology/workspace.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 @@ -14,6 +14,10 @@ #include "kleidicv/kleidicv.h" #include "kleidicv/types.h" +#if KLEIDICV_TARGET_SME2 +#include +#endif + namespace KLEIDICV_TARGET_NAMESPACE { // Forward declarations. 
@@ -58,9 +62,15 @@ class MorphologyWorkspace final { constexpr void operator()(Rows src_rows, Rows dst_rows, size_t length) const KLEIDICV_STREAMING_COMPATIBLE { +#if KLEIDICV_TARGET_SME2 + __arm_sc_memcpy(static_cast(&dst_rows[0]), + static_cast(&src_rows[0]), + length * sizeof(T) * dst_rows.channels()); +#else std::memcpy(static_cast(&dst_rows[0]), static_cast(&src_rows[0]), length * sizeof(T) * dst_rows.channels()); +#endif } }; diff --git a/kleidicv/include/kleidicv/types.h b/kleidicv/include/kleidicv/types.h index e1588cc11..98ea4151d 100644 --- a/kleidicv/include/kleidicv/types.h +++ b/kleidicv/include/kleidicv/types.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 @@ -13,6 +13,10 @@ #include "kleidicv/ctypes.h" #include "kleidicv/utils.h" +#if KLEIDICV_TARGET_SME2 +#include +#endif + namespace KLEIDICV_TARGET_NAMESPACE { // Represents a point on a 2D plane. 
@@ -524,8 +528,15 @@ class CopyRows final { public: void process_row(size_t length, Columns src, Columns dst) KLEIDICV_STREAMING_COMPATIBLE { - memmove(static_cast(&dst[0]), static_cast(&src[0]), - length * sizeof(T) * dst.channels()); +#if KLEIDICV_TARGET_SME2 + __arm_sc_memmove(static_cast(&dst[0]), + static_cast(&src[0]), + length * sizeof(T) * dst.channels()); +#else + std::memmove(static_cast(&dst[0]), + static_cast(&src[0]), + length * sizeof(T) * dst.channels()); +#endif } template @@ -542,8 +553,15 @@ class CopyNonOverlappingRows final { public: void process_row(size_t length, Columns src, Columns dst) KLEIDICV_STREAMING_COMPATIBLE { - memcpy(static_cast(&dst[0]), static_cast(&src[0]), - length * sizeof(T) * dst.channels()); +#if KLEIDICV_TARGET_SME2 + __arm_sc_memcpy(static_cast(&dst[0]), + static_cast(&src[0]), + length * sizeof(T) * dst.channels()); +#else + std::memcpy(static_cast(&dst[0]), + static_cast(&src[0]), + length * sizeof(T) * dst.channels()); +#endif } static void copy_rows(Rectangle rect, Rows src, @@ -562,7 +580,11 @@ void make_zero_border_border(Rectangle rect, Rows rows, Margin margin) { if (margin.left()) { size_t margin_width_in_bytes = margin.left() * sizeof(T) * rows.channels(); for (size_t index = 0; index < rect.height(); ++index) { - memset(&rows.at(index)[0], 0, margin_width_in_bytes); +#if KLEIDICV_TARGET_SME2 + __arm_sc_memset(&rows.at(index)[0], 0, margin_width_in_bytes); +#else + std::memset(&rows.at(index)[0], 0, margin_width_in_bytes); +#endif } } @@ -570,15 +592,24 @@ void make_zero_border_border(Rectangle rect, Rows rows, Margin margin) { size_t top_width = rect.width() - margin.left() - margin.right(); size_t top_width_in_bytes = top_width * sizeof(T) * rows.channels(); for (size_t index = 0; index < margin.top(); ++index) { - memset(&rows.at(index, margin.left())[0], 0, top_width_in_bytes); +#if KLEIDICV_TARGET_SME2 + __arm_sc_memset(&rows.at(index, margin.left())[0], 0, top_width_in_bytes); +#else + 
std::memset(&rows.at(index, margin.left())[0], 0, top_width_in_bytes); +#endif } } if (margin.right()) { size_t margin_width_in_bytes = margin.right() * sizeof(T) * rows.channels(); for (size_t index = 0; index < rect.height(); ++index) { - memset(&rows.at(index, rect.width() - margin.right())[0], 0, - margin_width_in_bytes); +#if KLEIDICV_TARGET_SME2 + __arm_sc_memset(&rows.at(index, rect.width() - margin.right())[0], 0, + margin_width_in_bytes); +#else + std::memset(&rows.at(index, rect.width() - margin.right())[0], 0, + margin_width_in_bytes); +#endif } } @@ -587,7 +618,12 @@ void make_zero_border_border(Rectangle rect, Rows rows, Margin margin) { size_t bottom_width_in_bytes = bottom_width * sizeof(T) * rows.channels(); for (size_t index = rect.height() - margin.bottom(); index < rect.height(); ++index) { - memset(&rows.at(index, margin.left())[0], 0, bottom_width_in_bytes); +#if KLEIDICV_TARGET_SME2 + __arm_sc_memset(&rows.at(index, margin.left())[0], 0, + bottom_width_in_bytes); +#else + std::memset(&rows.at(index, margin.left())[0], 0, bottom_width_in_bytes); +#endif } } } diff --git a/kleidicv/src/filters/gaussian_blur_fixed_sc.h b/kleidicv/src/filters/gaussian_blur_fixed_sc.h index db9c40961..9420b9ad8 100644 --- a/kleidicv/src/filters/gaussian_blur_fixed_sc.h +++ b/kleidicv/src/filters/gaussian_blur_fixed_sc.h @@ -17,6 +17,10 @@ #include "kleidicv/filters/sigma.h" #include "kleidicv/workspace/separable.h" +#if KLEIDICV_TARGET_SME2 +#include +#endif + namespace KLEIDICV_TARGET_NAMESPACE { // Primary template for Gaussian Blur filters. 
@@ -360,9 +364,16 @@ static kleidicv_error_t gaussian_blur_fixed_kernel_size( border_type, filter); } else { for (size_t row = y_begin; row < y_end; ++row) { +#if KLEIDICV_TARGET_SME2 + __arm_sc_memcpy( + static_cast(&dst_rows.at(row)[0]), + static_cast(&src_rows.at(row)[0]), + rect.width() * sizeof(ScalarType) * dst_rows.channels()); +#else std::memcpy(static_cast(&dst_rows.at(row)[0]), static_cast(&src_rows.at(row)[0]), rect.width() * sizeof(ScalarType) * dst_rows.channels()); +#endif } } return KLEIDICV_OK; -- GitLab