From 3d59c17099a9c958e319fe3b8542dd70a39d2716 Mon Sep 17 00:00:00 2001
From: Ioana Ghiban
Date: Thu, 18 Apr 2024 14:17:50 +0200
Subject: [PATCH] Implement SVE and SME versions of min_max

---
 kleidicv/src/analysis/min_max_api.cpp  | 26 ++++++--
 kleidicv/src/analysis/min_max_sc.h     | 93 ++++++++++++++++++++++++++
 kleidicv/src/analysis/min_max_sme2.cpp | 30 ++++++++
 kleidicv/src/analysis/min_max_sve2.cpp | 31 +++++++++
 4 files changed, 175 insertions(+), 5 deletions(-)
 create mode 100644 kleidicv/src/analysis/min_max_sc.h
 create mode 100644 kleidicv/src/analysis/min_max_sme2.cpp
 create mode 100644 kleidicv/src/analysis/min_max_sve2.cpp

diff --git a/kleidicv/src/analysis/min_max_api.cpp b/kleidicv/src/analysis/min_max_api.cpp
index 3fcdf995a..ad7c0c908 100644
--- a/kleidicv/src/analysis/min_max_api.cpp
+++ b/kleidicv/src/analysis/min_max_api.cpp
@@ -21,15 +21,31 @@ kleidicv_error_t min_max_loc(const T *src, size_t src_stride, size_t width,
 
 }  // namespace neon
 
-namespace sve2 {}  // namespace sve2
+namespace sve2 {
 
-namespace sme2 {}  // namespace sme2
+// Declared here only; defined and explicitly instantiated in min_max_sve2.cpp.
+template <typename T>
+kleidicv_error_t min_max(const T *src, size_t src_stride, size_t width,
+                         size_t height, T *min_value, T *max_value);
+}  // namespace sve2
+
+namespace sme2 {
+
+// Declared here only; defined and explicitly instantiated in min_max_sme2.cpp.
+template <typename T>
+kleidicv_error_t min_max(const T *src, size_t src_stride, size_t width,
+                         size_t height, T *min_value, T *max_value);
+}  // namespace sme2
 
 }  // namespace kleidicv
 
-#define KLEIDICV_DEFINE_MINMAX_API(name, type)                               \
-  KLEIDICV_MULTIVERSION_C_API(name, &kleidicv::neon::min_max<type>, nullptr, \
-                              nullptr)
+// Hands the NEON, SVE2 and SME2 variants of min_max<type> to
+// KLEIDICV_MULTIVERSION_C_API under the API name `name`.
+#define KLEIDICV_DEFINE_MINMAX_API(name, type)               \
+  KLEIDICV_MULTIVERSION_C_API(                               \
+      name, &kleidicv::neon::min_max<type>,                  \
+      KLEIDICV_SVE2_IMPL_IF(&kleidicv::sve2::min_max<type>), \
+      &kleidicv::sme2::min_max<type>)
 
 KLEIDICV_DEFINE_MINMAX_API(kleidicv_min_max_u8, uint8_t);
 KLEIDICV_DEFINE_MINMAX_API(kleidicv_min_max_s8, int8_t);
diff --git a/kleidicv/src/analysis/min_max_sc.h b/kleidicv/src/analysis/min_max_sc.h
new file mode 100644
index 000000000..a01ce6089
--- /dev/null
+++ b/kleidicv/src/analysis/min_max_sc.h
@@ -0,0 +1,93 @@
+// SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef KLEIDICV_MIN_MAX_SC_H
+#define KLEIDICV_MIN_MAX_SC_H
+
+#include <limits>
+
+#include "kleidicv/kleidicv.h"
+#include "kleidicv/sve2.h"
+
+namespace KLEIDICV_TARGET_NAMESPACE {
+
+// Row operation that folds each processed vector into a running per-lane
+// minimum and a running per-lane maximum. The two accumulators are owned by
+// the caller; this class only holds references to them.
+template <typename ScalarType>
+class MinMax final : public UnrollTwice {
+ public:
+  using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>;
+  using VectorType = typename VecTraits::VectorType;
+  using ContextType = Context;
+
+  MinMax(VectorType &vmin, VectorType &vmax) : vmin_{vmin}, vmax_{vmax} {}
+
+  // Updates the accumulators with the lanes of 'src' selected by the context
+  // predicate. Merging (_m) predication is required here: with _x the
+  // inactive lanes of the result are unspecified, yet get_min() and get_max()
+  // reduce under VecTraits::svptrue().
+  void vector_path(ContextType ctx, VectorType src) {
+    auto pg = ctx.predicate();
+    vmin_ = svmin_m(pg, vmin_, src);
+    vmax_ = svmax_m(pg, vmax_, src);
+  }
+
+  // Horizontal minimum of the minimum accumulator.
+  ScalarType get_min() const {
+    auto pg = VecTraits::svptrue();
+    return svminv(pg, vmin_);
+  }
+
+  // Horizontal maximum of the maximum accumulator.
+  ScalarType get_max() const {
+    auto pg = VecTraits::svptrue();
+    return svmaxv(pg, vmax_);
+  }
+
+ private:
+  VectorType &vmin_, &vmax_;
+};  // end of class MinMax
+
+// Computes the smallest and largest element of a width x height image.
+//
+// Returns KLEIDICV_ERROR_RANGE when width or height is zero and KLEIDICV_OK
+// on success; CHECK_POINTER_AND_STRIDE and CHECK_IMAGE_SIZE may return
+// earlier. 'min_value' and 'max_value' are optional: a null pointer skips
+// the corresponding output.
+template <typename ScalarType>
+kleidicv_error_t min_max_sc(const ScalarType *src, size_t src_stride,
+                            size_t width, size_t height, ScalarType *min_value,
+                            ScalarType *max_value) {
+  CHECK_POINTER_AND_STRIDE(src, src_stride);
+  CHECK_IMAGE_SIZE(width, height);
+
+  if (KLEIDICV_UNLIKELY(width == 0 || height == 0)) {
+    return KLEIDICV_ERROR_RANGE;
+  }
+
+  Rectangle rect{width, height};
+  Rows<const ScalarType> src_rows{src, src_stride};
+
+  using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits<ScalarType>;
+  using VectorType = typename VecTraits::VectorType;
+  // Seed the accumulators with the extreme finite values of ScalarType.
+  VectorType vmin = VecTraits::svdup(std::numeric_limits<ScalarType>::max());
+  VectorType vmax = VecTraits::svdup(std::numeric_limits<ScalarType>::lowest());
+
+  MinMax<ScalarType> operation{vmin, vmax};
+
+  apply_operation_by_rows(operation, rect, src_rows);
+  if (min_value) {
+    *min_value = operation.get_min();
+  }
+  if (max_value) {
+    *max_value = operation.get_max();
+  }
+  return KLEIDICV_OK;
+}
+
+}  // namespace KLEIDICV_TARGET_NAMESPACE
+
+#endif  // KLEIDICV_MIN_MAX_SC_H
diff --git a/kleidicv/src/analysis/min_max_sme2.cpp b/kleidicv/src/analysis/min_max_sme2.cpp
new file mode 100644
index 000000000..7c119a12c
--- /dev/null
+++ b/kleidicv/src/analysis/min_max_sme2.cpp
@@ -0,0 +1,30 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "min_max_sc.h"
+
+namespace kleidicv::sme2 {
+
+// SME2 variant: forwards to the shared implementation in min_max_sc.h.
+template <typename T>
+KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t
+min_max(const T *src, size_t src_stride, size_t width, size_t height,
+        T *min_value, T *max_value) {
+  return min_max_sc(src, src_stride, width, height, min_value, max_value);
+}
+
+// Explicitly instantiates min_max for one element type.
+#define KLEIDICV_INSTANTIATE_TEMPLATE(type)                            \
+  template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t min_max<type>(    \
+      const type *src, size_t src_stride, size_t width, size_t height, \
+      type *min_value, type *max_value)
+
+KLEIDICV_INSTANTIATE_TEMPLATE(int8_t);
+KLEIDICV_INSTANTIATE_TEMPLATE(uint8_t);
+KLEIDICV_INSTANTIATE_TEMPLATE(int16_t);
+KLEIDICV_INSTANTIATE_TEMPLATE(uint16_t);
+KLEIDICV_INSTANTIATE_TEMPLATE(int32_t);
+KLEIDICV_INSTANTIATE_TEMPLATE(float);
+
+}  // namespace kleidicv::sme2
diff --git a/kleidicv/src/analysis/min_max_sve2.cpp b/kleidicv/src/analysis/min_max_sve2.cpp
new file mode 100644
index 000000000..200b4ebe4
--- /dev/null
+++ b/kleidicv/src/analysis/min_max_sve2.cpp
@@ -0,0 +1,31 @@
+// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "min_max_sc.h"
+
+namespace kleidicv::sve2 {
+
+// SVE2 variant: forwards to the shared implementation in min_max_sc.h.
+template <typename T>
+KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t min_max(const T *src,
+                                                  size_t src_stride,
+                                                  size_t width, size_t height,
+                                                  T *min_value, T *max_value) {
+  return min_max_sc(src, src_stride, width, height, min_value, max_value);
+}
+
+// Explicitly instantiates min_max for one element type.
+#define KLEIDICV_INSTANTIATE_TEMPLATE(type)                            \
+  template KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t min_max<type>(    \
+      const type *src, size_t src_stride, size_t width, size_t height, \
+      type *min_value, type *max_value)
+
+KLEIDICV_INSTANTIATE_TEMPLATE(int8_t);
+KLEIDICV_INSTANTIATE_TEMPLATE(uint8_t);
+KLEIDICV_INSTANTIATE_TEMPLATE(int16_t);
+KLEIDICV_INSTANTIATE_TEMPLATE(uint16_t);
+KLEIDICV_INSTANTIATE_TEMPLATE(int32_t);
+KLEIDICV_INSTANTIATE_TEMPLATE(float);
+
+}  // namespace kleidicv::sve2
-- 
GitLab