diff --git a/README.md b/README.md index 15914df25c6cd644b6f5a002da872d61e5ed6e2d..f299d54e134f6054edc78c9a8efcb673b94922a6 100644 --- a/README.md +++ b/README.md @@ -19,3 +19,4 @@ It is designed to be simple to integrate into a wide variety of projects. * [Benchmarking](doc/benchmark.md) * [Testing](doc/test.md) * [Contributing](doc/contributing.md) +* [Integration](doc/integration.md) diff --git a/doc/integration.md b/doc/integration.md new file mode 100644 index 0000000000000000000000000000000000000000..ef77269678b50b808af0e1a48c50fbac170f867b --- /dev/null +++ b/doc/integration.md @@ -0,0 +1,70 @@ + + +# Integration guide + +KleidiCV is designed to be easily reusable as it is a single C++ library with a +C API and its only runtime dependency is the C standard library. (Only +compile-time C++ constructs are used in the library.) + +KleidiCV is already integrated into OpenCV; `adapters/opencv/kleidicv_hal.h` and +`adapters/opencv/kleidicv_hal.cpp` contain the implementation of it. It can be + an example for integrating KleidiCV into other projects. OpenCV has a +`hal_replacement.hpp` file per OpenCV module which lists the HAL (Hardware +Acceleration Layer) functions for the given module. +`adapters/opencv/kleidicv_hal.h` sets macros (with some fallback mechanism to +other HAL implementations) to override the default HAL functions, and +`adapters/opencv/kleidicv_hal.cpp` contains implementations for these HAL +functions using KleidiCV as the backend. + +## Extract one operation from KleidiCV + +It can be desirable to extract only one function from KleidiCV, so the +repository contains an example of that at `examples/extract_one_operation`. +It is a CMake project which creates a shared library and an example application +to demonstrate the usage of the created shared library. 
+ +Generally, in KleidiCV one operation is implemented by one C++ source file, so +the example library uses one `.cpp` file, +`kleidicv/src/filters/gaussian_blur_fixed_sme2.cpp` from the KleidiCV source +tree, and all the included header files. +`examples/extract_one_operation/sme_gaussian_blur.cpp` is needed to glue the +example library's public API with the implementation. The example library's +public API is defined by +`examples/extract_one_operation/sme_gaussian_blur_api.h`. + +### Build the example + +Building the example requires an SME-capable toolchain and CMake. Example +command to build: + +``` +cmake -S kleidicv/examples/extract_one_operation \ + -B build/extract +``` + +If make was used as the generator for CMake, +`build/extract/CMakeFiles/sme_gaussian_blur.dir/path/to/kleidicv/kleidicv/src/filters/gaussian_blur_fixed_sme2.cpp.o.d` +lists the header files that were used from KleidiCV. (It also lists the system headers that were used.) + +To try the example, just run: + +``` +./build/extract/example_usage +``` + +### Build the example for Android + +When targeting Android, the +[Android NDK](https://developer.android.com/ndk/) is also needed. (At least +version r28b.) Example build command is: + +``` +cmake -S kleidicv/examples/extract_one_operation \ + -B build/extract \ + -DANDROID_ABI=arm64-v8a \ + -DCMAKE_TOOLCHAIN_FILE=/path/to/android-ndk/build/cmake/android.toolchain.cmake +``` diff --git a/examples/extract_one_operation/CMakeLists.txt b/examples/extract_one_operation/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..82d739b84bf9377a8a6dc24e5b1f630255be2015 --- /dev/null +++ b/examples/extract_one_operation/CMakeLists.txt @@ -0,0 +1,47 @@ +# SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +# +# SPDX-License-Identifier: Apache-2.0 + +cmake_minimum_required(VERSION 3.16) + +project("SME Gaussian Blur") + +# Create example shared library containing only the SME backend of GaussianBlur. 
+add_library( + sme_gaussian_blur + SHARED + sme_gaussian_blur.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../kleidicv/src/filters/gaussian_blur_fixed_sme2.cpp +) + +set_target_properties( + sme_gaussian_blur + PROPERTIES + CXX_STANDARD 17 +) + +target_include_directories( + sme_gaussian_blur + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/../../kleidicv/include +) + +target_compile_options(sme_gaussian_blur + PRIVATE + "-march=armv9-a+sme" + "-DKLEIDICV_TARGET_SME2=1" + "-DKLEIDICV_TARGET_NAMESPACE=kleidicv::sme2" + "-DKLEIDICV_TARGET_FN_ATTRS=" +) + +# Create example binary to use the library. +add_executable( + example_usage + example_usage.c +) + +target_link_libraries( + example_usage + sme_gaussian_blur +) diff --git a/examples/extract_one_operation/example_usage.c b/examples/extract_one_operation/example_usage.c new file mode 100644 index 0000000000000000000000000000000000000000..3b77fcb038861161828558e72044f83def73a33b --- /dev/null +++ b/examples/extract_one_operation/example_usage.c @@ -0,0 +1,38 @@ +// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include <stdio.h> + +#include "sme_gaussian_blur_api.h" + +#define WIDTH 20 +#define HEIGHT 20 + +int main(void) { + uint8_t src[WIDTH * HEIGHT]; + uint8_t dst[WIDTH * HEIGHT]; + + // Input image with a vertical line in the middle. + for (size_t y = 0; y < HEIGHT; ++y) { + for (size_t x = 0; x < WIDTH; ++x) { + src[x + y * WIDTH] = (x == (WIDTH / 2)) ? 255 : 0; + } + } + + // Execute 15x15 GaussianBlur with default sigma. + // As src is single-channel uint8_t, the stride is equal to WIDTH. + sme_gaussian_blur_u8(src, WIDTH, dst, WIDTH, WIDTH, HEIGHT, 1, 15, 15, 0.0F, + 0.0F, KLEIDICV_BORDER_TYPE_REFLECT); + + // Print raw pixel values to show that the middle vertical line was blurred. 
+ printf("Raw pixel values for the blurred output:\n"); + for (size_t y = 0; y < HEIGHT; ++y) { + for (size_t x = 0; x < WIDTH; ++x) { + printf("%d\t", dst[x + y * WIDTH]); + } + putchar('\n'); + } + + return 0; +} diff --git a/examples/extract_one_operation/kleidicv/config.h b/examples/extract_one_operation/kleidicv/config.h new file mode 100644 index 0000000000000000000000000000000000000000..780908a0eb0fb409aa3ffa4bc447f2acc803cfe6 --- /dev/null +++ b/examples/extract_one_operation/kleidicv/config.h @@ -0,0 +1,20 @@ +// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +// Static config based on kleidicv/include/kleidicv/config.h.in to meet the +// needs of the example. + +#define KLEIDICV_LOCALLY_STREAMING __arm_locally_streaming +#define KLEIDICV_STREAMING_COMPATIBLE __arm_streaming_compatible + +#define KLEIDICV_UNLIKELY(cond) __builtin_expect((cond), 0) + +#define KLEIDICV_ATTR_ALIGNED(alignment) __attribute__((aligned(alignment))) + +#ifdef __clang__ +#define KLEIDICV_FORCE_LOOP_UNROLL _Pragma("clang loop unroll(full)") +#else +// GCC doesn't have clang's unroll(full). 16 is typically plenty. 
+#define KLEIDICV_FORCE_LOOP_UNROLL _Pragma("GCC unroll 16") +#endif diff --git a/examples/extract_one_operation/sme_gaussian_blur.cpp b/examples/extract_one_operation/sme_gaussian_blur.cpp new file mode 100644 index 0000000000000000000000000000000000000000..eded1b6a0c3ed24ae32da600632847096ef9a3d8 --- /dev/null +++ b/examples/extract_one_operation/sme_gaussian_blur.cpp @@ -0,0 +1,100 @@ +// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "kleidicv/ctypes.h" +#include "kleidicv/filters/gaussian_blur.h" +#include "kleidicv/utils.h" +#include "kleidicv/workspace/separable.h" + +// Copied from kleidicv/src/filters/separable_filter_2d_api.cpp +static kleidicv_error_t filter_context_create( + kleidicv_filter_context_t **context, size_t max_channels, + size_t max_kernel_width, size_t max_kernel_height, size_t max_image_width, + size_t max_image_height) { + CHECK_POINTERS(context); + + if (max_kernel_width != max_kernel_height) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + + if (max_channels > KLEIDICV_MAXIMUM_CHANNEL_COUNT) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + + CHECK_IMAGE_SIZE(max_image_width, max_image_height); + + // As we cannot predict the intermediate size based on the parameters given, + // just use the largest possible size out of all available operations. 
+ constexpr size_t intermediate_size = sizeof(uint32_t); + auto workspace = kleidicv::sme2::SeparableFilterWorkspace::create( + kleidicv::sme2::Rectangle{max_image_width, max_image_height}, + max_channels, intermediate_size); + if (!workspace) { + *context = nullptr; + return KLEIDICV_ERROR_ALLOCATION; + } + + *context = reinterpret_cast<kleidicv_filter_context_t *>(workspace.release()); + return KLEIDICV_OK; +} + +// Copied from kleidicv/src/filters/separable_filter_2d_api.cpp +static kleidicv_error_t filter_context_release( + kleidicv_filter_context_t *context) { + CHECK_POINTERS(context); + + // Deliberately create and immediately destroy a unique_ptr to delete the + // workspace. + // NOLINTBEGIN(bugprone-unused-raii) + kleidicv::sme2::SeparableFilterWorkspace::Pointer{ + reinterpret_cast<kleidicv::sme2::SeparableFilterWorkspace *>(context)}; + // NOLINTEND(bugprone-unused-raii) + return KLEIDICV_OK; +} + +extern "C" { + +// Implemented based on kleidicv_gaussian_blur_u8 function (placed in +// kleidicv/src/filters/gaussian_blur_api.cpp), but the filter context is +// created (no need to pass it as an input) and the SME backend is called +// directly. (Original implementation calls the dispatcher to choose between +// backends.) 
+kleidicv_error_t sme_gaussian_blur_u8(const uint8_t *src, size_t src_stride, + uint8_t *dst, size_t dst_stride, + size_t width, size_t height, + size_t channels, size_t kernel_width, + size_t kernel_height, float sigma_x, + float sigma_y, + kleidicv_border_type_t border_type) { + auto fixed_border_type = kleidicv::get_fixed_border_type(border_type); + if (!fixed_border_type) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + + if (!kleidicv::gaussian_blur_is_implemented(width, height, kernel_width, + kernel_height, sigma_x, sigma_y, + channels, *fixed_border_type)) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + + if (kernel_width <= 7 || kernel_width == 15 || kernel_width == 21) { + kleidicv_filter_context_t *context = nullptr; + if (kleidicv_error_t create_err = filter_context_create( + &context, channels, kernel_width, kernel_height, width, height)) { + return create_err; + } + + kleidicv_error_t blur_err = kleidicv::sme2::gaussian_blur_fixed_stripe_u8( + src, src_stride, dst, dst_stride, width, height, 0, height, channels, + kernel_width, kernel_height, sigma_x, sigma_y, *fixed_border_type, + context); + + kleidicv_error_t release_err = filter_context_release(context); + return blur_err ? 
blur_err : release_err; + } + + return KLEIDICV_ERROR_NOT_IMPLEMENTED; +} + +} // extern "C" diff --git a/examples/extract_one_operation/sme_gaussian_blur_api.h b/examples/extract_one_operation/sme_gaussian_blur_api.h new file mode 100644 index 0000000000000000000000000000000000000000..be58349be248d8f503a22e57b1d712b54046b4fd --- /dev/null +++ b/examples/extract_one_operation/sme_gaussian_blur_api.h @@ -0,0 +1,76 @@ +// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef SME_GAUSSIAN_BLUR_H +#define SME_GAUSSIAN_BLUR_H + +#ifdef __cplusplus +#include <cinttypes> +#include <cstddef> +#else // __cplusplus +#include "inttypes.h" +#include "stddef.h" +#endif // __cplusplus + +// Copied from kleidicv/include/kleidicv/ctypes.h +typedef enum { + /// Success. + KLEIDICV_OK = 0, + /// Requested operation is not implemented. + KLEIDICV_ERROR_NOT_IMPLEMENTED, + /// Null pointer was passed as an argument. + KLEIDICV_ERROR_NULL_POINTER, + /// A value was encountered outside the representable or valid range. + KLEIDICV_ERROR_RANGE, + /// Could not allocate memory. + KLEIDICV_ERROR_ALLOCATION, + /// A value did not meet alignment requirements. + KLEIDICV_ERROR_ALIGNMENT, + /// The provided context (like @ref kleidicv_morphology_context_t) is not + /// compatible with the operation. + KLEIDICV_ERROR_CONTEXT_MISMATCH, +} kleidicv_error_t; + +// Copied from kleidicv/include/kleidicv/ctypes.h +typedef enum { + /// The border is a constant value. + KLEIDICV_BORDER_TYPE_CONSTANT, + /// The border is the value of the first/last element. + KLEIDICV_BORDER_TYPE_REPLICATE, + /// The border is the mirrored value of the first/last elements. + KLEIDICV_BORDER_TYPE_REFLECT, + /// The border simply acts as a "wrap around" to the beginning/end. + KLEIDICV_BORDER_TYPE_WRAP, + /// Like KLEIDICV_BORDER_TYPE_REFLECT, but the first/last elements are + /// ignored. + KLEIDICV_BORDER_TYPE_REVERSE, + /// The border is the "continuation" of the input rows. 
It is the caller's + /// responsibility to provide the input data (and an appropriate stride value) + /// in a way that the rows can be under and over read. E.g. can be used when + /// executing an operation on a region of a picture. + KLEIDICV_BORDER_TYPE_TRANSPARENT, + /// The border is a hard border, there are no additional values to use. + KLEIDICV_BORDER_TYPE_NONE, +} kleidicv_border_type_t; + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +// Similar functionality to kleidicv_gaussian_blur_u8, but filter context +// creation/destruction is handled automatically and the SME backend is used +// directly, so fewer kernel sizes are supported. +kleidicv_error_t sme_gaussian_blur_u8(const uint8_t *src, size_t src_stride, + uint8_t *dst, size_t dst_stride, + size_t width, size_t height, + size_t channels, size_t kernel_width, + size_t kernel_height, float sigma_x, + float sigma_y, + kleidicv_border_type_t border_type); + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +#endif // SME_GAUSSIAN_BLUR_H diff --git a/scripts/ci.sh b/scripts/ci.sh index b2c8928d85905686e65f84d76c36c3e5c0b0671e..357c5160fe201e4dbaa18c165906266281853010 100755 --- a/scripts/ci.sh +++ b/scripts/ci.sh @@ -111,6 +111,11 @@ cmake -S . -B build/ci/build-benchmark -G Ninja \ -DKLEIDICV_NEON_USE_CONTINUOUS_MULTIVEC_LS=OFF ninja -C build/ci/build-benchmark kleidicv-benchmark +# Build examples to prevent bitrot. +cmake -S ./examples/extract_one_operation -B build/ci/extract_example -G Ninja \ + -DCMAKE_EXE_LINKER_FLAGS="--rtlib=compiler-rt -fuse-ld=lld" +ninja -C build/ci/extract_example + # TODO: Cross-build OpenCV if [[ $(dpkg --print-architecture) = arm64 ]]; then # Check OpenCV-KleidiCV integration