diff --git a/CHANGELOG.md b/CHANGELOG.md index 20d100bbfcde384e07650d6db0a3f28c0609e51b..cb86f428d22704687b8c7d2cf6d287ff744f9685 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ This changelog aims to follow the guiding principles of - Remap implementation - 2-channel s16 and s16+u16 fixed-point coordinates and 1-channel u8 input. - 2-channel s16 + 5+5 bits' fractions fixed-point coordinates and 1-channel u8 input. +- Implementation for cv::pyrDown in the OpenCV HAL. ## 0.2.0 - 2024-09-30 diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp index 6c1c3782935159b63a3df55be7e67286dab13c63..01b33c8b00c5b6918bb1d340d6539c73fd72edaf 100644 --- a/adapters/opencv/kleidicv_hal.cpp +++ b/adapters/opencv/kleidicv_hal.cpp @@ -12,6 +12,7 @@ #include #include +#include "kleidicv/filters/blur_and_downsample.h" #include "kleidicv/filters/gaussian_blur.h" #include "kleidicv/kleidicv.h" #include "kleidicv_thread/kleidicv_thread.h" @@ -1276,4 +1277,54 @@ int remap_s16point5(int src_type, const uchar *src_data, size_t src_step, return CV_HAL_ERROR_NOT_IMPLEMENTED; } +int pyrdown(const uchar *src_data, size_t src_step, int src_width, + int src_height, uchar *dst_data, size_t dst_step, int dst_width, + int dst_height, int depth, int cn, int border_type) { + if (src_data == dst_data) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + switch (depth) { + case CV_8U: + break; + + default: + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + if ((dst_width != (src_width + 1) / 2) || + (dst_height != (src_height + 1) / 2)) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + kleidicv_border_type_t kleidicv_border_type; + if (from_opencv(border_type, kleidicv_border_type)) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + // Check for not-implemented before allocating a context + if (!kleidicv::blur_and_downsample_is_implemented(src_width, src_height, + cn) || + !kleidicv::get_fixed_border_type(kleidicv_border_type)) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + kleidicv_filter_context_t *context; + if (kleidicv_error_t create_err = kleidicv_filter_context_create( + &context, cn, 5, 5, static_cast(src_width), + static_cast(src_height))) { + return convert_error(create_err); + } + + auto mt = get_multithreading(); + kleidicv_error_t blur_err = kleidicv_thread_blur_and_downsample_u8( + reinterpret_cast(src_data), src_step, src_width, + src_height, reinterpret_cast(dst_data), dst_step, cn, + kleidicv_border_type, context, mt); + + kleidicv_error_t release_err = kleidicv_filter_context_release(context); + + return convert_error(blur_err ? blur_err : release_err); +} + } // namespace kleidicv::hal diff --git a/adapters/opencv/kleidicv_hal.h b/adapters/opencv/kleidicv_hal.h index c45eaede7d816c442a26af287e9ab2f0a79fb2e3..70f92d20f98de4eda01ee8ef5dcab251a429d9b3 100644 --- a/adapters/opencv/kleidicv_hal.h +++ b/adapters/opencv/kleidicv_hal.h @@ -116,6 +116,10 @@ int canny(const uchar *src_data, size_t src_step, uchar *dst_data, double highThreshold, int ksize, bool L2gradient); #endif // KLEIDICV_EXPERIMENTAL_FEATURE_CANNY +int pyrdown(const uchar *src_data, size_t src_step, int src_width, + int src_height, uchar *dst_data, size_t dst_step, int dst_width, + int dst_height, int depth, int cn, int border_type); + int transpose(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int src_width, int src_height, int element_size); @@ -376,7 +380,6 @@ static inline int kleidicv_remap_s16_with_fallback( src_height, dst_data, dst_step, dst_width, dst_height, mapxy, mapxy_step, border_type, border_value); } - #undef cv_hal_remap16s #define cv_hal_remap16s kleidicv_remap_s16_with_fallback #endif // cv_hal_remap16s @@ -400,6 +403,18 @@ static inline int kleidicv_remap_s16point5_with_fallback( #define cv_hal_remap16s16u kleidicv_remap_s16point5_with_fallback #endif // cv_hal_remap16s16u +// pyrdown +static inline int kleidicv_pyrdown_with_fallback( + const uchar *src_data, size_t src_step, int src_width, int src_height, + uchar *dst_data, size_t dst_step, int dst_width, int dst_height, int depth, + int cn, int border_type) { + return KLEIDICV_HAL_FALLBACK_FORWARD( + pyrdown, cv_hal_pyrdown, src_data, src_step, src_width, src_height, + dst_data, dst_step, dst_width, dst_height, depth, cn, border_type); +} +#undef cv_hal_pyrdown +#define cv_hal_pyrdown kleidicv_pyrdown_with_fallback + #endif // OPENCV_IMGPROC_HAL_REPLACEMENT_HPP #ifdef OPENCV_CORE_HAL_REPLACEMENT_HPP diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 35fa0a855e87e41d5dc45d52a08be93606b381b6..f3b034c802d461aa5e87a40cd73823e00e5fbf98 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -451,3 +451,25 @@ static void in_range(Function f, T lower_bound, T upper_bound, BENCH_IN_RANGE(in_range_u8, in_range_u8, 1, 2, uint8_t); BENCH_IN_RANGE(in_range_f32, in_range_f32, 1.111, 1.112, float); + +static void blur_and_downsample_u8(benchmark::State& state) { + kleidicv_filter_context_t* context; + kleidicv_error_t err = kleidicv_filter_context_create( + &context, 1, 5, 5, image_width, image_height); + if (err != KLEIDICV_OK) { + state.SkipWithError( + "Could not initialize filter context for Blur and Downsample"); + return; + } + + bench_functor(state, [context]() { + (void)kleidicv_blur_and_downsample_u8( + get_source_buffer_a(), image_width * sizeof(uint8_t), + image_width, image_height, get_destination_buffer(), + ((image_width + 1) / 2) * sizeof(uint8_t), 1, + KLEIDICV_BORDER_TYPE_REFLECT, context); + }); + + (void)kleidicv_filter_context_release(context); +} +BENCHMARK(blur_and_downsample_u8); diff --git a/conformity/opencv/test_blur_and_downsample.cpp b/conformity/opencv/test_blur_and_downsample.cpp new file mode 100644 index 0000000000000000000000000000000000000000..252aa5c158c7b984c500e7097a0037df7f2225c3 --- /dev/null +++ b/conformity/opencv/test_blur_and_downsample.cpp @@ -0,0 +1,52 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include "tests.h" + +template +cv::Mat exec_blur_and_downsample(cv::Mat& input) { + cv::Mat result; + cv::pyrDown(input, result, cv::Size(), BorderType); + return result; +} + +#if MANAGER +template +bool test_blur_and_downsample(int index, RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue) { + cv::RNG rng(0); + + for (size_t x = 5; x <= 16; ++x) { + for (size_t y = 5; y <= 16; ++y) { + cv::Mat input(x, y, CV_8UC1); + rng.fill(input, cv::RNG::UNIFORM, 0, 255); + + cv::Mat actual = exec_blur_and_downsample(input); + cv::Mat expected = get_expected_from_subordinate(index, request_queue, + reply_queue, input); + + if (are_matrices_different(0, actual, expected)) { + fail_print_matrices(x, y, input, actual, expected); + return true; + } + } + } + + return false; +} +#endif + +std::vector& blur_and_downsample_tests_get() { + // clang-format off + static std::vector tests = { + TEST("Blur and Downsample, BORDER_REFLECT_101", (test_blur_and_downsample), exec_blur_and_downsample), + TEST("Blur and Downsample, BORDER_REFLECT", (test_blur_and_downsample), exec_blur_and_downsample), + TEST("Blur and Downsample, BORDER_WRAP", (test_blur_and_downsample), exec_blur_and_downsample), + TEST("Blur and Downsample, BORDER_REPLICATE", (test_blur_and_downsample), exec_blur_and_downsample), + }; + // clang-format on + return tests; +} diff --git a/conformity/opencv/tests.cpp b/conformity/opencv/tests.cpp index 59a957658bd783baca766a942e7d468ecbafa622..b3d15769c53bedd794794302dfe60f1b6a8f5268 100644 --- a/conformity/opencv/tests.cpp +++ b/conformity/opencv/tests.cpp @@ -40,6 +40,7 @@ std::vector all_tests = merge_tests({ min_max_tests_get, in_range_tests_get, remap_tests_get, + blur_and_downsample_tests_get, // clang-format on }); diff --git a/conformity/opencv/tests.h b/conformity/opencv/tests.h index 8b0efd612cebef09c9f6f322eef3abfcdb28b174..c1b3c8a4dc3b9e208b421df485a7341d2a28f34a 100644 --- a/conformity/opencv/tests.h +++ b/conformity/opencv/tests.h @@ -23,5 +23,6 @@ std::vector& scale_tests_get(); std::vector& min_max_tests_get(); std::vector& in_range_tests_get(); std::vector& remap_tests_get(); +std::vector& blur_and_downsample_tests_get(); #endif // KLEIDICV_OPENCV_CONFORMITY_TESTS_H_ diff --git a/doc/opencv.md b/doc/opencv.md index 8fe94b51b2594ff46460e32aa4211419c0c3cc07..d56d7dedd6c9b60689d5c9e34119b5c5d5477756 100644 --- a/doc/opencv.md +++ b/doc/opencv.md @@ -193,6 +193,7 @@ Notes on parameters: ### [`cv::remap()`](https://docs.opencv.org/4.10.0/da/d54/group__imgproc__transform.html#gab75ef31ce5cdfb5c44b6da5f3b908ea4) Geometrically transforms the `src` image by taking the pixels specified by the coordinates from the `map` image. + Notes on parameters: * `src.depth()` - only supports `CV_8U` depth and 1 channel. * `borderMode` - only supports `BORDER_REPLICATE` @@ -201,3 +202,10 @@ Supported map configurations: * supported `interpolation`: `INTER_NEAREST` only * `map1` is 16SC2 and `map2` is 16UC1: `map1` is as above, `map2` contains combined 5+5 bits of x (low) and y (high) fractions, i.e. x = x1 + x2 / 2^5 * supported `interpolation`: `INTER_LINEAR` only + +### [`cv::pyrDown()`](https://docs.opencv.org/4.10.0/d4/d86/group__imgproc__filter.html#gaf9bba239dfca11654cb7f50f889fc2ff) +Blurs and downsamples an image. + +Notes on parameters: +* `src.depth()` - only supports `CV_8U` and 1 channel. +* if `dstsize` is specified it must be equal to `Size((src.cols + 1) / 2, (src.rows + 1) / 2)` diff --git a/kleidicv/include/kleidicv/filters/blur_and_downsample.h b/kleidicv/include/kleidicv/filters/blur_and_downsample.h new file mode 100644 index 0000000000000000000000000000000000000000..474e2c402784a150311ba9fc7dbf644f150b3318 --- /dev/null +++ b/kleidicv/include/kleidicv/filters/blur_and_downsample.h @@ -0,0 +1,66 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_FILTERS_BLUR_AND_DOWNSAMPLE_H +#define KLEIDICV_FILTERS_BLUR_AND_DOWNSAMPLE_H + +#include "kleidicv/config.h" +#include "kleidicv/kleidicv.h" +#include "kleidicv/types.h" +#include "kleidicv/workspace/border_types.h" + +extern "C" { +// For internal use only. See instead kleidicv_blur_and_downsample_u8. +// Blurs and downsamples a horizontal stripe across an image. The stripe is +// defined by the range (y_begin, y_end]. +KLEIDICV_API_DECLARATION(kleidicv_blur_and_downsample_stripe_u8, + const uint8_t *src, size_t src_stride, + size_t src_width, size_t src_height, uint8_t *dst, + size_t dst_stride, size_t y_begin, size_t y_end, + size_t channels, + kleidicv::FixedBorderType fixed_border_type, + kleidicv_filter_context_t *context); +} + +namespace kleidicv { + +inline bool blur_and_downsample_is_implemented(size_t src_width, + size_t src_height, + size_t channels) { + return (src_width >= 4 && src_height >= 4) && (channels == 1); +} + +namespace neon { + +kleidicv_error_t kleidicv_blur_and_downsample_stripe_u8( + const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, size_t y_begin, size_t y_end, + size_t channels, FixedBorderType fixed_border_type, + kleidicv_filter_context_t *context); + +} // namespace neon + +namespace sve2 { + +kleidicv_error_t kleidicv_blur_and_downsample_stripe_u8( + const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, size_t y_begin, size_t y_end, + size_t channels, FixedBorderType fixed_border_type, + kleidicv_filter_context_t *context); + +} // namespace sve2 + +namespace sme2 { + +kleidicv_error_t kleidicv_blur_and_downsample_stripe_u8( + const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, size_t y_begin, size_t y_end, + size_t channels, FixedBorderType fixed_border_type, + kleidicv_filter_context_t *context); + +} // namespace sme2 + +} // namespace kleidicv + +#endif // KLEIDICV_FILTERS_BLUR_AND_DOWNSAMPLE_H diff --git a/kleidicv/include/kleidicv/kleidicv.h b/kleidicv/include/kleidicv/kleidicv.h index 20ecb8db2ae71148471e5f0539c95d92b5e20e3a..a31577a0502e2bb4abce74b8768b77a676867501 100644 --- a/kleidicv/include/kleidicv/kleidicv.h +++ b/kleidicv/include/kleidicv/kleidicv.h @@ -1342,6 +1342,60 @@ kleidicv_error_t kleidicv_gaussian_blur_u8( size_t kernel_height, float sigma_x, float sigma_y, kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); +#ifndef DOXYGEN +/// Internal - not part of the public API and its direct use is not supported. +/// +/// Applies 5x5 binomial Gaussian blur to the source image and downsaples the +/// result by keeping odd rows and columns only. +/// This function can be used to generate an Image Pyramid. +/// In-place operation is not supported. +/// +/// The number of elements in the source is limited to @ref +/// KLEIDICV_MAX_IMAGE_PIXELS. +/// +/// Width and height of the destination is calculated as: +/// - `dst_width = (src_width + 1) / 2` +/// - `dst_height = (src_height + 1) / 2` +/// +/// Usage: +/// +/// Before using this function, a context must be created using @ref +/// kleidicv_filter_context_create, and when finished, it has to be released +/// using @ref kleidicv_filter_context_release. Please ensure that your filter +/// context parameters are large enough (max_kernel_width and max_kernel_height +/// must be at least 5), otherwise this API will return with an error. +/// +/// Note, from the border types only these are supported: +/// - @ref KLEIDICV_BORDER_TYPE_REPLICATE +/// - @ref KLEIDICV_BORDER_TYPE_REFLECT +/// - @ref KLEIDICV_BORDER_TYPE_WRAP +/// - @ref KLEIDICV_BORDER_TYPE_REVERSE +/// +/// @param src Pointer to the source data. Must be non-null. +/// @param src_stride Distance in bytes from the start of one row to the +/// start of the next row in the source data. Must be a +/// multiple of `sizeof(type)` and no less than `src_width +/// * sizeof(type) * channels`, except for single-row +/// images. +/// @param src_width Number of columns in the source data. (One column +/// consists of `channels` number of elements.) +/// @param src_height Number of rows in the source data. +/// @param dst Pointer to the destination data. Must be non-null. +/// @param dst_stride Distance in bytes from the start of one row to the +/// start of the next row in the destination data. Must be +/// a multiple of `sizeof(type)` and no less than +/// `dst_width * sizeof(type) * channels`, except for +/// single-row images. +/// @param channels Number of channels in the data. Must be equal to 1. +/// @param border_type Way of handling the border. +/// @param context Pointer to filter context. +/// +kleidicv_error_t kleidicv_blur_and_downsample_u8( + const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, size_t channels, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context); +#endif + /// Splits a multi channel source stream into separate 1-channel streams. Width /// and height are the same for the source stream and for all the destination /// streams. Number of pixels is limited to @ref KLEIDICV_MAX_IMAGE_PIXELS. diff --git a/kleidicv/include/kleidicv/neon.h b/kleidicv/include/kleidicv/neon.h index c5a59dc1a57d0c68d26553eaf5181c821dc1c213..35e42a88cc64044caafef08011f1698f6869aaf6 100644 --- a/kleidicv/include/kleidicv/neon.h +++ b/kleidicv/include/kleidicv/neon.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2023 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 @@ -10,7 +10,6 @@ #include "kleidicv/neon_intrinsics.h" #include "kleidicv/operations.h" #include "kleidicv/utils.h" -#include "kleidicv/workspace/separable.h" namespace kleidicv::neon { diff --git a/kleidicv/include/kleidicv/sve2.h b/kleidicv/include/kleidicv/sve2.h index 214cda636595e03d431422519ff125aebf7c30ec..43e2e40d561a08b0bc172d0d4c5ab3c0788d427b 100644 --- a/kleidicv/include/kleidicv/sve2.h +++ b/kleidicv/include/kleidicv/sve2.h @@ -11,7 +11,6 @@ #include "kleidicv/operations.h" #include "kleidicv/utils.h" -#include "kleidicv/workspace/separable.h" // It is used by SVE2 and SME2, the actual namespace will reflect it. namespace KLEIDICV_TARGET_NAMESPACE { diff --git a/kleidicv/include/kleidicv/workspace/blur_and_downsample_ws.h b/kleidicv/include/kleidicv/workspace/blur_and_downsample_ws.h new file mode 100644 index 0000000000000000000000000000000000000000..cbdb06713a6dc71430c16815118074ea4aac9d12 --- /dev/null +++ b/kleidicv/include/kleidicv/workspace/blur_and_downsample_ws.h @@ -0,0 +1,99 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_WORKSPACE_BLUR_AND_DOWNSAMPLE_WS_H +#define KLEIDICV_WORKSPACE_BLUR_AND_DOWNSAMPLE_WS_H + +#include "separable.h" + +namespace KLEIDICV_TARGET_NAMESPACE { + +// Alter SeparableFilterWorkspace's behavior to only process elements in even +// rows and columns +class BlurAndDownsampleFilterWorkspace final : public SeparableFilterWorkspace { + public: + template + void process(Rectangle rect, size_t y_begin, size_t y_end, + Rows src_rows, + Rows dst_rows, + size_t channels, typename FilterType::BorderType border_type, + FilterType filter) KLEIDICV_STREAMING_COMPATIBLE { + // Border helper which calculates border offsets. + typename FilterType::BorderInfoType vertical_border{rect.height(), + border_type}; + typename FilterType::BorderInfoType horizontal_border{rect.width(), + border_type}; + + // Buffer rows which hold intermediate widened data. + auto buffer_rows = Rows{reinterpret_cast( + &data_[buffer_rows_offset_]), + buffer_rows_stride_, channels}; + + // Vertical processing loop. + for (size_t vertical_index = y_begin; vertical_index < y_end; + vertical_index += 2) { + // Recalculate vertical border offsets. + auto offsets = vertical_border.offsets_with_border(vertical_index); + // Process in the vertical direction first. + filter.process_vertical(rect.width(), src_rows.at(vertical_index), + buffer_rows, offsets); + // Process in the horizontal direction last. + process_horizontal(rect.width(), buffer_rows, + dst_rows.at(vertical_index / 2), filter, + horizontal_border); + } + } + + private: + template + void process_horizontal(size_t width, + Rows buffer_rows, + Rows dst_rows, + FilterType filter, + typename FilterType::BorderInfoType horizontal_border) + KLEIDICV_STREAMING_COMPATIBLE { + // Margin associated with the filter. + constexpr size_t margin = filter.margin; + + // Process data affected by left border. + KLEIDICV_FORCE_LOOP_UNROLL + for (size_t horizontal_index = 0; horizontal_index < margin; + horizontal_index += 2) { + auto offsets = + horizontal_border.offsets_with_left_border(horizontal_index); + filter.process_horizontal_borders(buffer_rows.at(0, horizontal_index), + dst_rows.at(0, horizontal_index / 2), + offsets); + } + + // Process data which is not affected by any borders in bulk. + { + size_t width_without_borders = width - (2 * margin); + auto offsets = horizontal_border.offsets_without_border(); + size_t start = align_up(margin, 2); + filter.process_horizontal(width_without_borders, buffer_rows.at(0, start), + dst_rows.at(0, start / 2), offsets); + } + + // Process data affected by right border. + for (size_t index = align_up(width - margin, 2); index < width; + index += 2) { + auto offsets = horizontal_border.offsets_with_right_border(index); + filter.process_horizontal_borders(buffer_rows.at(0, index), + dst_rows.at(0, index / 2), offsets); + } + } +}; // end of class BlurAndDownsampleFilterWorkspace + +// BlurAndDownsampleFilterWorkspace and SeparableFilterWorkspace must have the +// same size because through the API of this library only +// SeparableFilterWorkspace can be created. So, child classes of +// SeparableFilterWorkspace can only add functionality but cannot add member +// variables. +static_assert(sizeof(BlurAndDownsampleFilterWorkspace) == + sizeof(SeparableFilterWorkspace)); + +} // namespace KLEIDICV_TARGET_NAMESPACE + +#endif // KLEIDICV_WORKSPACE_BLUR_AND_DOWNSAMPLE_WS_H diff --git a/kleidicv/include/kleidicv/workspace/border_15x15.h b/kleidicv/include/kleidicv/workspace/border_15x15.h index eb3ae12ad665dc081a514ec87bc486d2313e42d7..c281383abae58570d2f2cd7ebde645f26c520767 100644 --- a/kleidicv/include/kleidicv/workspace/border_15x15.h +++ b/kleidicv/include/kleidicv/workspace/border_15x15.h @@ -25,30 +25,31 @@ class FixedBorderInfo final { Offsets() = default; // NOLINTEND(hicpp-member-init) - Offsets(size_t o0, size_t o1, size_t o2, size_t o3, size_t o4, size_t o5, - size_t o6, size_t o7, size_t o8, size_t o9, size_t o10, size_t o11, - size_t o12, size_t o13, size_t o14) + Offsets(ptrdiff_t o0, ptrdiff_t o1, ptrdiff_t o2, ptrdiff_t o3, + ptrdiff_t o4, ptrdiff_t o5, ptrdiff_t o6, ptrdiff_t o7, + ptrdiff_t o8, ptrdiff_t o9, ptrdiff_t o10, ptrdiff_t o11, + ptrdiff_t o12, ptrdiff_t o13, ptrdiff_t o14) : offsets_{o0, o1, o2, o3, o4, o5, o6, o7, o8, o9, o10, o11, o12, o13, o14} {} - size_t c0() const { return offsets_[0]; } - size_t c1() const { return offsets_[1]; } - size_t c2() const { return offsets_[2]; } - size_t c3() const { return offsets_[3]; } - size_t c4() const { return offsets_[4]; } - size_t c5() const { return offsets_[5]; } - size_t c6() const { return offsets_[6]; } - size_t c7() const { return offsets_[7]; } - size_t c8() const { return offsets_[8]; } - size_t c9() const { return offsets_[9]; } - size_t c10() const { return offsets_[10]; } - size_t c11() const { return offsets_[11]; } - size_t c12() const { return offsets_[12]; } - size_t c13() const { return offsets_[13]; } - size_t c14() const { return offsets_[14]; } + ptrdiff_t c0() const { return offsets_[0]; } + ptrdiff_t c1() const { return offsets_[1]; } + ptrdiff_t c2() const { return offsets_[2]; } + ptrdiff_t c3() const { return offsets_[3]; } + ptrdiff_t c4() const { return offsets_[4]; } + ptrdiff_t c5() const { return offsets_[5]; } + ptrdiff_t c6() const { return offsets_[6]; } + ptrdiff_t c7() const { return offsets_[7]; } + ptrdiff_t c8() const { return offsets_[8]; } + ptrdiff_t c9() const { return offsets_[9]; } + ptrdiff_t c10() const { return offsets_[10]; } + ptrdiff_t c11() const { return offsets_[11]; } + ptrdiff_t c12() const { return offsets_[12]; } + ptrdiff_t c13() const { return offsets_[13]; } + ptrdiff_t c14() const { return offsets_[14]; } private: - size_t offsets_[15]; + ptrdiff_t offsets_[15]; }; FixedBorderInfo(size_t height, FixedBorderType border_type) @@ -255,10 +256,11 @@ class FixedBorderInfo final { private: // Takes care of static signed to unsigned casts. - Offsets get(size_t o0, size_t o1, size_t o2, size_t o3, size_t o4, size_t o5, - size_t o6, size_t o7, size_t o8, size_t o9, size_t o10, - size_t o11, size_t o12, size_t o13, - size_t o14) const KLEIDICV_STREAMING_COMPATIBLE { + Offsets get(ptrdiff_t o0, ptrdiff_t o1, ptrdiff_t o2, ptrdiff_t o3, + ptrdiff_t o4, ptrdiff_t o5, ptrdiff_t o6, ptrdiff_t o7, + ptrdiff_t o8, ptrdiff_t o9, ptrdiff_t o10, ptrdiff_t o11, + ptrdiff_t o12, ptrdiff_t o13, + ptrdiff_t o14) const KLEIDICV_STREAMING_COMPATIBLE { return Offsets{o0, o1, o2, o3, o4, o5, o6, o7, o8, o9, o10, o11, o12, o13, o14}; } diff --git a/kleidicv/include/kleidicv/workspace/border_3x3.h b/kleidicv/include/kleidicv/workspace/border_3x3.h index ecd5627d328811f358fb05a469a830e966a35e76..a3fc69182cc8cab56ffa65123578f119a402c11c 100644 --- a/kleidicv/include/kleidicv/workspace/border_3x3.h +++ b/kleidicv/include/kleidicv/workspace/border_3x3.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2023 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 @@ -23,14 +23,14 @@ class FixedBorderInfo final { public: Offsets() = default; - Offsets(size_t o0, size_t o1, size_t o2) : offsets_{o0, o1, o2} {} + Offsets(ptrdiff_t o0, ptrdiff_t o1, ptrdiff_t o2) : offsets_{o0, o1, o2} {} - size_t c0() const { return offsets_[0]; } - size_t c1() const { return offsets_[1]; } - size_t c2() const { return offsets_[2]; } + ptrdiff_t c0() const { return offsets_[0]; } + ptrdiff_t c1() const { return offsets_[1]; } + ptrdiff_t c2() const { return offsets_[2]; } private: - size_t offsets_[3]; + ptrdiff_t offsets_[3]; }; FixedBorderInfo(size_t height, FixedBorderType border_type) @@ -99,7 +99,7 @@ class FixedBorderInfo final { private: // Takes care of static signed to unsigned casts. - Offsets get(size_t o0, size_t o1, size_t o2) const { + Offsets get(ptrdiff_t o0, ptrdiff_t o1, ptrdiff_t o2) const { return Offsets{o0, o1, o2}; } diff --git a/kleidicv/include/kleidicv/workspace/border_5x5.h b/kleidicv/include/kleidicv/workspace/border_5x5.h index 06c2683bd9e5d1f895ca61cc4c887b1bd324b8ce..8d19636ec0de2ff25954e3531c4e6bb9b5dca51c 100644 --- a/kleidicv/include/kleidicv/workspace/border_5x5.h +++ b/kleidicv/include/kleidicv/workspace/border_5x5.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2023 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 @@ -25,17 +25,18 @@ class FixedBorderInfo final { Offsets() = default; // NOLINTEND(hicpp-member-init) - Offsets(size_t o0, size_t o1, size_t o2, size_t o3, size_t o4) + Offsets(ptrdiff_t o0, ptrdiff_t o1, ptrdiff_t o2, ptrdiff_t o3, + ptrdiff_t o4) : offsets_{o0, o1, o2, o3, o4} {} - size_t c0() const { return offsets_[0]; } - size_t c1() const { return offsets_[1]; } - size_t c2() const { return offsets_[2]; } - size_t c3() const { return offsets_[3]; } - size_t c4() const { return offsets_[4]; } + ptrdiff_t c0() const { return offsets_[0]; } + ptrdiff_t c1() const { return offsets_[1]; } + ptrdiff_t c2() const { return offsets_[2]; } + ptrdiff_t c3() const { return offsets_[3]; } + ptrdiff_t c4() const { return offsets_[4]; } private: - size_t offsets_[5]; + ptrdiff_t offsets_[5]; }; FixedBorderInfo(size_t height, FixedBorderType border_type) @@ -144,8 +145,8 @@ class FixedBorderInfo final { private: // Takes care of static signed to unsigned casts. - Offsets get(size_t o0, size_t o1, size_t o2, size_t o3, - size_t o4) const KLEIDICV_STREAMING_COMPATIBLE { + Offsets get(ptrdiff_t o0, ptrdiff_t o1, ptrdiff_t o2, ptrdiff_t o3, + ptrdiff_t o4) const KLEIDICV_STREAMING_COMPATIBLE { return Offsets{o0, o1, o2, o3, o4}; } diff --git a/kleidicv/include/kleidicv/workspace/border_7x7.h b/kleidicv/include/kleidicv/workspace/border_7x7.h index 75bb86117e76e0b490c39eeebd050417ba43506f..83e6d391157144a55007955f782f0195b3c73c76 100644 --- a/kleidicv/include/kleidicv/workspace/border_7x7.h +++ b/kleidicv/include/kleidicv/workspace/border_7x7.h @@ -25,20 +25,20 @@ class FixedBorderInfo final { Offsets() = default; // NOLINTEND(hicpp-member-init) - Offsets(size_t o0, size_t o1, size_t o2, size_t o3, size_t o4, size_t o5, - size_t o6) + Offsets(ptrdiff_t o0, ptrdiff_t o1, ptrdiff_t o2, ptrdiff_t o3, + ptrdiff_t o4, ptrdiff_t o5, ptrdiff_t o6) : offsets_{o0, o1, o2, o3, o4, o5, o6} {} - size_t c0() const { return offsets_[0]; } - size_t c1() const { return offsets_[1]; } - size_t c2() const { return offsets_[2]; } - size_t c3() const { return offsets_[3]; } - size_t c4() const { return offsets_[4]; } - size_t c5() const { return offsets_[5]; } - size_t c6() const { return offsets_[6]; } + ptrdiff_t c0() const { return offsets_[0]; } + ptrdiff_t c1() const { return offsets_[1]; } + ptrdiff_t c2() const { return offsets_[2]; } + ptrdiff_t c3() const { return offsets_[3]; } + ptrdiff_t c4() const { return offsets_[4]; } + ptrdiff_t c5() const { return offsets_[5]; } + ptrdiff_t c6() const { return offsets_[6]; } private: - size_t offsets_[7]; + ptrdiff_t offsets_[7]; }; FixedBorderInfo(size_t height, FixedBorderType border_type) @@ -163,8 +163,9 @@ class FixedBorderInfo final { private: // Takes care of static signed to unsigned casts. - Offsets get(size_t o0, size_t o1, size_t o2, size_t o3, size_t o4, size_t o5, - size_t o6) const KLEIDICV_STREAMING_COMPATIBLE { + Offsets get(ptrdiff_t o0, ptrdiff_t o1, ptrdiff_t o2, ptrdiff_t o3, + ptrdiff_t o4, ptrdiff_t o5, + ptrdiff_t o6) const KLEIDICV_STREAMING_COMPATIBLE { return Offsets{o0, o1, o2, o3, o4, o5, o6}; } diff --git a/kleidicv/include/kleidicv/workspace/separable.h b/kleidicv/include/kleidicv/workspace/separable.h index dd10268df92eec63fe36357cedf70742698b91a4..8e303473101b18e5034f09f3fb9344d611da3155 100644 --- a/kleidicv/include/kleidicv/workspace/separable.h +++ b/kleidicv/include/kleidicv/workspace/separable.h @@ -38,7 +38,8 @@ class SeparableFilterWorkspaceDeleter { // Limitations // // 1. In-place operations are not supported. -// 2. The input has to be at least filter-sized. +// 2. The input's width and height have to be at least `filter's width - 1` and +// `filter's height - 1`, respectively. // // Example // @@ -67,7 +68,7 @@ class SeparableFilterWorkspaceDeleter { // // Handling of borders is calculated based on offsets rather than setting up // suitably-sized buffers which could hold both borders and data. -class SeparableFilterWorkspace final { +class SeparableFilterWorkspace { public: // To avoid load/store penalties. static constexpr size_t kAlignment = 16UL; @@ -154,7 +155,7 @@ class SeparableFilterWorkspace final { } } - private: + protected: template void process_horizontal(size_t width, Rows buffer_rows, diff --git a/kleidicv/src/filters/blur_and_downsample_api.cpp b/kleidicv/src/filters/blur_and_downsample_api.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f03e76c66f0d1a373ece4c7dc794fccf02c333aa --- /dev/null +++ b/kleidicv/src/filters/blur_and_downsample_api.cpp @@ -0,0 +1,35 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "kleidicv/dispatch.h" +#include "kleidicv/filters/blur_and_downsample.h" +#include "kleidicv/kleidicv.h" + +KLEIDICV_MULTIVERSION_C_API( + kleidicv_blur_and_downsample_stripe_u8, + &kleidicv::neon::kleidicv_blur_and_downsample_stripe_u8, + &kleidicv::sve2::kleidicv_blur_and_downsample_stripe_u8, + &kleidicv::sme2::kleidicv_blur_and_downsample_stripe_u8); + +extern "C" { + +kleidicv_error_t kleidicv_blur_and_downsample_u8( + const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, size_t channels, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context) { + if (!kleidicv::blur_and_downsample_is_implemented(src_width, src_height, + channels)) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + + auto fixed_border_type = kleidicv::get_fixed_border_type(border_type); + if (!fixed_border_type) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + return kleidicv_blur_and_downsample_stripe_u8( + src, src_stride, src_width, src_height, dst, dst_stride, 0, src_height, + channels, *fixed_border_type, context); +} + +} // extern "C" diff --git a/kleidicv/src/filters/blur_and_downsample_neon.cpp b/kleidicv/src/filters/blur_and_downsample_neon.cpp new file mode 100644 index 0000000000000000000000000000000000000000..876405610bc598b0f5792d073ec0718e383467e3 --- /dev/null +++ b/kleidicv/src/filters/blur_and_downsample_neon.cpp @@ -0,0 +1,259 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "kleidicv/ctypes.h" +#include "kleidicv/filters/blur_and_downsample.h" +#include "kleidicv/kleidicv.h" +#include "kleidicv/neon.h" +#include "kleidicv/utils.h" +#include "kleidicv/workspace/blur_and_downsample_ws.h" +#include "kleidicv/workspace/border_5x5.h" + +namespace kleidicv::neon { + +// Applies Gaussian Blur binomial filter to even rows and columns +// +// [ 1, 4, 6, 4, 1 ] [ 1 ] +// [ 4, 16, 24, 16, 4 ] [ 4 ] +// F = 1/256 * [ 6, 24, 36, 24, 6 ] = 1/256 * [ 6 ] * [ 1, 4, 6, 4, 1 ] +// [ 4, 16, 24, 16, 4 ] [ 4 ] +// [ 1, 4, 6, 4, 1 ] [ 1 ] +class BlurAndDownsample { + public: + using SourceType = uint8_t; + using BufferType = uint16_t; + using DestinationType = uint8_t; + using SourceVecTraits = typename neon::VecTraits; + using SourceVectorType = typename SourceVecTraits::VectorType; + using BufferVecTraits = typename neon::VecTraits; + using BufferVectorType = typename BufferVecTraits::VectorType; + using BorderInfoType = + typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo5x5; + using BorderType = FixedBorderType; + using BorderOffsets = typename BorderInfoType::Offsets; + + BlurAndDownsample() + : const_6_u8_half_{vdup_n_u8(6)}, + const_6_u16_{vdupq_n_u16(6)}, + const_4_u16_{vdupq_n_u16(4)} {} + + static constexpr size_t margin = 2UL; + + void process_vertical(size_t width, Rows src_rows, + Rows dst_rows, + BorderOffsets border_offsets) const { + LoopUnroll2 loop{width * src_rows.channels(), + SourceVecTraits::num_lanes()}; + + loop.unroll_twice([&](ptrdiff_t index) { + const auto *src_0 = &src_rows.at(border_offsets.c0())[index]; + const auto *src_1 = &src_rows.at(border_offsets.c1())[index]; + const auto *src_2 = &src_rows.at(border_offsets.c2())[index]; + const auto *src_3 = &src_rows.at(border_offsets.c3())[index]; + const auto *src_4 = &src_rows.at(border_offsets.c4())[index]; + + SourceVectorType src_a[5], src_b[5]; + src_a[0] = vld1q(&src_0[0]); + src_b[0] = vld1q(&src_0[SourceVecTraits::num_lanes()]); + src_a[1] = vld1q(&src_1[0]); + src_b[1] = vld1q(&src_1[SourceVecTraits::num_lanes()]); + src_a[2] = vld1q(&src_2[0]); + src_b[2] = vld1q(&src_2[SourceVecTraits::num_lanes()]); + src_a[3] = vld1q(&src_3[0]); + src_b[3] = vld1q(&src_3[SourceVecTraits::num_lanes()]); + src_a[4] = vld1q(&src_4[0]); + src_b[4] = vld1q(&src_4[SourceVecTraits::num_lanes()]); + vertical_vector_path(src_a, &dst_rows[index]); + vertical_vector_path( + src_b, &dst_rows[index + static_cast( + SourceVecTraits::num_lanes())]); + }); + + loop.unroll_once([&](ptrdiff_t index) { + SourceVectorType src[5]; + src[0] = vld1q(&src_rows.at(border_offsets.c0())[index]); + src[1] = vld1q(&src_rows.at(border_offsets.c1())[index]); + src[2] = vld1q(&src_rows.at(border_offsets.c2())[index]); + src[3] = vld1q(&src_rows.at(border_offsets.c3())[index]); + src[4] = vld1q(&src_rows.at(border_offsets.c4())[index]); + vertical_vector_path(src, &dst_rows[index]); + }); + + loop.tail([&](ptrdiff_t index) { + SourceType src[5]; + src[0] = src_rows.at(border_offsets.c0())[index]; + src[1] = src_rows.at(border_offsets.c1())[index]; + src[2] = src_rows.at(border_offsets.c2())[index]; + src[3] = src_rows.at(border_offsets.c3())[index]; + src[4] = src_rows.at(border_offsets.c4())[index]; + vertical_scalar_path(src, &dst_rows[index]); + }); + } + + void process_horizontal(size_t width, Rows src_rows, + Rows dst_rows, + BorderOffsets border_offsets) const { + LoopUnroll2 loop{width * src_rows.channels(), + BufferVecTraits::num_lanes()}; + + loop.unroll_twice([&](ptrdiff_t index) { + const auto *src_0 = &src_rows.at(0, border_offsets.c0())[index]; + const auto *src_1 = &src_rows.at(0, border_offsets.c1())[index]; + const auto *src_2 = &src_rows.at(0, border_offsets.c2())[index]; + const auto *src_3 = &src_rows.at(0, border_offsets.c3())[index]; + const auto *src_4 = &src_rows.at(0, border_offsets.c4())[index]; + + BufferVectorType src_a[5], src_b[5]; + src_a[0] = vld1q(&src_0[0]); + src_b[0] = vld1q(&src_0[BufferVecTraits::num_lanes()]); + src_a[1] = vld1q(&src_1[0]); + src_b[1] = vld1q(&src_1[BufferVecTraits::num_lanes()]); + src_a[2] = vld1q(&src_2[0]); + src_b[2] = vld1q(&src_2[BufferVecTraits::num_lanes()]); + src_a[3] = vld1q(&src_3[0]); + src_b[3] = vld1q(&src_3[BufferVecTraits::num_lanes()]); + src_a[4] = vld1q(&src_4[0]); + src_b[4] = vld1q(&src_4[BufferVecTraits::num_lanes()]); + + uint8x8_t res_a = horizontal_vector_path(src_a); + uint8x8_t res_b = horizontal_vector_path(src_b); + + // Only store even indices + vst1(&dst_rows[index / 2], vuzp1_u8(res_a, res_b)); + }); + + loop.remaining([&](ptrdiff_t index, size_t max_index) { + index = align_up(index, 2); + while (index < static_cast(max_index)) { + process_horizontal_scalar(src_rows, dst_rows, border_offsets, index); + index += 2; + } + }); + } + + void process_horizontal_borders(Rows src_rows, + Rows dst_rows, + BorderOffsets border_offsets) const { + for (ptrdiff_t index = 0; + index < static_cast(src_rows.channels()); ++index) { + disable_loop_vectorization(); + process_horizontal_scalar(src_rows, dst_rows, border_offsets, index); + } + } + + private: + // Applies vertical filtering vector using SIMD operations. + // + // DST = [ SRC0, SRC1, SRC2, SRC3, SRC4 ] * [ 1, 4, 6, 4, 1 ]T + void vertical_vector_path(uint8x16_t src[5], BufferType *dst) const { + uint16x8_t acc_0_4_l = vaddl_u8(vget_low_u8(src[0]), vget_low_u8(src[4])); + uint16x8_t acc_0_4_h = vaddl_u8(vget_high_u8(src[0]), vget_high_u8(src[4])); + uint16x8_t acc_1_3_l = vaddl_u8(vget_low_u8(src[1]), vget_low_u8(src[3])); + uint16x8_t acc_1_3_h = vaddl_u8(vget_high_u8(src[1]), vget_high_u8(src[3])); + uint16x8_t acc_l = + vmlal_u8(acc_0_4_l, vget_low_u8(src[2]), const_6_u8_half_); + uint16x8_t acc_h = + vmlal_u8(acc_0_4_h, vget_high_u8(src[2]), const_6_u8_half_); + acc_l = vmlaq_u16(acc_l, acc_1_3_l, const_4_u16_); + acc_h = vmlaq_u16(acc_h, acc_1_3_h, const_4_u16_); + vst1q(&dst[0], acc_l); + vst1q(&dst[8], acc_h); + } + + // Applies vertical filtering vector using scalar operations. + // + // DST = [ SRC0, SRC1, SRC2, SRC3, SRC4 ] * [ 1, 4, 6, 4, 1 ]T + void vertical_scalar_path(const SourceType src[5], BufferType *dst) const { + dst[0] = src[0] + src[4] + 4 * (src[1] + src[3]) + 6 * src[2]; + } + + // Applies horizontal filtering vector using SIMD operations. + // + // DST = 1/256 * [ SRC0, SRC1, SRC2, SRC3, SRC4 ] * [ 1, 4, 6, 4, 1 ]T + uint8x8_t horizontal_vector_path(uint16x8_t src[5]) const { + uint16x8_t acc_0_4 = vaddq_u16(src[0], src[4]); + uint16x8_t acc_1_3 = vaddq_u16(src[1], src[3]); + uint16x8_t acc_u16 = vmlaq_u16(acc_0_4, src[2], const_6_u16_); + acc_u16 = vmlaq_u16(acc_u16, acc_1_3, const_4_u16_); + return vrshrn_n_u16(acc_u16, 8); + } + + // Applies horizontal filtering vector using scalar operations. + // + // DST = 1/256 * [ SRC0, SRC1, SRC2, SRC3, SRC4 ] * [ 1, 4, 6, 4, 1 ]T + void process_horizontal_scalar(Rows src_rows, + Rows dst_rows, + BorderOffsets border_offsets, + ptrdiff_t index) const { + BufferType src[5]; + src[0] = src_rows.at(0, border_offsets.c0())[index]; + src[1] = src_rows.at(0, border_offsets.c1())[index]; + src[2] = src_rows.at(0, border_offsets.c2())[index]; + src[3] = src_rows.at(0, border_offsets.c3())[index]; + src[4] = src_rows.at(0, border_offsets.c4())[index]; + + auto acc = src[0] + src[4] + 4 * (src[1] + src[3]) + 6 * src[2]; + dst_rows[index / 2] = rounding_shift_right(acc, 8); + } + + uint8x8_t const_6_u8_half_; + uint16x8_t const_6_u16_; + uint16x8_t const_4_u16_; +}; // end of class BlurAndDownsample + +// Does not include checks for whether the operation is implemented. +// This must be done earlier, by blur_and_downsample_is_implemented. +static kleidicv_error_t blur_and_downsample_checks( + const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, size_t channels, + BlurAndDownsampleFilterWorkspace *workspace) { + CHECK_POINTERS(workspace); + CHECK_POINTER_AND_STRIDE(src, src_stride, src_height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, (src_height + 1) / 2); + CHECK_IMAGE_SIZE(src_width, src_height); + + Rectangle rect{src_width, src_height}; + const Rectangle &context_rect = workspace->image_size(); + if (context_rect.width() < src_width || context_rect.height() < src_height) { + return KLEIDICV_ERROR_CONTEXT_MISMATCH; + } + + // Currently supports only one channel, so it cannot be tested. + // GCOVR_EXCL_START + if (workspace->channels() < channels) { + return KLEIDICV_ERROR_CONTEXT_MISMATCH; + } + // GCOVR_EXCL_STOP + + return KLEIDICV_OK; +} + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t kleidicv_blur_and_downsample_stripe_u8( + const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, size_t y_begin, size_t y_end, + size_t channels, FixedBorderType fixed_border_type, + kleidicv_filter_context_t *context) { + // Does not include checks for whether the operation is implemented. + // This must be done earlier, by blur_and_downsample_is_implemented. + auto *workspace = + reinterpret_cast(context); + + if (auto check_result = + blur_and_downsample_checks(src, src_stride, src_width, src_height, + dst, dst_stride, channels, workspace)) { + return check_result; + } + + Rectangle rect{src_width, src_height}; + + Rows src_rows{src, src_stride, channels}; + Rows dst_rows{dst, dst_stride, channels}; + workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels, + fixed_border_type, BlurAndDownsample{}); + + return KLEIDICV_OK; +} + +} // namespace kleidicv::neon diff --git a/kleidicv/src/filters/blur_and_downsample_sc.h b/kleidicv/src/filters/blur_and_downsample_sc.h new file mode 100644 index 0000000000000000000000000000000000000000..92a18ceb6a5564b4e9fee8fd1409d0d6cfc1eae9 --- /dev/null +++ b/kleidicv/src/filters/blur_and_downsample_sc.h @@ -0,0 +1,301 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "kleidicv/ctypes.h" +#include "kleidicv/filters/blur_and_downsample.h" +#include "kleidicv/kleidicv.h" +#include "kleidicv/sve2.h" +#include "kleidicv/utils.h" +#include "kleidicv/workspace/blur_and_downsample_ws.h" +#include "kleidicv/workspace/border_5x5.h" + +namespace KLEIDICV_TARGET_NAMESPACE { + +// Applies Gaussian Blur binomial filter to even rows and columns +// +// [ 1, 4, 6, 4, 1 ] [ 1 ] +// [ 4, 16, 24, 16, 4 ] [ 4 ] +// F = 1/256 * [ 6, 24, 36, 24, 6 ] = 1/256 * [ 6 ] * [ 1, 4, 6, 4, 1 ] +// [ 4, 16, 24, 16, 4 ] [ 4 ] +// [ 1, 4, 6, 4, 1 ] [ 1 ] +class BlurAndDownsample { + public: + using SourceType = uint8_t; + using BufferType = uint16_t; + using DestinationType = uint8_t; + using SourceVecTraits = + typename ::KLEIDICV_TARGET_NAMESPACE::VecTraits; + using SourceVectorType = typename SourceVecTraits::VectorType; + using SourceVector2Type = typename SourceVecTraits::Vector2Type; + using BufferVecTraits = + typename ::KLEIDICV_TARGET_NAMESPACE::VecTraits; + using BufferVectorType = typename BufferVecTraits::VectorType; + using BorderInfoType = + typename ::KLEIDICV_TARGET_NAMESPACE::FixedBorderInfo5x5; + using BorderType = FixedBorderType; + using BorderOffsets = typename BorderInfoType::Offsets; + + static constexpr size_t margin = 2UL; + + void process_vertical( + size_t width, Rows src_rows, Rows dst_rows, + BorderOffsets border_offsets) const KLEIDICV_STREAMING_COMPATIBLE { + LoopUnroll2 loop{width * src_rows.channels(), SourceVecTraits::num_lanes()}; + + loop.unroll_twice([&](ptrdiff_t index) KLEIDICV_STREAMING_COMPATIBLE { + svbool_t pg_all = SourceVecTraits::svptrue(); + vertical_vector_path_2x(pg_all, src_rows, dst_rows, border_offsets, + index); + }); + + loop.unroll_once([&](ptrdiff_t index) KLEIDICV_STREAMING_COMPATIBLE { + svbool_t pg_all = SourceVecTraits::svptrue(); + vertical_vector_path_1x(pg_all, src_rows, dst_rows, border_offsets, + index); + }); + + loop.remaining([&](ptrdiff_t index, + ptrdiff_t length) KLEIDICV_STREAMING_COMPATIBLE { + svbool_t pg = SourceVecTraits::svwhilelt(index, length); + vertical_vector_path_1x(pg, src_rows, dst_rows, border_offsets, index); + }); + } + + void process_horizontal(size_t width, Rows src_rows, + Rows dst_rows, + BorderOffsets border_offsets) const + KLEIDICV_STREAMING_COMPATIBLE { + svbool_t pg_all = BufferVecTraits::svptrue(); + LoopUnroll2 loop{width * src_rows.channels(), BufferVecTraits::num_lanes()}; + + loop.unroll_twice([&](size_t index) KLEIDICV_STREAMING_COMPATIBLE { + horizontal_vector_path_2x(pg_all, pg_all, src_rows, pg_all, dst_rows, + border_offsets, index); + }); + + loop.remaining( + [&](size_t index, size_t length) KLEIDICV_STREAMING_COMPATIBLE { + svbool_t pg_src_0 = BufferVecTraits::svwhilelt(index, length); + svbool_t pg_src_1 = BufferVecTraits::svwhilelt( + index + BufferVecTraits::num_lanes(), length); + svbool_t pg_dst = + BufferVecTraits::svwhilelt((index + 1) / 2, (length + 1) / 2); + horizontal_vector_path_2x(pg_src_0, pg_src_1, src_rows, pg_dst, + dst_rows, border_offsets, index); + }); + } + + void process_horizontal_borders( + Rows src_rows, Rows dst_rows, + BorderOffsets border_offsets) const KLEIDICV_STREAMING_COMPATIBLE { + for (ptrdiff_t index = 0; + index < static_cast(src_rows.channels()); ++index) { + disable_loop_vectorization(); + svbool_t pg = svptrue_pat_b8(SV_VL1); + horizontal_border_path(pg, src_rows, dst_rows, border_offsets, index); + } + } + + private: + void vertical_vector_path_2x(svbool_t pg, Rows src_rows, + Rows dst_rows, + BorderOffsets border_offsets, size_t index) const + KLEIDICV_STREAMING_COMPATIBLE { + auto src_row_0 = &src_rows.at(border_offsets.c0())[index]; + auto src_row_1 = &src_rows.at(border_offsets.c1())[index]; + auto src_row_2 = &src_rows.at(border_offsets.c2())[index]; + auto src_row_3 = &src_rows.at(border_offsets.c3())[index]; + auto src_row_4 = &src_rows.at(border_offsets.c4())[index]; + + SourceVector2Type src_0; + SourceVector2Type src_1; + SourceVector2Type src_2; + SourceVector2Type src_3; + SourceVector2Type src_4; + + src_0 = + svcreate2(svld1(pg, &src_row_0[0]), svld1_vnum(pg, &src_row_0[0], 1)); + src_1 = + svcreate2(svld1(pg, &src_row_1[0]), svld1_vnum(pg, &src_row_1[0], 1)); + src_2 = + svcreate2(svld1(pg, &src_row_2[0]), svld1_vnum(pg, &src_row_2[0], 1)); + src_3 = + svcreate2(svld1(pg, &src_row_3[0]), svld1_vnum(pg, &src_row_3[0], 1)); + src_4 = + svcreate2(svld1(pg, &src_row_4[0]), svld1_vnum(pg, &src_row_4[0], 1)); + + vertical_vector_path(pg, svget2(src_0, 0), svget2(src_1, 0), + svget2(src_2, 0), svget2(src_3, 0), svget2(src_4, 0), + &dst_rows[index]); + vertical_vector_path(pg, svget2(src_0, 1), svget2(src_1, 1), + svget2(src_2, 1), svget2(src_3, 1), svget2(src_4, 1), + &dst_rows[index + SourceVecTraits::num_lanes()]); + } + + void vertical_vector_path_1x(svbool_t pg, Rows src_rows, + Rows dst_rows, + BorderOffsets border_offsets, size_t index) const + KLEIDICV_STREAMING_COMPATIBLE { + SourceVectorType src_0 = + svld1(pg, &src_rows.at(border_offsets.c0())[index]); + SourceVectorType src_1 = + svld1(pg, &src_rows.at(border_offsets.c1())[index]); + SourceVectorType src_2 = + svld1(pg, &src_rows.at(border_offsets.c2())[index]); + SourceVectorType src_3 = + svld1(pg, &src_rows.at(border_offsets.c3())[index]); + SourceVectorType src_4 = + svld1(pg, &src_rows.at(border_offsets.c4())[index]); + vertical_vector_path(pg, src_0, src_1, src_2, src_3, src_4, + &dst_rows[index]); + } + + // Applies vertical filtering vector using SIMD operations. + // + // DST = [ SRC0, SRC1, SRC2, SRC3, SRC4 ] * [ 1, 4, 6, 4, 1 ]T + void vertical_vector_path(svbool_t pg, svuint8_t src_0, svuint8_t src_1, + svuint8_t src_2, svuint8_t src_3, svuint8_t src_4, + BufferType *dst) const + KLEIDICV_STREAMING_COMPATIBLE { + svuint16_t acc_0_4_b = svaddlb_u16(src_0, src_4); + svuint16_t acc_0_4_t = svaddlt_u16(src_0, src_4); + svuint16_t acc_1_3_b = svaddlb_u16(src_1, src_3); + svuint16_t acc_1_3_t = svaddlt_u16(src_1, src_3); + + svuint16_t acc_u16_b = svmlalb_n_u16(acc_0_4_b, src_2, 6); + svuint16_t acc_u16_t = svmlalt_n_u16(acc_0_4_t, src_2, 6); + acc_u16_b = svmla_n_u16_x(pg, acc_u16_b, acc_1_3_b, 4); + acc_u16_t = svmla_n_u16_x(pg, acc_u16_t, acc_1_3_t, 4); + + svuint16x2_t interleaved = svcreate2(acc_u16_b, acc_u16_t); + svst2(pg, &dst[0], interleaved); + } + + void horizontal_vector_path_2x( + svbool_t pg_src_0, svbool_t pg_src_1, Rows src_rows, + svbool_t pg_dst, Rows dst_rows, + BorderOffsets border_offsets, + size_t index) const KLEIDICV_STREAMING_COMPATIBLE { + auto src_0 = &src_rows.at(0, border_offsets.c0())[index]; + auto src_1 = &src_rows.at(0, border_offsets.c1())[index]; + auto src_2 = &src_rows.at(0, border_offsets.c2())[index]; + auto src_3 = &src_rows.at(0, border_offsets.c3())[index]; + auto src_4 = &src_rows.at(0, border_offsets.c4())[index]; + + BufferVectorType src_0_0 = svld1(pg_src_0, &src_0[0]); + BufferVectorType src_1_0 = svld1_vnum(pg_src_1, &src_0[0], 1); + BufferVectorType src_0_1 = svld1(pg_src_0, &src_1[0]); + BufferVectorType src_1_1 = svld1_vnum(pg_src_1, &src_1[0], 1); + BufferVectorType src_0_2 = svld1(pg_src_0, &src_2[0]); + BufferVectorType src_1_2 = svld1_vnum(pg_src_1, &src_2[0], 1); + BufferVectorType src_0_3 = svld1(pg_src_0, &src_3[0]); + BufferVectorType src_1_3 = svld1_vnum(pg_src_1, &src_3[0], 1); + BufferVectorType src_0_4 = svld1(pg_src_0, &src_4[0]); + BufferVectorType src_1_4 = svld1_vnum(pg_src_1, &src_4[0], 1); + + svuint16_t res_0 = horizontal_vector_path(pg_src_0, src_0_0, src_0_1, + src_0_2, src_0_3, src_0_4); + svuint16_t res_1 = horizontal_vector_path(pg_src_1, src_1_0, src_1_1, + src_1_2, src_1_3, src_1_4); + + svuint16_t res_even_only = svuzp1(res_0, res_1); + svst1b(pg_dst, &dst_rows[index / 2], res_even_only); + } + + // Applies horizontal filtering vector using SIMD operations. + // + // DST = 1/256 * [ SRC0, SRC1, SRC2, SRC3, SRC4 ] * [ 1, 4, 6, 4, 1 ]T + svuint16_t horizontal_vector_path( + svbool_t pg, svuint16_t src_0, svuint16_t src_1, svuint16_t src_2, + svuint16_t src_3, svuint16_t src_4) const KLEIDICV_STREAMING_COMPATIBLE { + svuint16_t acc_0_4 = svadd_x(pg, src_0, src_4); + svuint16_t acc_1_3 = svadd_x(pg, src_1, src_3); + svuint16_t acc = svmla_n_u16_x(pg, acc_0_4, src_2, 6); + acc = svmla_n_u16_x(pg, acc, acc_1_3, 4); + acc = svrshr_x(pg, acc, 8); + return acc; + } + + // Applies horizontal filtering for the borders using SIMD operations. + // + // DST = 1/256 * [ SRC0, SRC1, SRC2, SRC3, SRC4 ] * [ 1, 4, 6, 4, 1 ]T + void horizontal_border_path(svbool_t pg, Rows src_rows, + Rows dst_rows, + BorderOffsets border_offsets, size_t index) const + KLEIDICV_STREAMING_COMPATIBLE { + BufferVectorType src_0 = + svld1(pg, &src_rows.at(0, border_offsets.c0())[index]); + BufferVectorType src_1 = + svld1(pg, &src_rows.at(0, border_offsets.c1())[index]); + BufferVectorType src_2 = + svld1(pg, &src_rows.at(0, border_offsets.c2())[index]); + BufferVectorType src_3 = + svld1(pg, &src_rows.at(0, border_offsets.c3())[index]); + BufferVectorType src_4 = + svld1(pg, &src_rows.at(0, border_offsets.c4())[index]); + + svuint16_t acc_0_4 = svadd_x(pg, src_0, src_4); + svuint16_t acc_1_3 = svadd_x(pg, src_1, src_3); + svuint16_t acc = svmla_n_u16_x(pg, acc_0_4, src_2, 6); + acc = svmla_n_u16_x(pg, acc, acc_1_3, 4); + acc = svrshr_x(pg, acc, 8); + + svst1b(pg, &dst_rows[index / 2], acc); + } +}; // end of class BlurAndDownsample + +// Does not include checks for whether the operation is implemented. +// This must be done earlier, by blur_and_downsample_is_implemented. +static kleidicv_error_t blur_and_downsample_checks( + const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, size_t channels, + BlurAndDownsampleFilterWorkspace *workspace) KLEIDICV_STREAMING_COMPATIBLE { + CHECK_POINTERS(workspace); + CHECK_POINTER_AND_STRIDE(src, src_stride, src_height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, (src_height + 1) / 2); + CHECK_IMAGE_SIZE(src_width, src_height); + + Rectangle rect{src_width, src_height}; + const Rectangle &context_rect = workspace->image_size(); + if (context_rect.width() < src_width || context_rect.height() < src_height) { + return KLEIDICV_ERROR_CONTEXT_MISMATCH; + } + + // Currently supports only one channel, so it cannot be tested. + // GCOVR_EXCL_START + if (workspace->channels() < channels) { + return KLEIDICV_ERROR_CONTEXT_MISMATCH; + } + // GCOVR_EXCL_STOP + + return KLEIDICV_OK; +} + +static kleidicv_error_t blur_and_downsample_stripe_u8_sc( + const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, size_t y_begin, size_t y_end, + size_t channels, FixedBorderType fixed_border_type, + kleidicv_filter_context_t *context) KLEIDICV_STREAMING_COMPATIBLE { + // Does not include checks for whether the operation is implemented. + // This must be done earlier, by blur_and_downsample_is_implemented. + auto *workspace = + reinterpret_cast(context); + + if (auto check_result = + blur_and_downsample_checks(src, src_stride, src_width, src_height, + dst, dst_stride, channels, workspace)) { + return check_result; + } + + Rectangle rect{src_width, src_height}; + + Rows src_rows{src, src_stride, channels}; + Rows dst_rows{dst, dst_stride, channels}; + workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels, + fixed_border_type, BlurAndDownsample{}); + + return KLEIDICV_OK; +} + +} // namespace KLEIDICV_TARGET_NAMESPACE diff --git a/kleidicv/src/filters/blur_and_downsample_sme2.cpp b/kleidicv/src/filters/blur_and_downsample_sme2.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a62eef80876ad1d4cb51b70554e12b9332905d13 --- /dev/null +++ b/kleidicv/src/filters/blur_and_downsample_sme2.cpp @@ -0,0 +1,23 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "blur_and_downsample_sc.h" +#include "kleidicv/filters/blur_and_downsample.h" + +namespace kleidicv::sme2 { + +KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t +kleidicv_blur_and_downsample_stripe_u8(const uint8_t *src, size_t src_stride, + size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, + size_t y_begin, size_t y_end, + size_t channels, + FixedBorderType fixed_border_type, + kleidicv_filter_context_t *context) { + return blur_and_downsample_stripe_u8_sc( + src, src_stride, src_width, src_height, dst, dst_stride, y_begin, y_end, + channels, fixed_border_type, context); +} + +} // namespace kleidicv::sme2 diff --git a/kleidicv/src/filters/blur_and_downsample_sve2.cpp b/kleidicv/src/filters/blur_and_downsample_sve2.cpp new file mode 100644 index 0000000000000000000000000000000000000000..69a3a39d192c34e05b8dbc320dfde7f848f1e760 --- /dev/null +++ b/kleidicv/src/filters/blur_and_downsample_sve2.cpp @@ -0,0 +1,21 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "blur_and_downsample_sc.h" +#include "kleidicv/filters/blur_and_downsample.h" + +namespace kleidicv::sve2 { + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t kleidicv_blur_and_downsample_stripe_u8( + const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, size_t y_begin, size_t y_end, + size_t channels, FixedBorderType fixed_border_type, + kleidicv_filter_context_t *context) { + return blur_and_downsample_stripe_u8_sc( + src, src_stride, src_width, src_height, dst, dst_stride, y_begin, y_end, + channels, fixed_border_type, context); +} + +} // namespace kleidicv::sve2 diff --git a/kleidicv/src/filters/gaussian_blur_neon.cpp b/kleidicv/src/filters/gaussian_blur_neon.cpp index e34000932869f500d447ff80c0aba37dada4bd91..b2a8cf22c1c85981510238a1ce06aed642398600 100644 --- a/kleidicv/src/filters/gaussian_blur_neon.cpp +++ b/kleidicv/src/filters/gaussian_blur_neon.cpp @@ -14,6 +14,7 @@ #include "kleidicv/separable_filter_5x5_neon.h" #include "kleidicv/separable_filter_7x7_neon.h" #include "kleidicv/sigma.h" +#include "kleidicv/workspace/separable.h" namespace kleidicv::neon { diff --git a/kleidicv/src/filters/gaussian_blur_sc.h b/kleidicv/src/filters/gaussian_blur_sc.h index 14141efd13be2f490c5771bbb7947e79a9f1859d..2eeb5931a7ca8cb2212747fea41d4181394a2216 100644 --- a/kleidicv/src/filters/gaussian_blur_sc.h +++ b/kleidicv/src/filters/gaussian_blur_sc.h @@ -15,6 +15,7 @@ #include "kleidicv/separable_filter_7x7_sc.h" #include "kleidicv/sigma.h" #include "kleidicv/sve2.h" +#include "kleidicv/workspace/separable.h" namespace KLEIDICV_TARGET_NAMESPACE { diff --git a/kleidicv/src/filters/separable_filter_2d_neon.cpp b/kleidicv/src/filters/separable_filter_2d_neon.cpp index be9c6f147138930254aa41872efb8286a65ce21a..87199d1bc73dfb8c735d297ebe7b256d1533332f 100644 --- a/kleidicv/src/filters/separable_filter_2d_neon.cpp +++ b/kleidicv/src/filters/separable_filter_2d_neon.cpp @@ -9,6 +9,7 @@ #include "kleidicv/kleidicv.h" #include "kleidicv/neon.h" #include "kleidicv/separable_filter_5x5_neon.h" +#include "kleidicv/workspace/separable.h" namespace kleidicv::neon { diff --git a/kleidicv/src/filters/separable_filter_2d_sc.h b/kleidicv/src/filters/separable_filter_2d_sc.h index 868f67dd689d7ca4a9a1ab2d07436201fcbb8dae..b73689760211db4dc55367f6baa8c6e26172b37a 100644 --- a/kleidicv/src/filters/separable_filter_2d_sc.h +++ b/kleidicv/src/filters/separable_filter_2d_sc.h @@ -10,6 +10,7 @@ #include "kleidicv/kleidicv.h" #include "kleidicv/separable_filter_5x5_sc.h" #include "kleidicv/sve2.h" +#include "kleidicv/workspace/separable.h" namespace KLEIDICV_TARGET_NAMESPACE { diff --git a/kleidicv/src/filters/sobel_neon.cpp b/kleidicv/src/filters/sobel_neon.cpp index b76b4e24b6a4449dc83b3bca21b7c7e4b17426a7..f7a33cebafbf7c78b893a4b954b35eccbe722a44 100644 --- a/kleidicv/src/filters/sobel_neon.cpp +++ b/kleidicv/src/filters/sobel_neon.cpp @@ -4,9 +4,9 @@ #include "kleidicv/filters/sobel.h" #include "kleidicv/kleidicv.h" -#include "kleidicv/morphology/workspace.h" #include "kleidicv/neon.h" #include "kleidicv/separable_filter_3x3_neon.h" +#include "kleidicv/workspace/separable.h" namespace kleidicv::neon { diff --git a/kleidicv/src/filters/sobel_sc.h b/kleidicv/src/filters/sobel_sc.h index 769f8a7fed74113b5baf6b765cdd8aec1fa002f3..6b75e7d9c9f7f666ebb130fd5432e098b9d88ee5 100644 --- a/kleidicv/src/filters/sobel_sc.h +++ b/kleidicv/src/filters/sobel_sc.h @@ -9,6 +9,7 @@ #include "kleidicv/kleidicv.h" #include "kleidicv/separable_filter_3x3_sc.h" #include "kleidicv/sve2.h" +#include "kleidicv/workspace/separable.h" namespace KLEIDICV_TARGET_NAMESPACE { diff --git a/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h b/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h index 08a0ac6b19cf45471fd8eaf628171589faa194ee..f96c488c53da17cd50ff5f2167f103abd7f2ed97 100644 --- a/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h +++ b/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h @@ -311,6 +311,16 @@ kleidicv_error_t kleidicv_thread_separable_filter_2d_s16( kleidicv_border_type_t border_type, kleidicv_filter_context_t *context, kleidicv_thread_multithreading); +/// Internal - not part of the public API and its direct use is not supported. +/// +/// Multithreaded implementation of kleidicv_blur_and_downsample_u8 - see +/// the documentation of that function for more details. +kleidicv_error_t kleidicv_thread_blur_and_downsample_u8( + const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, size_t channels, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context, + kleidicv_thread_multithreading mt); + /// Internal - not part of the public API and its direct use is not supported. /// /// Multithreaded implementation of kleidicv_sobel_3x3_horizontal_s16_u8 - see diff --git a/kleidicv_thread/src/kleidicv_thread.cpp b/kleidicv_thread/src/kleidicv_thread.cpp index f043063a87c3ebda5daf15f40432f6be4eb06310..1c9263cf3bba7de96517101e34c2cf4591e6aedb 100644 --- a/kleidicv_thread/src/kleidicv_thread.cpp +++ b/kleidicv_thread/src/kleidicv_thread.cpp @@ -9,6 +9,7 @@ #include #include +#include "kleidicv/filters/blur_and_downsample.h" #include "kleidicv/filters/gaussian_blur.h" #include "kleidicv/filters/separable_filter_2d.h" #include "kleidicv/filters/sobel.h" @@ -500,6 +501,31 @@ kleidicv_error_t kleidicv_thread_separable_filter_2d_s16( kernel_height, context, mt); } +kleidicv_error_t kleidicv_thread_blur_and_downsample_u8( + const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, size_t channels, + kleidicv_border_type_t border_type, kleidicv_filter_context_t *context, + kleidicv_thread_multithreading mt) { + if (!kleidicv::blur_and_downsample_is_implemented(src_width, src_height, + channels)) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + + auto fixed_border_type = kleidicv::get_fixed_border_type(border_type); + if (!fixed_border_type) { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + + auto callback = [=](unsigned y_begin, unsigned y_end, + kleidicv_filter_context_t *thread_context) { + return kleidicv_blur_and_downsample_stripe_u8( + src, src_stride, src_width, src_height, dst, dst_stride, y_begin, y_end, + channels, *fixed_border_type, thread_context); + }; + return kleidicv_thread_filter(callback, src_width, src_height, channels, 5, 5, + context, mt); +} + kleidicv_error_t kleidicv_thread_sobel_3x3_horizontal_s16_u8( const uint8_t *src, size_t src_stride, int16_t *dst, size_t dst_stride, size_t width, size_t height, size_t channels, diff --git a/scripts/benchmark/run_benchmarks_4K.sh b/scripts/benchmark/run_benchmarks_4K.sh index 6de898fac3d0a4e9d08d3f94ff6ba65be9db47b1..3621944f3a9d530fee65113673459d48f52fadba 100755 --- a/scripts/benchmark/run_benchmarks_4K.sh +++ b/scripts/benchmark/run_benchmarks_4K.sh @@ -95,6 +95,8 @@ benchmarks=( "Remap_S16_U8: opencv_perf_imgproc '*Remap/*' '(3840x2160, 8UC1, 16SC2, INTER_NEAREST, BORDER_REPLICATE)'" "Remap_S16Point5_U8: opencv_perf_imgproc '*Remap/*' '(3840x2160, 8UC1, 16SC2, INTER_LINEAR, BORDER_REPLICATE)'" + + "BlurAndDownsample: opencv_perf_imgproc '*pyrDown/*' '(3840x2160, 8UC1)'" ) for idx in "${!benchmarks[@]}"; do diff --git a/scripts/benchmark/run_benchmarks_FHD.sh b/scripts/benchmark/run_benchmarks_FHD.sh index 573c951e16d4eb0944684022c300ddbd7092443d..1045bf6721db8f4bbd34e01d986be3c983138a33 100755 --- a/scripts/benchmark/run_benchmarks_FHD.sh +++ b/scripts/benchmark/run_benchmarks_FHD.sh @@ -95,6 +95,8 @@ benchmarks=( "Remap_S16_U8: opencv_perf_imgproc '*Remap/*' '(1920x1080, 8UC1, 16SC2, INTER_NEAREST, BORDER_REPLICATE)'" "Remap_S16Point5_U8: opencv_perf_imgproc '*Remap/*' '(1920x1080, 8UC1, 16SC2, INTER_LINEAR, BORDER_REPLICATE)'" + + "BlurAndDownsample: opencv_perf_imgproc '*pyrDown/*' '(1920x1080, 8UC1)'" ) for idx in "${!benchmarks[@]}"; do diff --git a/scripts/ci-opencv.sh b/scripts/ci-opencv.sh index 082fcb922cc2abfa1ba356f7aa851b041c562e99..ca25c177ac2456ef7463a089f274f038359aa743 100755 --- a/scripts/ci-opencv.sh +++ b/scripts/ci-opencv.sh @@ -49,6 +49,7 @@ IMGPROC_TEST_PATTERNS=( '*Imgproc_Resize*' '*Imgproc_Dilate*' '*Imgproc_Erode*' + '*Imgproc_PyramidDown*' ) IMGPROC_TEST_PATTERNS_STR="$(join_strings_with_colon "${IMGPROC_TEST_PATTERNS[*]}")" ../../../conformity/opencv_kleidicv/bin/opencv_test_imgproc \ diff --git a/test/api/test_blur_and_downsample.cpp b/test/api/test_blur_and_downsample.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1d36a65ffa48a37e85fb64433496515e29f962a5 --- /dev/null +++ b/test/api/test_blur_and_downsample.cpp @@ -0,0 +1,345 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include + +#include "framework/array.h" +#include "framework/generator.h" +#include "framework/kernel.h" +#include "framework/utils.h" +#include "kleidicv/ctypes.h" +#include "kleidicv/kleidicv.h" + +static constexpr std::array kAllBorders = { + KLEIDICV_BORDER_TYPE_REPLICATE, + KLEIDICV_BORDER_TYPE_REFLECT, + KLEIDICV_BORDER_TYPE_WRAP, + KLEIDICV_BORDER_TYPE_REVERSE, +}; + +static constexpr size_t kKernelSize = 5; +static constexpr size_t kMinWidthHeight = kKernelSize - 1; + +#define KLEIDICV_BLUR_AND_DOWNSAMPLE(type, type_suffix) \ + KLEIDICV_API(blur_and_downsample, \ + kleidicv_blur_and_downsample_##type_suffix, type) + +KLEIDICV_BLUR_AND_DOWNSAMPLE(uint8_t, u8); + +template +struct BlurAndDownsampleKernelTestParams; + +template <> +struct BlurAndDownsampleKernelTestParams { + using InputType = uint8_t; + using IntermediateType = uint64_t; + using OutputType = uint8_t; +}; // end of struct BlurAndDownsampleKernelTestParams + +// Test for kleidicv_blur_and_downsample_ +template +class BlurAndDownsampleTest : public test::KernelTest { + using Base = test::KernelTest; + using typename Base::InputType; + using typename Base::IntermediateType; + using typename Base::OutputType; + using ArrayContainerType = + std::invoke_result_t; + + public: + BlurAndDownsampleTest(KernelTestParams, + ArrayLayoutsGetterType array_layouts_getter, + BorderTcontainerType border_types) + : array_layouts_{array_layouts_getter(kMinWidthHeight, kMinWidthHeight)}, + border_types_{border_types}, + array_layout_generator_{array_layouts_}, + border_type_generator_{border_types_} {} + + void test(const test::Array2D &mask) { + test::Kernel kernel{mask}; + // Create generators and execute test. + test::SequenceGenerator tested_border_values{test::default_border_values()}; + test::PseudoRandomNumberGenerator element_generator; + Base::test(kernel, array_layout_generator_, border_type_generator_, + tested_border_values, element_generator); + } + + private: + kleidicv_error_t call_api(const test::Array2D *input, + test::Array2D *output, + kleidicv_border_type_t border_type, + kleidicv_border_values_t) override { + kleidicv_filter_context_t *context = nullptr; + auto ret = kleidicv_filter_context_create( + &context, input->channels(), kKernelSize, kKernelSize, + input->width() / input->channels(), input->height()); + if (ret != KLEIDICV_OK) { + return ret; + } + + ret = blur_and_downsample()( + input->data(), input->stride(), input->width() / input->channels(), + input->height(), output->data(), output->stride(), input->channels(), + border_type, context); + auto releaseRet = kleidicv_filter_context_release(context); + if (releaseRet != KLEIDICV_OK) { + return releaseRet; + } + + return ret; + } + + // Base class' functionality is ovewritten as pixels in odd rows and columns + // are dropped. This test implementation only supports single-channel data. + void calculate_expected( + const test::Kernel &kernel, + const test::TwoDimensional &source) override { + for (size_t row = 0; row < Base::expected_.height(); ++row) { + for (size_t column = 0; column < Base::expected_.width(); ++column) { + IntermediateType result; + result = Base::calculate_expected_at(kernel, source, (row * 2), + (column * 2)); + Base::expected_.at(row, column)[0] = + static_cast(scale_result(kernel, result)); + } + } + } + + // Base class' functionality is ovewritten as the output has half the width + // and height compared to the input in case of blur_and_downsample + void create_arrays(const test::Kernel &kernel, + const test::ArrayLayout &array_layout) override { + Base::input_ = test::Array2D{array_layout}; + ASSERT_TRUE(Base::input_.valid()); + + test::ArrayLayout output_array_layout{ + (array_layout.width + 1) / 2, (array_layout.height + 1) / 2, + array_layout.padding, array_layout.channels}; + + Base::expected_ = test::Array2D{output_array_layout}; + ASSERT_TRUE(Base::expected_.valid()); + + Base::actual_ = test::Array2D{output_array_layout}; + ASSERT_TRUE(Base::actual_.valid()); + + Base::input_with_borders_ = test::Array2D{ + array_layout.width + + (kernel.left() + kernel.right()) * array_layout.channels, + array_layout.height + kernel.top() + kernel.bottom(), 0, + array_layout.channels}; + ASSERT_TRUE(Base::input_with_borders_.valid()); + } + + // Apply rounding to nearest integer division. + IntermediateType scale_result(const test::Kernel &, + IntermediateType result) override { + return (result + 128) / 256; + } + + const ArrayContainerType array_layouts_; + const BorderTcontainerType border_types_; + test::SequenceGenerator array_layout_generator_; + test::SequenceGenerator border_type_generator_; +}; // end of class BlurAndDownsampleTest + +using ElementTypes = ::testing::Types; + +template +class BlurAndDownsample : public testing::Test {}; + +TYPED_TEST_SUITE(BlurAndDownsample, ElementTypes); + +TYPED_TEST(BlurAndDownsample, API) { + using KernelTestParams = BlurAndDownsampleKernelTestParams; + test::Array2D mask{kKernelSize, + kKernelSize}; + // clang-format off + mask.set(0, 0, { 1, 4, 6, 4, 1}); + mask.set(1, 0, { 4, 16, 24, 16, 4}); + mask.set(2, 0, { 6, 24, 36, 24, 6}); + mask.set(3, 0, { 4, 16, 24, 16, 4}); + mask.set(4, 0, { 1, 4, 6, 4, 1}); + // clang-format on + BlurAndDownsampleTest{KernelTestParams{}, + test::default_1channel_array_layouts, kAllBorders} + .test(mask); +} + +// A simple test suite to test functionality without the kernel test framework +TEST(BlurAndDownsample, Minimal_u8) { + using TypeParam = uint8_t; + + test::Array2D input{5, 5}; + // clang-format off + input.set(0, 0, { 1, 5, 17, 38, 89}); + input.set(1, 0, { 5, 17, 38, 89, 171}); + input.set(2, 0, { 17, 38, 89, 171, 250}); + input.set(3, 0, { 38, 89, 171, 250, 101}); + input.set(4, 0, { 89, 171, 250, 101, 1}); + // clang-format on + + test::Array2D expected{3, 3}; + // clang-format off + expected.set(0, 0, { 6, 35, 99}); + expected.set(1, 0, { 35, 103, 161}); + expected.set(2, 0, { 99, 161, 78}); + // clang-format on + + test::Array2D actual{3, 3}; + + kleidicv_filter_context_t *context = nullptr; + ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create( + &context, input.channels(), 5, 5, + input.width() / input.channels(), input.height())); + + EXPECT_EQ(KLEIDICV_OK, + blur_and_downsample()( + input.data(), input.stride(), input.width() / input.channels(), + input.height(), actual.data(), actual.stride(), + input.channels(), KLEIDICV_BORDER_TYPE_REPLICATE, context)); + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); + EXPECT_EQ_ARRAY2D(expected, actual); +} + +TYPED_TEST(BlurAndDownsample, UnsupportedBorderType) { + kleidicv_filter_context_t *context = nullptr; + ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create( + &context, 1, kKernelSize, kKernelSize, + kMinWidthHeight, kMinWidthHeight)); + TypeParam src[1] = {}, dst[1]; + for (kleidicv_border_type_t border : { + KLEIDICV_BORDER_TYPE_CONSTANT, + KLEIDICV_BORDER_TYPE_TRANSPARENT, + KLEIDICV_BORDER_TYPE_NONE, + }) { + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + blur_and_downsample()( + src, sizeof(TypeParam), kMinWidthHeight, kMinWidthHeight, dst, + sizeof(TypeParam), 1, border, context)); + } + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); +} + +TYPED_TEST(BlurAndDownsample, NullPointer) { + kleidicv_filter_context_t *context = nullptr; + ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create( + &context, 1, kKernelSize, kKernelSize, + kMinWidthHeight, kMinWidthHeight)); + TypeParam src[1] = {}, dst[1]; + test::test_null_args(blur_and_downsample(), src, sizeof(TypeParam), + kMinWidthHeight, kMinWidthHeight, dst, sizeof(TypeParam), + 1, KLEIDICV_BORDER_TYPE_REPLICATE, context); + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); +} + +TYPED_TEST(BlurAndDownsample, Misalignment) { + if (sizeof(TypeParam) == 1) { + // misalignment impossible + return; + } + kleidicv_filter_context_t *context = nullptr; + ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create( + &context, 1, kKernelSize, kKernelSize, + kMinWidthHeight, kMinWidthHeight)); + TypeParam src[1] = {}, dst[1]; + + EXPECT_EQ( + KLEIDICV_ERROR_ALIGNMENT, + blur_and_downsample()( + src, sizeof(TypeParam) + 1, kMinWidthHeight, kMinWidthHeight, dst, + sizeof(TypeParam), 1, KLEIDICV_BORDER_TYPE_REPLICATE, context)); + EXPECT_EQ( + KLEIDICV_ERROR_ALIGNMENT, + blur_and_downsample()( + src, sizeof(TypeParam), kMinWidthHeight, kMinWidthHeight, dst, + sizeof(TypeParam) + 1, 1, KLEIDICV_BORDER_TYPE_REPLICATE, context)); + + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); +} + +TYPED_TEST(BlurAndDownsample, UndersizeImage) { + kleidicv_filter_context_t *context = nullptr; + const size_t underSize = kKernelSize - 2; + ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create( + &context, 1, kKernelSize, kKernelSize, + kMinWidthHeight, kMinWidthHeight)); + TypeParam src[1] = {}, dst[1]; + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + blur_and_downsample()( + src, sizeof(TypeParam), underSize, underSize, dst, + sizeof(TypeParam), 1, KLEIDICV_BORDER_TYPE_REPLICATE, context)); + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + blur_and_downsample()( + src, sizeof(TypeParam), underSize, kMinWidthHeight, dst, + sizeof(TypeParam), 1, KLEIDICV_BORDER_TYPE_REPLICATE, context)); + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + blur_and_downsample()( + src, sizeof(TypeParam), kMinWidthHeight, underSize, dst, + sizeof(TypeParam), 1, KLEIDICV_BORDER_TYPE_REPLICATE, context)); + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); +} + +TYPED_TEST(BlurAndDownsample, OversizeImage) { + kleidicv_filter_context_t *context = nullptr; + ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create( + &context, 1, kKernelSize, kKernelSize, 1, 1)); + TypeParam src[1], dst[1]; + EXPECT_EQ( + KLEIDICV_ERROR_RANGE, + blur_and_downsample()( + src, sizeof(TypeParam), (KLEIDICV_MAX_IMAGE_PIXELS / 4) + 1, 4, dst, + sizeof(TypeParam), 1, KLEIDICV_BORDER_TYPE_REFLECT, context)); + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + blur_and_downsample()( + src, sizeof(TypeParam), KLEIDICV_MAX_IMAGE_PIXELS, + KLEIDICV_MAX_IMAGE_PIXELS, dst, sizeof(TypeParam), 1, + KLEIDICV_BORDER_TYPE_REFLECT, context)); + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); +} + +TYPED_TEST(BlurAndDownsample, ChannelNumber) { + kleidicv_filter_context_t *context = nullptr; + + ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create( + &context, 2, kKernelSize, kKernelSize, + kMinWidthHeight, kMinWidthHeight)); + TypeParam src[1], dst[1]; + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + blur_and_downsample()( + src, sizeof(TypeParam), kMinWidthHeight, kMinWidthHeight, dst, + sizeof(TypeParam), 2, KLEIDICV_BORDER_TYPE_REFLECT, context)); + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); +} + +TYPED_TEST(BlurAndDownsample, InvalidContextImageSize) { + kleidicv_filter_context_t *context = nullptr; + + ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create( + &context, 1, kKernelSize, kKernelSize, + kMinWidthHeight, kMinWidthHeight)); + TypeParam src[1], dst[1]; + EXPECT_EQ(KLEIDICV_ERROR_CONTEXT_MISMATCH, + blur_and_downsample()( + src, sizeof(TypeParam), kMinWidthHeight + 1, kMinWidthHeight, + dst, sizeof(TypeParam), 1, + + KLEIDICV_BORDER_TYPE_REFLECT, context)); + EXPECT_EQ(KLEIDICV_ERROR_CONTEXT_MISMATCH, + blur_and_downsample()( + src, sizeof(TypeParam), kMinWidthHeight, kMinWidthHeight + 1, + dst, sizeof(TypeParam), 1, + + KLEIDICV_BORDER_TYPE_REFLECT, context)); + EXPECT_EQ( + KLEIDICV_ERROR_CONTEXT_MISMATCH, + blur_and_downsample()( + src, sizeof(TypeParam), kMinWidthHeight + 1, kMinWidthHeight + 1, dst, + sizeof(TypeParam), 1, KLEIDICV_BORDER_TYPE_REFLECT, context)); + + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); +} diff --git a/test/api/test_gaussian_blur.cpp b/test/api/test_gaussian_blur.cpp index e559925ccdc721e1c6421f337fd626ac5eb23600..b8b6b3c411934549c5035b0a359b720672c490c3 100644 --- a/test/api/test_gaussian_blur.cpp +++ b/test/api/test_gaussian_blur.cpp @@ -30,12 +30,9 @@ struct GaussianBlurKernelTestParams { static constexpr size_t kKernelSize = KernelSize; }; // end of struct GaussianBlurKernelTestParams -static constexpr std::array kDefaultBorder = { +static constexpr std::array kReplicateBorder = { KLEIDICV_BORDER_TYPE_REPLICATE}; -static constexpr std::array kReflectBorder = { - KLEIDICV_BORDER_TYPE_REFLECT}; - static constexpr std::array kAllBorders = { KLEIDICV_BORDER_TYPE_REPLICATE, KLEIDICV_BORDER_TYPE_REFLECT, @@ -43,60 +40,39 @@ static constexpr std::array kAllBorders = { KLEIDICV_BORDER_TYPE_REVERSE, }; -template -std::unique_ptr> -make_generator_ptr(IterableType &elements) { - test::Generator *pg = - new test::SequenceGenerator(elements); - return std::unique_ptr>( - pg); -} - // Test for GaussianBlur operator. -template +template class GaussianBlurTest : public test::KernelTest { using Base = test::KernelTest; using typename test::KernelTest::InputType; using typename test::KernelTest::IntermediateType; using typename test::KernelTest::OutputType; + using ArrayContainerType = + std::invoke_result_t; public: - GaussianBlurTest() - : small_array_layouts_{test::small_array_layouts( - KernelTestParams::kKernelSize, KernelTestParams::kKernelSize)} { - array_layout_generator_ = make_generator_ptr(small_array_layouts_); - border_type_generator_ = make_generator_ptr(kDefaultBorder); - } - - GaussianBlurTest &with_array_layouts( - std::unique_ptr> g) { - array_layout_generator_ = std::move(g); - return *this; - } - - GaussianBlurTest &with_border_types( - std::unique_ptr> g) { - border_type_generator_ = std::move(g); - return *this; - } + explicit GaussianBlurTest( + KernelTestParams, + ArrayLayoutsGetterType array_layouts_getter = test::small_array_layouts, + BorderContainerType border_types = kAllBorders) + : array_layouts_{array_layouts_getter(KernelTestParams::kKernelSize - 1, + KernelTestParams::kKernelSize - 1)}, + border_types_{border_types}, + array_layout_generator_{array_layouts_}, + border_type_generator_{border_types_} {} void test(const test::Array2D &mask) { test::Kernel kernel{mask}; - // Use the default border values for testing. - auto kSupportedBorderValues = test::default_border_values(); // Create generators and execute test. - test::SequenceGenerator tested_border_values{kSupportedBorderValues}; + test::SequenceGenerator tested_border_values{test::default_border_values()}; test::PseudoRandomNumberGenerator element_generator; - Base::test(kernel, *array_layout_generator_, *border_type_generator_, + Base::test(kernel, array_layout_generator_, border_type_generator_, tested_border_values, element_generator); } - protected: - std::array small_array_layouts_; - std::unique_ptr> array_layout_generator_; - std::unique_ptr> - border_type_generator_; - + private: kleidicv_error_t call_api(const test::Array2D *input, test::Array2D *output, kleidicv_border_type_t border_type, @@ -133,7 +109,13 @@ class GaussianBlurTest : public test::KernelTest { : ((result + 524288) / 1048576); // NOLINTEND(readability-avoid-nested-conditional-operator) } -}; // end of class GaussianBlurTest + + const ArrayContainerType array_layouts_; + const BorderContainerType border_types_; + test::SequenceGenerator array_layout_generator_; + test::SequenceGenerator border_type_generator_; +}; // end of class GaussianBlurTest using ElementTypes = ::testing::Types; @@ -153,15 +135,11 @@ TYPED_TEST(GaussianBlur, 3x3Small) { mask.set(1, 0, { 2, 4, 2}); mask.set(2, 0, { 1, 2, 1}); // clang-format on - GaussianBlurTest{} - .with_border_types(make_generator_ptr(kAllBorders)) - .test(mask); + GaussianBlurTest{KernelTestParams{}}.test(mask); } TYPED_TEST(GaussianBlur, 3x3Default) { using KernelTestParams = GaussianBlurKernelTestParams; - std::array medium_array_layouts_3x3 = - test::default_array_layouts(3, 3); // 3x3 GaussianBlur operator. test::Array2D mask{3, 3}; // clang-format off @@ -169,9 +147,8 @@ TYPED_TEST(GaussianBlur, 3x3Default) { mask.set(1, 0, { 2, 4, 2}); mask.set(2, 0, { 1, 2, 1}); // clang-format on - GaussianBlurTest{} - .with_array_layouts(make_generator_ptr(medium_array_layouts_3x3)) - .with_border_types(make_generator_ptr(kReflectBorder)) + GaussianBlurTest{KernelTestParams{}, test::default_array_layouts, + kReplicateBorder} .test(mask); } @@ -187,9 +164,7 @@ TYPED_TEST(GaussianBlur, 5x5) { mask.set(3, 0, { 4, 16, 24, 16, 4}); mask.set(4, 0, { 1, 4, 6, 4, 1}); // clang-format on - GaussianBlurTest{} - .with_border_types(make_generator_ptr(kAllBorders)) - .test(mask); + GaussianBlurTest{KernelTestParams{}}.test(mask); } // Tests gaussian_blur_7x7_ API. @@ -206,9 +181,7 @@ TYPED_TEST(GaussianBlur, 7x7) { mask.set(5, 0, { 14, 49, 98, 126, 98, 49, 14 }); mask.set(6, 0, { 4, 14, 28, 36, 28, 14, 4 }); // clang-format on - GaussianBlurTest{} - .with_border_types(make_generator_ptr(kAllBorders)) - .test(mask); + GaussianBlurTest{KernelTestParams{}}.test(mask); } // Tests gaussian_blur_15x15_ API. @@ -233,9 +206,7 @@ TYPED_TEST(GaussianBlur, 15x15) { mask.set(13, 0, { 44, 121, 275, 528, 891, 1298, 1606, 1738, 1606, 1298, 891, 528, 275, 121, 44 }); mask.set(14, 0, { 16, 44, 100, 192, 324, 472, 584, 632, 584, 472, 324, 192, 100, 44, 16 }); // clang-format on - GaussianBlurTest{} - .with_border_types(make_generator_ptr(kAllBorders)) - .test(mask); + GaussianBlurTest{KernelTestParams{}}.test(mask); } TYPED_TEST(GaussianBlur, 3x3_CustomSigma) { diff --git a/test/api/test_morphology.cpp b/test/api/test_morphology.cpp index 19f998ef86b903d8c8055ff565f37ef67870c4ba..ffcde1243488b562593e0c651e7a70b285ac1515 100644 --- a/test/api/test_morphology.cpp +++ b/test/api/test_morphology.cpp @@ -10,30 +10,44 @@ #include "framework/array.h" #include "framework/generator.h" #include "framework/kernel.h" -#include "framework/operation.h" #include "kleidicv/kleidicv.h" #include "test_config.h" -#define KLEIDICV_PARAMS(name, impl, type, op) \ - template , bool> = true> \ - class name { \ - public: \ - static decltype(auto) api() { return impl; } \ - static decltype(auto) operation() { \ - return [](type a, type b) { return op(a, b); }; \ - } \ - }; +#define KLEIDICV_DILATE(type, type_suffix) \ + KLEIDICV_API(dilate, kleidicv_dilate_##type_suffix, type) -KLEIDICV_PARAMS(DilateParams, kleidicv_dilate_u8, uint8_t, std::max); -KLEIDICV_PARAMS(ErodeParams, kleidicv_erode_u8, uint8_t, std::min); +KLEIDICV_DILATE(uint8_t, u8); template -struct MorphologyKernelTestParams { +class DilateParams { + public: + using InputType = ElementType; + using IntermediateType = ElementType; + using OutputType = ElementType; + + static decltype(auto) api() { return dilate(); } + static decltype(auto) operation() { + return [](ElementType a, ElementType b) { return std::max(a, b); }; + } +}; // end of class DilateParams + +#define KLEIDICV_ERODE(type, type_suffix) \ + KLEIDICV_API(erode, kleidicv_erode_##type_suffix, type) + +KLEIDICV_ERODE(uint8_t, u8); +template + +class ErodeParams { + public: using InputType = ElementType; using IntermediateType = ElementType; using OutputType = ElementType; -}; // end of struct MorphologyKernelTestParams + + static decltype(auto) api() { return erode(); } + static decltype(auto) operation() { + return [](ElementType a, ElementType b) { return std::min(a, b); }; + } +}; // end of class ErodeParams static constexpr std::array kDefaultBorder = { KLEIDICV_BORDER_TYPE_REPLICATE}; @@ -41,11 +55,6 @@ static constexpr std::array kDefaultBorder = { static constexpr std::array kConstantBorder = { KLEIDICV_BORDER_TYPE_CONSTANT}; -static constexpr std::array kDefaultBorderValues = - {{ - {0, 0, 0, 0}, // default - }}; - template static const std::array &more_border_values() { using limit = std::numeric_limits; @@ -57,35 +66,38 @@ static const std::array &more_border_values() { return values; } -template -std::unique_ptr> -make_generator_ptr(IterableType &elements) { - test::Generator *pg = - new test::SequenceGenerator(elements); - return std::unique_ptr>( - pg); -} - -template class OperationParams, - size_t kernelWidth, size_t kernelHeight> -class MorphologyTest - : public test::KernelTest> { - using Base = test::KernelTest>; +template > +class MorphologyTest : public test::KernelTest { + using Base = test::KernelTest; using typename Base::InputType; using typename Base::IntermediateType; using typename Base::OutputType; + using ArrayContainerType = + std::invoke_result_t; public: - MorphologyTest() - : mask_{kernelWidth, kernelHeight}, + MorphologyTest( + MorphologyKernelTestParams, size_t kernel_width, size_t kernel_height, + ArrayLayoutsGetterType array_layouts_getter = test::small_array_layouts, + BorderContainerType border_types = kDefaultBorder, + BorderValuesContainerType border_values = test::default_border_values()) + : kernel_width_{kernel_width}, + kernel_height_{kernel_height}, + mask_{kernel_width, kernel_height}, kernel_{mask_}, iterations_{1}, - small_array_layouts_{ - test::small_array_layouts(kernelWidth, kernelHeight)} { - array_layout_generator_ = make_generator_ptr(small_array_layouts_); - border_type_generator_ = make_generator_ptr(kDefaultBorder); - border_values_generator_ = make_generator_ptr(kDefaultBorderValues); - } + array_layouts_{ + array_layouts_getter(std::max(kernel_width - 1, 1), + std::max(kernel_height - 1, 1))}, + border_types_{border_types}, + border_values_{border_values}, + array_layout_generator_{array_layouts_}, + border_type_generator_{border_types_}, + border_values_generator_{border_values_} {} MorphologyTest &with_anchor(test::Point anchor) { kernel_ = test::Kernel(mask_, anchor); @@ -97,47 +109,19 @@ class MorphologyTest return *this; } - MorphologyTest &with_array_layouts( - std::unique_ptr> g) { - array_layout_generator_ = std::move(g); - return *this; - } - - MorphologyTest &with_border_types( - std::unique_ptr> g) { - border_type_generator_ = std::move(g); - return *this; - } - - MorphologyTest &with_border_values( - std::unique_ptr> g) { - border_values_generator_ = std::move(g); - return *this; - } - void test() { test::PseudoRandomNumberGenerator element_generator; - Base::test(kernel_, *array_layout_generator_, *border_type_generator_, - *border_values_generator_, element_generator); + Base::test(kernel_, array_layout_generator_, border_type_generator_, + border_values_generator_, element_generator); } - protected: - test::Array2D mask_; - test::Kernel kernel_; - size_t iterations_; - std::array small_array_layouts_; - std::unique_ptr> array_layout_generator_; - std::unique_ptr> - border_type_generator_; - std::unique_ptr> - border_values_generator_; - + private: kleidicv_error_t call_api(const test::Array2D *input, test::Array2D *output, kleidicv_border_type_t border_type, kleidicv_border_values_t border_values) override { kleidicv_morphology_context_t *context = nullptr; - auto kernelRect = kleidicv_rectangle_t{kernelWidth, kernelHeight}; + auto kernelRect = kleidicv_rectangle_t{kernel_width_, kernel_height_}; kleidicv_point_t anchor{kernel_.anchor().x, kernel_.anchor().y}; auto ret = kleidicv_morphology_create( &context, kernelRect, anchor, border_type, border_values, @@ -148,7 +132,7 @@ class MorphologyTest return ret; } - ret = OperationParams::api()( + ret = MorphologyKernelTestParams::api()( input->data(), input->stride(), output->data(), output->stride(), input->width() / input->channels(), input->height(), context); auto releaseRet = kleidicv_morphology_release(context); @@ -182,15 +166,27 @@ class MorphologyTest IntermediateType result = source.at(row, column)[0]; for (size_t height = 0; height < kernel.height(); ++height) { for (size_t width = 0; width < kernel.width(); ++width) { - result = OperationParams::operation()( + result = MorphologyKernelTestParams::operation()( result, source.at(row + height, column + width * source.channels())[0]); } } return result; } -}; // end of class class MorphologyTest + + const size_t kernel_width_; + const size_t kernel_height_; + const test::Array2D mask_; + test::Kernel kernel_; + size_t iterations_; + const ArrayContainerType array_layouts_; + const BorderContainerType border_types_; + const BorderValuesContainerType border_values_; + test::SequenceGenerator array_layout_generator_; + test::SequenceGenerator border_type_generator_; + test::SequenceGenerator border_values_generator_; +}; // end of class MorphologyTest template class Morphology : public testing::Test {}; @@ -200,19 +196,15 @@ using ElementTypes = ::testing::Types; TYPED_TEST_SUITE(Morphology, ElementTypes); TYPED_TEST(Morphology, 1xN) { - std::array medium_array_layouts_3x3 = - test::default_array_layouts(3, 3); + MorphologyTest{DilateParams{}, 1, 1}.test(); + MorphologyTest{ErodeParams{}, 1, 1}.test(); - MorphologyTest{}.test(); - MorphologyTest{}.test(); - MorphologyTest{} - .with_array_layouts(make_generator_ptr(medium_array_layouts_3x3)) + MorphologyTest{DilateParams{}, 1, 2, test::default_array_layouts} .test(); - MorphologyTest{} - .with_array_layouts(make_generator_ptr(medium_array_layouts_3x3)) + MorphologyTest{ErodeParams{}, 1, 2, test::default_array_layouts} .test(); - MorphologyTest{}.test(); - MorphologyTest{}.test(); + MorphologyTest{DilateParams{}, 3, 1}.test(); + MorphologyTest{ErodeParams{}, 3, 1}.test(); } std::array get_large_array_layouts(size_t min_width, @@ -232,80 +224,77 @@ std::array get_large_array_layouts(size_t min_width, } TYPED_TEST(Morphology, LargeArrays) { - std::array large_array_layouts = - get_large_array_layouts(3, 3); - - MorphologyTest{} - .with_array_layouts(make_generator_ptr(large_array_layouts)) + MorphologyTest{DilateParams{}, 3, 3, get_large_array_layouts} .test(); - MorphologyTest{} - .with_array_layouts(make_generator_ptr(large_array_layouts)) + MorphologyTest{ErodeParams{}, 3, 3, get_large_array_layouts} .test(); - MorphologyTest{} - .with_border_types(make_generator_ptr(kConstantBorder)) - .with_array_layouts(make_generator_ptr(large_array_layouts)) + + MorphologyTest{DilateParams{}, 3, 3, get_large_array_layouts, + kConstantBorder} .test(); - MorphologyTest{} - .with_border_types(make_generator_ptr(kConstantBorder)) - .with_array_layouts(make_generator_ptr(large_array_layouts)) + MorphologyTest{ErodeParams{}, 3, 3, get_large_array_layouts, + kConstantBorder} .test(); } TYPED_TEST(Morphology, MediumArrays) { - std::array medium_array_layouts_3x3 = - test::default_array_layouts(3, 3); - MorphologyTest{} - .with_array_layouts(make_generator_ptr(medium_array_layouts_3x3)) + MorphologyTest{DilateParams{}, 3, 3, test::default_array_layouts} .test(); - MorphologyTest{} - .with_array_layouts(make_generator_ptr(medium_array_layouts_3x3)) + MorphologyTest{ErodeParams{}, 3, 3, test::default_array_layouts} .test(); - std::array medium_array_layouts_5x5 = - test::default_array_layouts(5, 5); - MorphologyTest{} - .with_array_layouts(make_generator_ptr(medium_array_layouts_5x5)) + + MorphologyTest{DilateParams{}, 5, 5, test::default_array_layouts} .test(); - MorphologyTest{} - .with_array_layouts(make_generator_ptr(medium_array_layouts_5x5)) + MorphologyTest{ErodeParams{}, 5, 5, test::default_array_layouts} .test(); } TYPED_TEST(Morphology, BorderValues) { - MorphologyTest{} - .with_border_types(make_generator_ptr(kConstantBorder)) - .with_border_values(make_generator_ptr(more_border_values())) + MorphologyTest{DilateParams{}, + 3, + 3, + test::small_array_layouts, + kConstantBorder, + more_border_values()} .test(); - MorphologyTest{} - .with_border_types(make_generator_ptr(kConstantBorder)) - .with_border_values(make_generator_ptr(more_border_values())) + MorphologyTest{ErodeParams{}, + 3, + 3, + test::small_array_layouts, + kConstantBorder, + more_border_values()} .test(); } TYPED_TEST(Morphology, UnortodoxSizes) { - MorphologyTest{}.test(); - MorphologyTest{}.test(); - MorphologyTest{}.test(); - MorphologyTest{}.test(); - MorphologyTest{}.test(); + MorphologyTest{DilateParams{}, 4, 4}.test(); + MorphologyTest{ErodeParams{}, 7, 5}.test(); + + MorphologyTest{DilateParams{}, 8, 4}.test(); + MorphologyTest{DilateParams{}, 6, 10}.test(); + MorphologyTest{ErodeParams{}, 12, 4}.test(); } TYPED_TEST(Morphology, Iterations) { - MorphologyTest{}.with_iterations(2).test(); - MorphologyTest{}.with_iterations(3).test(); - MorphologyTest{}.with_iterations(4).test(); + MorphologyTest{DilateParams{}, 3, 3}.with_iterations(2).test(); + MorphologyTest{ErodeParams{}, 6, 4}.with_iterations(3).test(); + MorphologyTest{DilateParams{}, 2, 7}.with_iterations(4).test(); } TYPED_TEST(Morphology, Anchors) { - MorphologyTest{}.with_anchor({0, 0}).test(); - MorphologyTest{} - .with_border_types(make_generator_ptr(kConstantBorder)) + MorphologyTest{ErodeParams{}, 3, 5}.with_anchor({0, 0}).test(); + + MorphologyTest{DilateParams{}, 3, 5, test::small_array_layouts, + kConstantBorder} .with_anchor({2, 0}) .test(); - MorphologyTest{} - .with_border_types(make_generator_ptr(kConstantBorder)) + + MorphologyTest{ErodeParams{}, 3, 5, test::small_array_layouts, + kConstantBorder} .with_anchor({0, 4}) .test(); - MorphologyTest{}.with_anchor({2, 4}).test(); + + MorphologyTest{DilateParams{}, 3, 5}.with_anchor({2, 4}).test(); } static kleidicv_error_t make_minimal_context( diff --git a/test/api/test_separable_filter_2d.cpp b/test/api/test_separable_filter_2d.cpp index b89f3553ecf9528f20386a7f794d72fdccf22f4d..44862bc48cfcaf18150ee86b8371f1daa29a9264 100644 --- a/test/api/test_separable_filter_2d.cpp +++ b/test/api/test_separable_filter_2d.cpp @@ -46,9 +46,6 @@ struct SeparableFilter2DKernelTestParams { static constexpr size_t kKernelSize = KernelSize; }; // end of struct SeparableFilter2DKernelTestParams -static constexpr std::array kDefaultBorder = { - KLEIDICV_BORDER_TYPE_REPLICATE}; - static constexpr std::array kAllBorders = { KLEIDICV_BORDER_TYPE_REPLICATE, KLEIDICV_BORDER_TYPE_REFLECT, @@ -56,66 +53,42 @@ static constexpr std::array kAllBorders = { KLEIDICV_BORDER_TYPE_REVERSE, }; -template -std::unique_ptr> -make_generator_ptr(IterableType &elements) { - test::Generator *pg = - new test::SequenceGenerator(elements); - return std::unique_ptr>( - pg); -} - // Test for SeparableFilter2D operator. -template +template class SeparableFilter2DTest : public test::KernelTest { using Base = test::KernelTest; using typename test::KernelTest::InputType; using typename test::KernelTest::IntermediateType; using typename test::KernelTest::OutputType; + using ArrayContainerType = + std::invoke_result_t; public: - explicit SeparableFilter2DTest(const InputType *kernel_x, - const InputType *kernel_y) - : kernel_x_(kernel_x), - kernel_y_(kernel_y), - small_array_layouts_{test::small_array_layouts( - KernelTestParams::kKernelSize, KernelTestParams::kKernelSize)} { - array_layout_generator_ = make_generator_ptr(small_array_layouts_); - border_type_generator_ = make_generator_ptr(kDefaultBorder); - } - - SeparableFilter2DTest &with_array_layouts( - std::unique_ptr> g) { - array_layout_generator_ = std::move(g); - return *this; - } - - SeparableFilter2DTest &with_border_types( - std::unique_ptr> g) { - border_type_generator_ = std::move(g); - return *this; - } + explicit SeparableFilter2DTest(KernelTestParams, + ArrayLayoutsGetterType array_layouts_getter, + BorderTcontainerType border_types, + const KernelType &kernel_x, + const KernelType &kernel_y) + : array_layouts_{array_layouts_getter(KernelTestParams::kKernelSize - 1, + KernelTestParams::kKernelSize - 1)}, + border_types_{border_types}, + array_layout_generator_{array_layouts_}, + border_type_generator_{border_types_}, + kernel_x_(kernel_x), + kernel_y_(kernel_y) {} void test(const test::Array2D &mask, InputType max_value) { test::Kernel kernel{mask}; - // Use the default border values for testing. - auto kSupportedBorderValues = test::default_border_values(); // Create generators and execute test. - test::SequenceGenerator tested_border_values{kSupportedBorderValues}; + test::SequenceGenerator tested_border_values{test::default_border_values()}; test::PseudoRandomNumberGeneratorIntRange element_generator{ 0, max_value}; - Base::test(kernel, *array_layout_generator_, *border_type_generator_, + Base::test(kernel, array_layout_generator_, border_type_generator_, tested_border_values, element_generator); } - protected: - const InputType *kernel_x_; - const InputType *kernel_y_; - std::array small_array_layouts_; - std::unique_ptr> array_layout_generator_; - std::unique_ptr> - border_type_generator_; - + private: kleidicv_error_t call_api(const test::Array2D *input, test::Array2D *output, kleidicv_border_type_t border_type, @@ -132,7 +105,7 @@ class SeparableFilter2DTest : public test::KernelTest { ret = separable_filter_2d()( input->data(), input->stride(), output->data(), output->stride(), input->width() / input->channels(), input->height(), input->channels(), - kernel_x_, KernelTestParams::kKernelSize, kernel_y_, + kernel_x_.data(), KernelTestParams::kKernelSize, kernel_y_.data(), KernelTestParams::kKernelSize, border_type, context); auto releaseRet = kleidicv_filter_context_release(context); if (releaseRet != KLEIDICV_OK) { @@ -141,7 +114,15 @@ class SeparableFilter2DTest : public test::KernelTest { return ret; } -}; // end of class SeparableFilter2DTest + + const ArrayContainerType array_layouts_; + const BorderTcontainerType border_types_; + test::SequenceGenerator array_layout_generator_; + test::SequenceGenerator border_type_generator_; + const KernelType &kernel_x_; + const KernelType &kernel_y_; +}; // end of class SeparableFilter2DTest using ElementTypes = ::testing::Types; @@ -154,8 +135,8 @@ TYPED_TEST_SUITE(SeparableFilter2D, ElementTypes); TYPED_TEST(SeparableFilter2D, 5x5) { using KernelTestParams = SeparableFilter2DKernelTestParams; - const TypeParam kernel_x[5] = {5, 0, 1, 2, 2}; - const TypeParam kernel_y[5] = {1, 4, 3, 1, 0}; + const std::array kernel_x = {5, 0, 1, 2, 2}; + const std::array kernel_y = {1, 4, 3, 1, 0}; // Mask is created by 'kernel_y (outer product) kernel_x' test::Array2D mask{5, 5}; @@ -163,8 +144,8 @@ TYPED_TEST(SeparableFilter2D, 5x5) { return kernel_y[row] * kernel_x[column]; }); - SeparableFilter2DTest{kernel_x, kernel_y} - .with_border_types(make_generator_ptr(kAllBorders)) + SeparableFilter2DTest{KernelTestParams{}, test::small_array_layouts, + kAllBorders, kernel_x, kernel_y} .test(mask, 5); } diff --git a/test/api/test_sobel.cpp b/test/api/test_sobel.cpp index af60dc010524e887e45cc7853a64b8b616bdf470..03c6a153b8c1bf9195cc4beb06f529a93f28ce98 100644 --- a/test/api/test_sobel.cpp +++ b/test/api/test_sobel.cpp @@ -65,13 +65,11 @@ class Sobel3x3Test : public test::KernelTest { test::Kernel kernel{mask}; // Use the default array layouts for testing. auto array_layouts = - test::default_array_layouts(mask.width(), mask.height()); - // Use the default border values for testing. - auto kSupportedBorderValues = test::default_border_values(); + test::default_array_layouts(mask.width() - 1, mask.height() - 1); // Create generators and execute test. test::SequenceGenerator tested_array_layouts{array_layouts}; test::SequenceGenerator tested_borders{kSupportedBorders}; - test::SequenceGenerator tested_border_values{kSupportedBorderValues}; + test::SequenceGenerator tested_border_values{test::default_border_values()}; test::PseudoRandomNumberGenerator element_generator; Base::test(kernel, tested_array_layouts, tested_borders, tested_border_values, element_generator); diff --git a/test/api/test_thread.cpp b/test/api/test_thread.cpp index 41c84f5419039c341e5f60b79b9612c6ee673e3e..86d1ebd7d992e3bd4b1626a6ac31ba6342c16349 100644 --- a/test/api/test_thread.cpp +++ b/test/api/test_thread.cpp @@ -362,6 +362,69 @@ TEST(ThreadGaussianBlur, NotImplemented) { ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); } +TEST_P(Thread, blur_and_downsample_u8) { + unsigned src_width = 0, src_height = 0, thread_count = 0; + std::tie(src_width, src_height, thread_count) = GetParam(); + size_t channels = 1; + size_t kernel_width = 5; + size_t kernel_height = kernel_width; + kleidicv_border_type_t border_type = KLEIDICV_BORDER_TYPE_REPLICATE; + kleidicv_filter_context_t *context = nullptr; + ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create( + &context, channels, kernel_width, kernel_height, + src_width, src_height)); + + test::Array2D src(size_t{src_width} * channels, src_height); + test::Array2D dst_single(size_t{(src_width + 1) / 2} * channels, + (src_height + 1) / 2), + dst_multi(size_t{(src_width + 1) / 2} * channels, (src_height + 1) / 2); + + test::PseudoRandomNumberGenerator generator; + src.fill(generator); + + kleidicv_error_t single_result = kleidicv_blur_and_downsample_u8( + src.data(), src.stride(), src_width, src_height, dst_single.data(), + dst_single.stride(), channels, border_type, context); + + kleidicv_error_t multi_result = kleidicv_thread_blur_and_downsample_u8( + src.data(), src.stride(), src_width, src_height, dst_multi.data(), + dst_multi.stride(), channels, border_type, context, + get_multithreading_fake(thread_count)); + + EXPECT_EQ(single_result, multi_result); + if (KLEIDICV_OK == single_result) { + EXPECT_EQ_ARRAY2D(dst_multi, dst_single); + } + + ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); +} + +TEST(ThreadBlurAndDownsample, NotImplemented) { + unsigned max_width = 10, max_height = 10; + size_t channels = 1; + size_t kernel_width = 5; + size_t kernel_height = kernel_width; + kleidicv_filter_context_t *context = nullptr; + ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_create( + &context, channels, kernel_width, kernel_height, + max_width, max_height)); + + uint8_t src[1] = {}, dst[1] = {}; + // Image too small + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + kleidicv_thread_blur_and_downsample_u8( + src, 1, 1, 1, dst, 1, channels, KLEIDICV_BORDER_TYPE_REPLICATE, + context, get_multithreading_fake(2))); + // Border not supported + EXPECT_EQ(KLEIDICV_ERROR_NOT_IMPLEMENTED, + kleidicv_thread_blur_and_downsample_u8( + src, 1, max_width, max_height, dst, 1, channels, + KLEIDICV_BORDER_TYPE_TRANSPARENT, context, + get_multithreading_fake(2))); + + ASSERT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); +} + TEST_P(Thread, separable_filter_2d_u8) { check_separable_filter_2d(kleidicv_separable_filter_2d_u8, kleidicv_thread_separable_filter_2d_u8); diff --git a/test/framework/kernel.h b/test/framework/kernel.h index 94ce834391fbcea071b0a16a0d9858cbd49040f0..995f5037b6b08eec8cc0c2c7e4efd5f9affe2a76 100644 --- a/test/framework/kernel.h +++ b/test/framework/kernel.h @@ -202,8 +202,8 @@ class KernelTest { } // Creates arrays for a given layout. - void create_arrays(const Kernel& kernel, - const ArrayLayout& array_layout) { + virtual void create_arrays(const Kernel& kernel, + const ArrayLayout& array_layout) { input_ = Array2D{array_layout}; ASSERT_TRUE(input_.valid()); diff --git a/test/framework/utils.cpp b/test/framework/utils.cpp index 77d00e14926077b6b2e21cf25fd6b29cae703b3c..2c1a8cf09071eb6491678a3bca3b062d56586716 100644 --- a/test/framework/utils.cpp +++ b/test/framework/utils.cpp @@ -59,10 +59,10 @@ template void dump(const TwoDimensional *); template void dump(const TwoDimensional *); template void dump(const TwoDimensional *); -std::array default_border_values() { - return {{ - {0, 0, 0, 0}, // default - }}; +const std::array &default_border_values() { + static const std::array kDefaultBorderValues{ + {{0, 0, 0, 0}}}; + return kDefaultBorderValues; } std::array small_array_layouts(size_t min_width, @@ -111,4 +111,23 @@ std::array default_array_layouts(size_t min_width, }}; } +std::array default_1channel_array_layouts( + size_t min_width, size_t min_height) { + size_t vl = test::Options::vector_length(); + size_t width = std::max(min_width, vl); + size_t height = std::max(min_height, vl); + + return {{ + // clang-format off + // width, height, padding, channels + { min_width, height, 0, 1}, + { min_width, height, vl, 1}, + { width + 1, min_height, 0, 1}, + { 2 * width, min_height, vl, 1}, + { 2 * width + 1, min_height + 1, 0, 1}, + { 4 * width + 1, min_height + 1, vl, 1}, + // clang-format on + }}; +} + } // namespace test diff --git a/test/framework/utils.h b/test/framework/utils.h index ba069b09f2dad637f2f97f6b214bae21b7e48127..656a2a02c2bfc12b1994c57773e2a0f12e52e4df 100644 --- a/test/framework/utils.h +++ b/test/framework/utils.h @@ -114,7 +114,7 @@ template void dump(const TwoDimensional *elements); // Returns default border values. -std::array default_border_values(); +const std::array &default_border_values(); // Returns an array of just a few small layouts. std::array small_array_layouts(size_t min_width, @@ -123,6 +123,9 @@ std::array small_array_layouts(size_t min_width, std::array default_array_layouts(size_t min_width, size_t min_height); +std::array default_1channel_array_layouts( + size_t min_width, size_t min_height); + namespace internal { template class NullPointerTester {