From 35069708bf9c19678c1f41b341b06ecd8fc9f07c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Fri, 12 Jul 2024 18:55:43 +0200 Subject: [PATCH 1/6] Add HAL support for Separable Filter 2D --- adapters/opencv/kleidicv_hal.cpp | 149 +++++++++++++++++++++++++++++++ adapters/opencv/kleidicv_hal.h | 50 +++++++++++ 2 files changed, 199 insertions(+) diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp index 671e45aae..8192263d5 100644 --- a/adapters/opencv/kleidicv_hal.cpp +++ b/adapters/opencv/kleidicv_hal.cpp @@ -282,6 +282,155 @@ static int from_opencv(int opencv_border_type, return 0; } +struct SeparableFilter2DParams { + size_t channels; + kleidicv_border_type_t border_type; + const uint8_t *kernel_x; + size_t kernel_width; + const uint8_t *kernel_y; + size_t kernel_height; + kleidicv_filter_context_t *cached_filter_context; + size_t cached_max_image_width; + size_t cached_max_image_height; +}; + +int separable_filter_2d_init(cvhalFilter2D **context, int src_type, + int dst_type, int kernel_type, uchar *kernelx_data, + int kernelx_length, uchar *kernely_data, + int kernely_length, int anchor_x, int anchor_y, + double delta, int borderType) { + if (src_type != dst_type) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + if (CV_MAT_DEPTH(src_type) != CV_8U) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + if (CV_MAT_DEPTH(kernel_type) != CV_8U) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + kleidicv_border_type_t kleidicv_border_type; + if (from_opencv(borderType, kleidicv_border_type)) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + if (kleidicv_border_type != + kleidicv_border_type_t::KLEIDICV_BORDER_TYPE_REPLICATE) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + if (anchor_x != -1 || anchor_y != -1) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + if (delta != 0.0) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + // Use std::unique_ptr to make error returns safer. + auto params = std::make_unique(); + if (!params) { + return CV_HAL_ERROR_UNKNOWN; + } + + const uint8_t *kernel_x = new uint8_t[kernelx_length]; + const uint8_t *kernel_y = new uint8_t[kernely_length]; + + std::memcpy(const_cast(kernel_x), kernelx_data, kernelx_length); + std::memcpy(const_cast(kernel_y), kernely_data, kernely_length); + + params->channels = (src_type >> CV_CN_SHIFT) + 1; + params->border_type = kleidicv_border_type; + + params->kernel_x = kernel_x; + params->kernel_width = static_cast(kernelx_length); + + params->kernel_y = kernel_y; + params->kernel_height = static_cast(kernely_length); + + params->cached_filter_context = nullptr; + + *context = reinterpret_cast(params.release()); + return CV_HAL_ERROR_OK; +} + +int separable_filter_2d_operation(cvhalFilter2D *context, uchar *src_data, + size_t src_step, uchar *dst_data, + size_t dst_step, int width, int height, + int full_width, int full_height, int offset_x, + int offset_y) { + if (!context) { + return CV_HAL_ERROR_UNKNOWN; + } + + size_t margin_left = static_cast(offset_x); + size_t margin_top = static_cast(offset_y); + size_t margin_right = static_cast(full_width - width - offset_x); + size_t margin_bottom = static_cast(full_height - height - offset_y); + + if (margin_left != 0 || margin_top != 0 || margin_right != 0 || + margin_bottom != 0) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + auto params = reinterpret_cast(context); + size_t width_sz = static_cast(width); + size_t height_sz = static_cast(height); + + kleidicv_filter_context_t *filter_context = params->cached_filter_context; + if (filter_context && (width_sz > params->cached_max_image_width || + height_sz > params->cached_max_image_height)) { + if (kleidicv_error_t release_err = + kleidicv_filter_context_release(params->cached_filter_context)) { + return convert_error(release_err); + } + + filter_context = nullptr; + } + + if (!filter_context) { + kleidicv_error_t create_err = kleidicv_filter_context_create( + &filter_context, params->channels, params->kernel_width, + params->kernel_height, width_sz, height_sz); + if (create_err) { + return convert_error(create_err); + } + params->cached_filter_context = filter_context; + params->cached_max_image_width = width_sz; + params->cached_max_image_height = height_sz; + } + + kleidicv_error_t filter_err = kleidicv_separable_filter_2d_u8( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, + static_cast(width), static_cast(height), params->channels, + params->kernel_x, params->kernel_width, params->kernel_y, + params->kernel_height, params->border_type, filter_context); + + return convert_error(filter_err); +} + +int separable_filter_2d_free(cvhalFilter2D *context) { + if (!context) { + return CV_HAL_ERROR_UNKNOWN; + } + + std::unique_ptr params( + reinterpret_cast(context)); + delete[] params->kernel_y; + delete[] params->kernel_x; + + if (params->cached_filter_context) { + kleidicv_error_t release_err = + kleidicv_filter_context_release(params->cached_filter_context); + return convert_error(release_err); + } + + return CV_HAL_ERROR_OK; +} + int gaussian_blur_binomial(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int width, int height, int depth, int cn, size_t margin_left, diff --git a/adapters/opencv/kleidicv_hal.h b/adapters/opencv/kleidicv_hal.h index 109b20511..6cc6af831 100644 --- a/adapters/opencv/kleidicv_hal.h +++ b/adapters/opencv/kleidicv_hal.h @@ -49,6 +49,20 @@ int threshold(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int width, int height, int depth, int cn, double thresh, double maxValue, int thresholdType); +int separable_filter_2d_init(cvhalFilter2D **context, int src_type, + int dst_type, int kernel_type, uchar *kernelx_data, + int kernelx_length, uchar *kernely_data, + int kernely_length, int anchor_x, int anchor_y, + double delta, int borderType); + +int separable_filter_2d_operation(cvhalFilter2D *context, uchar *src_data, + size_t src_step, uchar *dst_data, + size_t dst_step, int width, int height, + int full_width, int full_height, int offset_x, + int offset_y); + +int separable_filter_2d_free(cvhalFilter2D *context); + int gaussian_blur_binomial(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int width, int height, int depth, int cn, size_t margin_left, @@ -213,6 +227,42 @@ static inline int kleidicv_threshold_with_fallback( #undef cv_hal_threshold #define cv_hal_threshold kleidicv_threshold_with_fallback +// separable_filter_2d_init +static inline int kleidicv_separable_filter_2d_init_with_fallback( + cvhalFilter2D **context, int src_type, int dst_type, int kernel_type, + uchar *kernelx_data, int kernelx_length, uchar *kernely_data, + int kernely_length, int anchor_x, int anchor_y, double delta, + int borderType) { + return KLEIDICV_HAL_FALLBACK_FORWARD( + separable_filter_2d_init, cv_hal_sepFilterInit, context, src_type, + dst_type, kernel_type, kernelx_data, kernelx_length, kernely_data, + kernely_length, anchor_x, anchor_y, delta, borderType); +} +#undef cv_hal_sepFilterInit +#define cv_hal_sepFilterInit kleidicv_separable_filter_2d_init_with_fallback + +// separable_filter_2d_operation +static inline int kleidicv_separable_filter_2d_operation_with_fallback( + cvhalFilter2D *context, uchar *src_data, size_t src_step, uchar *dst_data, + size_t dst_step, int width, int height, int full_width, int full_height, + int offset_x, int offset_y) { + return KLEIDICV_HAL_FALLBACK_FORWARD( + separable_filter_2d_operation, cv_hal_sepFilter, context, src_data, + src_step, dst_data, dst_step, width, height, full_width, full_height, + offset_x, offset_y); +} +#undef cv_hal_sepFilter +#define cv_hal_sepFilter kleidicv_separable_filter_2d_operation_with_fallback + +// separable_filter_2d_free +static inline int kleidicv_separable_filter_2d_free_with_fallback( + cvhalFilter2D *context) { + return KLEIDICV_HAL_FALLBACK_FORWARD(separable_filter_2d_free, + cv_hal_sepFilterFree, context); +} +#undef cv_hal_sepFilterFree +#define cv_hal_sepFilterFree kleidicv_separable_filter_2d_free_with_fallback + // gaussian_blur_binomial static inline int kleidicv_gaussian_blur_binomial_with_fallback( const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, -- GitLab From 4b121adf2cff9c012835cb09a86b1f97577b0561 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Mon, 15 Jul 2024 12:36:40 +0200 Subject: [PATCH 2/6] Refactor vectors in the conformity test framework --- conformity/opencv/manager.cpp | 2 +- conformity/opencv/subordinate.cpp | 2 +- conformity/opencv/test_binary_op.cpp | 3 +- conformity/opencv/test_binary_op.h | 14 --- conformity/opencv/test_cvtcolor.cpp | 2 +- conformity/opencv/test_cvtcolor.h | 14 --- conformity/opencv/test_exp.cpp | 3 +- conformity/opencv/test_exp.h | 14 --- conformity/opencv/test_float_conv.cpp | 4 +- conformity/opencv/test_float_conv.h | 14 --- conformity/opencv/test_gaussian_blur.cpp | 4 +- conformity/opencv/test_gaussian_blur.h | 14 --- conformity/opencv/test_min_max.cpp | 4 +- conformity/opencv/test_min_max.h | 14 --- conformity/opencv/test_resize.cpp | 2 +- conformity/opencv/test_resize.h | 14 --- conformity/opencv/test_rgb2yuv.cpp | 4 +- conformity/opencv/test_rgb2yuv.h | 14 --- conformity/opencv/test_scale.cpp | 2 +- conformity/opencv/test_scale.h | 14 --- conformity/opencv/test_sobel.cpp | 4 +- conformity/opencv/test_sobel.h | 14 --- conformity/opencv/test_yuv2rgb.cpp | 4 +- conformity/opencv/test_yuv2rgb.h | 14 --- conformity/opencv/tests.cpp | 12 +-- conformity/opencv/tests.h | 106 ++++------------------- conformity/opencv/utils.h | 100 +++++++++++++++++++++ 27 files changed, 135 insertions(+), 277 deletions(-) delete mode 100644 conformity/opencv/test_binary_op.h delete mode 100644 conformity/opencv/test_cvtcolor.h delete mode 100644 conformity/opencv/test_exp.h delete mode 100644 conformity/opencv/test_float_conv.h delete mode 100644 conformity/opencv/test_gaussian_blur.h delete mode 100644 conformity/opencv/test_min_max.h delete mode 100644 conformity/opencv/test_resize.h delete mode 100644 conformity/opencv/test_rgb2yuv.h delete mode 100644 conformity/opencv/test_scale.h delete mode 100644 conformity/opencv/test_sobel.h delete mode 100644 conformity/opencv/test_yuv2rgb.h create mode 100644 conformity/opencv/utils.h diff --git a/conformity/opencv/manager.cpp b/conformity/opencv/manager.cpp index 99bc18c4c..977ba611d 100644 --- a/conformity/opencv/manager.cpp +++ b/conformity/opencv/manager.cpp @@ -10,7 +10,7 @@ #include #include "common.h" -#include "tests.h" +#include "utils.h" int main(int argc, char** argv) { if (argc < 2) { diff --git a/conformity/opencv/subordinate.cpp b/conformity/opencv/subordinate.cpp index 4291aaa48..1011c8485 100644 --- a/conformity/opencv/subordinate.cpp +++ b/conformity/opencv/subordinate.cpp @@ -5,7 +5,7 @@ #include #include "common.h" -#include "tests.h" +#include "utils.h" int main(void) { OpenedSharedMemory sm{KLEIDICV_CONFORMITY_SHM_ID, diff --git a/conformity/opencv/test_binary_op.cpp b/conformity/opencv/test_binary_op.cpp index b5e0c297b..9bc59aed6 100644 --- a/conformity/opencv/test_binary_op.cpp +++ b/conformity/opencv/test_binary_op.cpp @@ -2,13 +2,12 @@ // // SPDX-License-Identifier: Apache-2.0 -#include "test_binary_op.h" - #include #include #include #include "opencv2/core/hal/interface.h" +#include "tests.h" static cv::Mat add(cv::Mat& a, cv::Mat& b) { return a + b; } diff --git a/conformity/opencv/test_binary_op.h b/conformity/opencv/test_binary_op.h deleted file mode 100644 index 169e79ad0..000000000 --- a/conformity/opencv/test_binary_op.h +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates -// -// SPDX-License-Identifier: Apache-2.0 - -#ifndef KLEIDICV_OPENCV_CONFORMITY_TEST_BINARY_OP_H_ -#define KLEIDICV_OPENCV_CONFORMITY_TEST_BINARY_OP_H_ - -#include - -#include "tests.h" - -std::vector& binary_op_tests_get(); - -#endif // KLEIDICV_OPENCV_CONFORMITY_TEST_BINARY_OP_H_ diff --git a/conformity/opencv/test_cvtcolor.cpp b/conformity/opencv/test_cvtcolor.cpp index 796aac7b4..53425f39c 100644 --- a/conformity/opencv/test_cvtcolor.cpp +++ b/conformity/opencv/test_cvtcolor.cpp @@ -2,7 +2,7 @@ // // SPDX-License-Identifier: Apache-2.0 -#include "test_cvtcolor.h" +#include "tests.h" template static cv::Mat exec_cvtcolor(cv::Mat& input) { diff --git a/conformity/opencv/test_cvtcolor.h b/conformity/opencv/test_cvtcolor.h deleted file mode 100644 index 6f48ede21..000000000 --- a/conformity/opencv/test_cvtcolor.h +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates -// -// SPDX-License-Identifier: Apache-2.0 - -#ifndef KLEIDICV_OPENCV_CONFORMITY_TEST_CVTCOLOR_H_ -#define KLEIDICV_OPENCV_CONFORMITY_TEST_CVTCOLOR_H_ - -#include - -#include "tests.h" - -std::vector& cvtcolor_tests_get(); - -#endif // KLEIDICV_OPENCV_CONFORMITY_TEST_CVTCOLOR_H_ diff --git a/conformity/opencv/test_exp.cpp b/conformity/opencv/test_exp.cpp index 9aed0eb98..95ecd6647 100644 --- a/conformity/opencv/test_exp.cpp +++ b/conformity/opencv/test_exp.cpp @@ -2,13 +2,12 @@ // // SPDX-License-Identifier: Apache-2.0 -#include "test_exp.h" - #include #include #include #include "opencv2/core/hal/interface.h" +#include "tests.h" static cv::Mat exec_exp(cv::Mat& input_mat) { cv::Mat result; diff --git a/conformity/opencv/test_exp.h b/conformity/opencv/test_exp.h deleted file mode 100644 index 1da690695..000000000 --- a/conformity/opencv/test_exp.h +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates -// -// SPDX-License-Identifier: Apache-2.0 - -#ifndef KLEIDICV_OPENCV_CONFORMITY_TEST_EXP_H_ -#define KLEIDICV_OPENCV_CONFORMITY_TEST_EXP_H_ - -#include - -#include "tests.h" - -std::vector& exp_tests_get(); - -#endif // KLEIDICV_OPENCV_CONFORMITY_TEST_EXP_H_ diff --git a/conformity/opencv/test_float_conv.cpp b/conformity/opencv/test_float_conv.cpp index ae091b82a..26b6bb189 100644 --- a/conformity/opencv/test_float_conv.cpp +++ b/conformity/opencv/test_float_conv.cpp @@ -2,11 +2,11 @@ // // SPDX-License-Identifier: Apache-2.0 -#include "test_float_conv.h" - #include #include +#include "tests.h" + float floatval(uint32_t v) { float result; static_assert(sizeof(result) == sizeof(v)); diff --git a/conformity/opencv/test_float_conv.h b/conformity/opencv/test_float_conv.h deleted file mode 100644 index 9e4c40be6..000000000 --- a/conformity/opencv/test_float_conv.h +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates -// -// SPDX-License-Identifier: Apache-2.0 - -#ifndef KLEIDICV_OPENCV_CONFORMITY_TEST_FLOAT_CONV_H_ -#define KLEIDICV_OPENCV_CONFORMITY_TEST_FLOAT_CONV_H_ - -#include - -#include "tests.h" - -std::vector& float_conversion_tests_get(); - -#endif // KLEIDICV_OPENCV_CONFORMITY_TEST_FLOAT_CONV_H_ diff --git a/conformity/opencv/test_gaussian_blur.cpp b/conformity/opencv/test_gaussian_blur.cpp index 7d0ed6da3..6e3f59648 100644 --- a/conformity/opencv/test_gaussian_blur.cpp +++ b/conformity/opencv/test_gaussian_blur.cpp @@ -2,10 +2,10 @@ // // SPDX-License-Identifier: Apache-2.0 -#include "test_gaussian_blur.h" - #include +#include "tests.h" + template cv::Mat exec_gaussian_blur(cv::Mat& input) { double sigma = diff --git a/conformity/opencv/test_gaussian_blur.h b/conformity/opencv/test_gaussian_blur.h deleted file mode 100644 index ddc045ed2..000000000 --- a/conformity/opencv/test_gaussian_blur.h +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates -// -// SPDX-License-Identifier: Apache-2.0 - -#ifndef KLEIDICV_OPENCV_CONFORMITY_TEST_GAUSSIAN_BLUR_H_ -#define KLEIDICV_OPENCV_CONFORMITY_TEST_GAUSSIAN_BLUR_H_ - -#include - -#include "tests.h" - -std::vector& gaussian_blur_tests_get(); - -#endif // KLEIDICV_OPENCV_CONFORMITY_TEST_GAUSSIAN_BLUR_H_ diff --git a/conformity/opencv/test_min_max.cpp b/conformity/opencv/test_min_max.cpp index 539e5b36c..d84adc0c0 100644 --- a/conformity/opencv/test_min_max.cpp +++ b/conformity/opencv/test_min_max.cpp @@ -2,11 +2,11 @@ // // SPDX-License-Identifier: Apache-2.0 -#include "test_min_max.h" - #include #include +#include "tests.h" + template cv::Mat exec_min_max(cv::Mat& input) { double minVal, maxVal; diff --git a/conformity/opencv/test_min_max.h b/conformity/opencv/test_min_max.h deleted file mode 100644 index df793b3cd..000000000 --- a/conformity/opencv/test_min_max.h +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates -// -// SPDX-License-Identifier: Apache-2.0 - -#ifndef KLEIDICV_OPENCV_CONFORMITY_TEST_MIN_MAX_H_ -#define KLEIDICV_OPENCV_CONFORMITY_TEST_MIN_MAX_H_ - -#include - -#include "tests.h" - -std::vector& min_max_tests_get(); - -#endif // KLEIDICV_OPENCV_CONFORMITY_TEST_MIN_MAX_H_ diff --git a/conformity/opencv/test_resize.cpp b/conformity/opencv/test_resize.cpp index 1f6cbd636..45d9816bb 100644 --- a/conformity/opencv/test_resize.cpp +++ b/conformity/opencv/test_resize.cpp @@ -1,7 +1,6 @@ // SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 -#include "test_resize.h" #include #include @@ -9,6 +8,7 @@ #include "opencv2/core/hal/interface.h" #include "opencv2/imgproc/hal/interface.h" +#include "tests.h" // Factor is interpreted as 1/1000, i.e. 500 for 0.5 template diff --git a/conformity/opencv/test_resize.h b/conformity/opencv/test_resize.h deleted file mode 100644 index 85387c63a..000000000 --- a/conformity/opencv/test_resize.h +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates -// -// SPDX-License-Identifier: Apache-2.0 - -#ifndef KLEIDICV_OPENCV_CONFORMITY_TEST_RESIZE_H_ -#define KLEIDICV_OPENCV_CONFORMITY_TEST_RESIZE_H_ - -#include - -#include "tests.h" - -std::vector& resize_tests_get(); - -#endif // KLEIDICV_OPENCV_CONFORMITY_TEST_RESIZE_H_ diff --git a/conformity/opencv/test_rgb2yuv.cpp b/conformity/opencv/test_rgb2yuv.cpp index e6633bf3c..de27c4808 100644 --- a/conformity/opencv/test_rgb2yuv.cpp +++ b/conformity/opencv/test_rgb2yuv.cpp @@ -2,10 +2,10 @@ // // SPDX-License-Identifier: Apache-2.0 -#include "test_rgb2yuv.h" - #include +#include "tests.h" + template cv::Mat exec_rgb2yuv(cv::Mat& input) { cv::Mat result; diff --git a/conformity/opencv/test_rgb2yuv.h b/conformity/opencv/test_rgb2yuv.h deleted file mode 100644 index 03d5687be..000000000 --- a/conformity/opencv/test_rgb2yuv.h +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates -// -// SPDX-License-Identifier: Apache-2.0 - -#ifndef KLEIDICV_OPENCV_CONFORMITY_TEST_RGB2YUV_H_ -#define KLEIDICV_OPENCV_CONFORMITY_TEST_RGB2YUV_H_ - -#include - -#include "tests.h" - -std::vector& rgb2yuv_tests_get(); - -#endif // KLEIDICV_OPENCV_CONFORMITY_TEST_RGB2YUV_H_ diff --git a/conformity/opencv/test_scale.cpp b/conformity/opencv/test_scale.cpp index 5948ffb18..bc1614a17 100644 --- a/conformity/opencv/test_scale.cpp +++ b/conformity/opencv/test_scale.cpp @@ -1,13 +1,13 @@ // SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 -#include "test_scale.h" #include #include #include #include "opencv2/core/hal/interface.h" +#include "tests.h" template cv::Mat exec_scale(cv::Mat& input_mat) { diff --git a/conformity/opencv/test_scale.h b/conformity/opencv/test_scale.h deleted file mode 100644 index 86916230a..000000000 --- a/conformity/opencv/test_scale.h +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates -// -// SPDX-License-Identifier: Apache-2.0 - -#ifndef KLEIDICV_OPENCV_CONFORMITY_TEST_SCALE_H_ -#define KLEIDICV_OPENCV_CONFORMITY_TEST_SCALE_H_ - -#include - -#include "tests.h" - -std::vector& scale_tests_get(); - -#endif // KLEIDICV_OPENCV_CONFORMITY_TEST_SCALE_H_ diff --git a/conformity/opencv/test_sobel.cpp b/conformity/opencv/test_sobel.cpp index 473e153a6..807af0c8a 100644 --- a/conformity/opencv/test_sobel.cpp +++ b/conformity/opencv/test_sobel.cpp @@ -2,10 +2,10 @@ // // SPDX-License-Identifier: Apache-2.0 -#include "test_sobel.h" - #include +#include "tests.h" + template cv::Mat exec_sobel(cv::Mat& input) { cv::Mat result; diff --git a/conformity/opencv/test_sobel.h b/conformity/opencv/test_sobel.h deleted file mode 100644 index 131fb3850..000000000 --- a/conformity/opencv/test_sobel.h +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates -// -// SPDX-License-Identifier: Apache-2.0 - -#ifndef KLEIDICV_OPENCV_CONFORMITY_TEST_SOBEL_H_ -#define KLEIDICV_OPENCV_CONFORMITY_TEST_SOBEL_H_ - -#include - -#include "tests.h" - -std::vector& sobel_tests_get(); - -#endif // KLEIDICV_OPENCV_CONFORMITY_TEST_SOBEL_H_ diff --git a/conformity/opencv/test_yuv2rgb.cpp b/conformity/opencv/test_yuv2rgb.cpp index 45ced2984..4a0573efc 100644 --- a/conformity/opencv/test_yuv2rgb.cpp +++ b/conformity/opencv/test_yuv2rgb.cpp @@ -2,10 +2,10 @@ // // SPDX-License-Identifier: Apache-2.0 -#include "test_yuv2rgb.h" - #include +#include "tests.h" + template cv::Mat exec_yuv2rgb(cv::Mat& input) { cv::Mat result; diff --git a/conformity/opencv/test_yuv2rgb.h b/conformity/opencv/test_yuv2rgb.h deleted file mode 100644 index d449f2c7b..000000000 --- a/conformity/opencv/test_yuv2rgb.h +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates -// -// SPDX-License-Identifier: Apache-2.0 - -#ifndef KLEIDICV_OPENCV_CONFORMITY_TEST_YUV2RGB_H_ -#define KLEIDICV_OPENCV_CONFORMITY_TEST_YUV2RGB_H_ - -#include - -#include "tests.h" - -std::vector& yuv2rgb_tests_get(); - -#endif // KLEIDICV_OPENCV_CONFORMITY_TEST_YUV2RGB_H_ diff --git a/conformity/opencv/tests.cpp b/conformity/opencv/tests.cpp index 2cf2e491c..e43d60fb2 100644 --- a/conformity/opencv/tests.cpp +++ b/conformity/opencv/tests.cpp @@ -10,17 +10,7 @@ #include "opencv2/core.hpp" #include "opencv2/imgproc.hpp" -#include "test_binary_op.h" -#include "test_cvtcolor.h" -#include "test_exp.h" -#include "test_float_conv.h" -#include "test_gaussian_blur.h" -#include "test_min_max.h" -#include "test_resize.h" -#include "test_rgb2yuv.h" -#include "test_scale.h" -#include "test_sobel.h" -#include "test_yuv2rgb.h" +#include "utils.h" static std::vector merge_tests( std::initializer_list& (*)()> test_groups) { diff --git a/conformity/opencv/tests.h b/conformity/opencv/tests.h index 7300bedad..c62fd6d72 100644 --- a/conformity/opencv/tests.h +++ b/conformity/opencv/tests.h @@ -5,96 +5,20 @@ #ifndef KLEIDICV_OPENCV_CONFORMITY_TESTS_H_ #define KLEIDICV_OPENCV_CONFORMITY_TESTS_H_ -#include -#include -#include - -#include "common.h" - -#if MANAGER -template -static auto abs_diff(T a, T b) { - return a > b ? a - b : b - a; -} - -static inline bool check_matrix_size_and_type(cv::Mat& A, cv::Mat& B) { - if (A.rows != B.rows || A.cols != B.cols || A.type() != B.type()) { - std::cout << "Matrix size/type mismatch" << std::endl; - return true; - } - - return false; -} - -// Expected matrix should not contain zeros -template -bool are_float_matrices_different(T threshold_percent, cv::Mat& exp, - cv::Mat& act) { - if (check_matrix_size_and_type(exp, act)) { - return true; - } - - for (int i = 0; i < exp.rows; ++i) { - for (int j = 0; j < (exp.cols * CV_MAT_CN(exp.type())); ++j) { - T diff = abs_diff(exp.at(i, j), act.at(i, j)); - T diff_percentage = (diff / std::abs(exp.at(i, j))) * 100; - if (diff_percentage > threshold_percent) { - std::cout << "=== Mismatch at: " << i << " " << j << std::endl - << "Expected: " << exp.at(i, j) - << " Actual: " << act.at(i, j) << std::endl - << "Relative diff: " << diff_percentage << std::endl - << std::endl; - return true; - } - } - } - - return false; -} - -template -bool are_matrices_different(T threshold, cv::Mat& A, cv::Mat& B) { - if (check_matrix_size_and_type(A, B)) { - return true; - } - - for (int i = 0; i < A.rows; ++i) { - for (int j = 0; j < (A.cols * CV_MAT_CN(A.type())); ++j) { - if (abs_diff(A.at(i, j), B.at(i, j)) > threshold) { - std::cout << "=== Mismatch at: " << i << " " << j << std::endl - << std::endl; - return true; - } - } - } - - return false; -} - -void fail_print_matrices(size_t height, size_t width, cv::Mat& input, - cv::Mat& manager_result, cv::Mat& subord_result); - -cv::Mat get_expected_from_subordinate(int index, - RecreatedMessageQueue& request_queue, - RecreatedMessageQueue& reply_queue, - cv::Mat& input); - -int run_tests(RecreatedMessageQueue& request_queue, - RecreatedMessageQueue& reply_queue); - -typedef bool (*test_function)(int index, RecreatedMessageQueue& request_queue, - RecreatedMessageQueue& reply_queue); -using test = std::pair; -#define TEST(name, test_func, x) \ - { name, test_func } -#else -void wait_for_requests(OpenedMessageQueue& request_queue, - OpenedMessageQueue& reply_queue); - -typedef cv::Mat (*exec_function)(cv::Mat& input); -using test = std::pair; -#define TEST(name, x, exec_func) \ - { name, exec_func } -#endif +#include + +#include "utils.h" + +std::vector& binary_op_tests_get(); +std::vector& cvtcolor_tests_get(); +std::vector& gaussian_blur_tests_get(); +std::vector& rgb2yuv_tests_get(); +std::vector& yuv2rgb_tests_get(); +std::vector& sobel_tests_get(); +std::vector& exp_tests_get(); +std::vector& float_conversion_tests_get(); +std::vector& resize_tests_get(); +std::vector& scale_tests_get(); +std::vector& min_max_tests_get(); #endif // KLEIDICV_OPENCV_CONFORMITY_TESTS_H_ diff --git a/conformity/opencv/utils.h b/conformity/opencv/utils.h new file mode 100644 index 000000000..64f3625cd --- /dev/null +++ b/conformity/opencv/utils.h @@ -0,0 +1,100 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_OPENCV_CONFORMITY_UTILS_H_ +#define KLEIDICV_OPENCV_CONFORMITY_UTILS_H_ + +#include +#include +#include + +#include "common.h" + +#if MANAGER +template +static auto abs_diff(T a, T b) { + return a > b ? a - b : b - a; +} + +static inline bool check_matrix_size_and_type(cv::Mat& A, cv::Mat& B) { + if (A.rows != B.rows || A.cols != B.cols || A.type() != B.type()) { + std::cout << "Matrix size/type mismatch" << std::endl; + return true; + } + + return false; +} + +// Expected matrix should not contain zeros +template +bool are_float_matrices_different(T threshold_percent, cv::Mat& exp, + cv::Mat& act) { + if (check_matrix_size_and_type(exp, act)) { + return true; + } + + for (int i = 0; i < exp.rows; ++i) { + for (int j = 0; j < (exp.cols * CV_MAT_CN(exp.type())); ++j) { + T diff = abs_diff(exp.at(i, j), act.at(i, j)); + T diff_percentage = (diff / std::abs(exp.at(i, j))) * 100; + if (diff_percentage > threshold_percent) { + std::cout << "=== Mismatch at: " << i << " " << j << std::endl + << "Expected: " << exp.at(i, j) + << " Actual: " << act.at(i, j) << std::endl + << "Relative diff: " << diff_percentage << std::endl + << std::endl; + return true; + } + } + } + + return false; +} + +template +bool are_matrices_different(T threshold, cv::Mat& A, cv::Mat& B) { + if (check_matrix_size_and_type(A, B)) { + return true; + } + + for (int i = 0; i < A.rows; ++i) { + for (int j = 0; j < (A.cols * CV_MAT_CN(A.type())); ++j) { + if (abs_diff(A.at(i, j), B.at(i, j)) > threshold) { + std::cout << "=== Mismatch at: " << i << " " << j << std::endl + << std::endl; + return true; + } + } + } + + return false; +} + +void fail_print_matrices(size_t height, size_t width, cv::Mat& input, + cv::Mat& manager_result, cv::Mat& subord_result); + +cv::Mat get_expected_from_subordinate(int index, + RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue, + cv::Mat& input); + +int run_tests(RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue); + +typedef bool (*test_function)(int index, RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue); +using test = std::pair; +#define TEST(name, test_func, x) \ + { name, test_func } +#else +void wait_for_requests(OpenedMessageQueue& request_queue, + OpenedMessageQueue& reply_queue); + +typedef cv::Mat (*exec_function)(cv::Mat& input); +using test = std::pair; +#define TEST(name, x, exec_func) \ + { name, exec_func } +#endif + +#endif // KLEIDICV_OPENCV_CONFORMITY_UTILS_H_ -- GitLab From 9a71bc62099822d4935444522dedcf70a4b7ca3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Mon, 15 Jul 2024 18:12:34 +0200 Subject: [PATCH 3/6] Add Separable Filter 2D conformity tests --- conformity/opencv/test_gaussian_blur.cpp | 4 +- .../opencv/test_separable_filter_2d.cpp | 88 +++++++++++++++++++ conformity/opencv/tests.cpp | 1 + conformity/opencv/tests.h | 1 + 4 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 conformity/opencv/test_separable_filter_2d.cpp diff --git a/conformity/opencv/test_gaussian_blur.cpp b/conformity/opencv/test_gaussian_blur.cpp index 6e3f59648..776fbec2b 100644 --- a/conformity/opencv/test_gaussian_blur.cpp +++ b/conformity/opencv/test_gaussian_blur.cpp @@ -9,7 +9,7 @@ template cv::Mat exec_gaussian_blur(cv::Mat& input) { double sigma = - *reinterpret_cast(&input.at(0, input.rows - 2)); + *reinterpret_cast(&input.at(input.rows - 2, 0)); // clone is required, otherwise the result matrix is treated as part of a // bigger image, and it would have impact on what border types are supported cv::Mat input_mat = input.rowRange(0, input.rows - 2).clone(); @@ -49,7 +49,7 @@ bool test_gaussian_blur(int index, RecreatedMessageQueue& request_queue, } // sigma is embedded into the input matrix - *reinterpret_cast(&input.at(0, input.rows - 2)) = sigma; + *reinterpret_cast(&input.at(input.rows - 2, 0)) = sigma; cv::Mat actual = exec_gaussian_blur(input); cv::Mat expected = get_expected_from_subordinate(index, request_queue, diff --git a/conformity/opencv/test_separable_filter_2d.cpp b/conformity/opencv/test_separable_filter_2d.cpp new file mode 100644 index 000000000..70ac5cf2a --- /dev/null +++ b/conformity/opencv/test_separable_filter_2d.cpp @@ -0,0 +1,88 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include "tests.h" + +template +cv::Mat exec_separable_filter_2d(cv::Mat& input) { + uint32_t kernel_seed = + *reinterpret_cast(&input.at(input.rows - 1, 0)); + // clone is required, otherwise the result matrix is treated as part of a + // bigger image, and it would have impact on what border types are supported + cv::Mat input_mat = input.rowRange(0, input.rows - 1).clone(); + + cv::RNG rng(kernel_seed); + cv::Mat kernel_x(KernelSize, 1, CV_8UC1); + rng.fill(kernel_x, cv::RNG::UNIFORM, 0, 5); + cv::Mat kernel_y(KernelSize, 1, CV_8UC1); + rng.fill(kernel_y, cv::RNG::UNIFORM, 0, 5); + + cv::Mat result; + cv::sepFilter2D(input_mat, result, -1, kernel_x, kernel_y, cv::Point(-1, -1), + 0, BorderType); + return result; +} + +#if MANAGER +template +bool test_separable_filter_2d(int index, RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue) { + cv::RNG rng(0); + + for (size_t y = 5; y <= 16; ++y) { + for (size_t x = 5; x <= 16; ++x) { + // One extra line allocated to be sure the kernel seed can be placed next + // to the real input + cv::Mat input(y + 1, x, CV_8UC(Channels)); + rng.fill(input, cv::RNG::UNIFORM, 0, 7); + + uint32_t kernel_seed = rng.next(); + + // kernel seed is embedded into the input matrix + *reinterpret_cast(&input.at(input.rows - 1, 0)) = + kernel_seed; + + cv::Mat actual = exec_separable_filter_2d(input); + cv::Mat expected = get_expected_from_subordinate(index, request_queue, + reply_queue, input); + + if (are_matrices_different(0, actual, expected)) { + fail_print_matrices(y, x, input, actual, expected); + return true; + } + } + } + + return false; +} +#endif + +std::vector& separable_filter_2d_tests_get() { + // clang-format off + static std::vector tests = { +// TEST("Separable Filter 2D 5x5, BORDER_REFLECT_101, 1 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT_101, 1>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT_101>)), +// TEST("Separable Filter 2D 5x5, BORDER_REFLECT_101, 2 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT_101, 2>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT_101>)), +// TEST("Separable Filter 2D 5x5, BORDER_REFLECT_101, 3 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT_101, 3>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT_101>)), +// TEST("Separable Filter 2D 5x5, BORDER_REFLECT_101, 4 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT_101, 4>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT_101>)), +// +// TEST("Separable Filter 2D 5x5, BORDER_REFLECT, 1 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT, 1>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT>)), +// TEST("Separable Filter 2D 5x5, BORDER_REFLECT, 2 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT, 2>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT>)), +// TEST("Separable Filter 2D 5x5, BORDER_REFLECT, 3 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT, 3>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT>)), +// TEST("Separable Filter 2D 5x5, BORDER_REFLECT, 4 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT, 4>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT>)), +// +// TEST("Separable Filter 2D 5x5, BORDER_WRAP, 1 channel", (test_separable_filter_2d<5, cv::BORDER_WRAP, 1>), (exec_separable_filter_2d<5, cv::BORDER_WRAP>)), +// TEST("Separable Filter 2D 5x5, BORDER_WRAP, 2 channel", (test_separable_filter_2d<5, cv::BORDER_WRAP, 2>), (exec_separable_filter_2d<5, cv::BORDER_WRAP>)), +// TEST("Separable Filter 2D 5x5, BORDER_WRAP, 3 channel", (test_separable_filter_2d<5, cv::BORDER_WRAP, 3>), (exec_separable_filter_2d<5, cv::BORDER_WRAP>)), +// TEST("Separable Filter 2D 5x5, BORDER_WRAP, 4 channel", (test_separable_filter_2d<5, cv::BORDER_WRAP, 4>), (exec_separable_filter_2d<5, cv::BORDER_WRAP>)), + + TEST("Separable Filter 2D 5x5, BORDER_REPLICATE, 1 channel", (test_separable_filter_2d<5, cv::BORDER_REPLICATE, 1>), (exec_separable_filter_2d<5, cv::BORDER_REPLICATE>)), + TEST("Separable Filter 2D 5x5, BORDER_REPLICATE, 2 channel", (test_separable_filter_2d<5, cv::BORDER_REPLICATE, 2>), (exec_separable_filter_2d<5, cv::BORDER_REPLICATE>)), + TEST("Separable Filter 2D 5x5, BORDER_REPLICATE, 3 channel", (test_separable_filter_2d<5, cv::BORDER_REPLICATE, 3>), (exec_separable_filter_2d<5, cv::BORDER_REPLICATE>)), + TEST("Separable Filter 2D 5x5, BORDER_REPLICATE, 4 channel", (test_separable_filter_2d<5, cv::BORDER_REPLICATE, 4>), (exec_separable_filter_2d<5, cv::BORDER_REPLICATE>)), + }; + // clang-format on + return tests; +} diff --git a/conformity/opencv/tests.cpp b/conformity/opencv/tests.cpp index e43d60fb2..c6cadbaf6 100644 --- a/conformity/opencv/tests.cpp +++ b/conformity/opencv/tests.cpp @@ -25,6 +25,7 @@ static std::vector merge_tests( std::vector all_tests = merge_tests({ binary_op_tests_get, cvtcolor_tests_get, + separable_filter_2d_tests_get, gaussian_blur_tests_get, rgb2yuv_tests_get, yuv2rgb_tests_get, diff --git a/conformity/opencv/tests.h b/conformity/opencv/tests.h index c62fd6d72..64c0cc59d 100644 --- a/conformity/opencv/tests.h +++ b/conformity/opencv/tests.h @@ -11,6 +11,7 @@ std::vector& binary_op_tests_get(); std::vector& cvtcolor_tests_get(); +std::vector& separable_filter_2d_tests_get(); std::vector& gaussian_blur_tests_get(); std::vector& rgb2yuv_tests_get(); std::vector& yuv2rgb_tests_get(); -- GitLab From 28c4c0c3cc8bbee9729916aa23bca97a0737cf55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Mon, 15 Jul 2024 20:58:07 +0200 Subject: [PATCH 4/6] Re-add missing border types For Separable Filter 2D, re-add the following border types: - REFLECT - WRAP - REVERSE --- adapters/opencv/kleidicv_hal.cpp | 8 +++++- .../src/filters/separable_filter_2d_neon.cpp | 8 +++--- kleidicv/src/filters/separable_filter_2d_sc.h | 8 +++--- test/api/test_separable_filter_2d.cpp | 26 +++++++------------ 4 files changed, 27 insertions(+), 23 deletions(-) diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp index 8192263d5..8e8e59859 100644 --- a/adapters/opencv/kleidicv_hal.cpp +++ b/adapters/opencv/kleidicv_hal.cpp @@ -317,7 +317,13 @@ int separable_filter_2d_init(cvhalFilter2D **context, int src_type, } if (kleidicv_border_type != - kleidicv_border_type_t::KLEIDICV_BORDER_TYPE_REPLICATE) { + kleidicv_border_type_t::KLEIDICV_BORDER_TYPE_REPLICATE && + kleidicv_border_type != + kleidicv_border_type_t::KLEIDICV_BORDER_TYPE_REFLECT && + kleidicv_border_type != + kleidicv_border_type_t::KLEIDICV_BORDER_TYPE_WRAP && + kleidicv_border_type != + kleidicv_border_type_t::KLEIDICV_BORDER_TYPE_REVERSE) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } diff --git a/kleidicv/src/filters/separable_filter_2d_neon.cpp b/kleidicv/src/filters/separable_filter_2d_neon.cpp index 35f32c088..770b2d78d 100644 --- a/kleidicv/src/filters/separable_filter_2d_neon.cpp +++ b/kleidicv/src/filters/separable_filter_2d_neon.cpp @@ -123,7 +123,9 @@ kleidicv_error_t separable_filter_2d_u8( return KLEIDICV_ERROR_CONTEXT_MISMATCH; } - if (!fixed_border_type || *fixed_border_type != FixedBorderType::REPLICATE) { + // if the std::optional is empty, that means that the border type is not + // supported, so there's no need to check for specific types + if (!fixed_border_type) { return KLEIDICV_ERROR_NOT_IMPLEMENTED; } @@ -134,8 +136,8 @@ kleidicv_error_t separable_filter_2d_u8( Rows src_rows{src, src_stride, channels}; Rows dst_rows{dst, dst_stride, channels}; - workspace->process(rect, src_rows, dst_rows, channels, - FixedBorderType::REPLICATE, filter); + workspace->process(rect, src_rows, dst_rows, channels, *fixed_border_type, + filter); return KLEIDICV_OK; } diff --git a/kleidicv/src/filters/separable_filter_2d_sc.h b/kleidicv/src/filters/separable_filter_2d_sc.h index 4ecac2b91..f280435b3 100644 --- a/kleidicv/src/filters/separable_filter_2d_sc.h +++ b/kleidicv/src/filters/separable_filter_2d_sc.h @@ -148,7 +148,9 @@ static kleidicv_error_t separable_filter_2d_u8_sc( return KLEIDICV_ERROR_CONTEXT_MISMATCH; } - if (!fixed_border_type || *fixed_border_type != FixedBorderType::REPLICATE) { + // if the std::optional is empty, that means that the border type is not + // supported, so there's no need to check for specific types + if (!fixed_border_type) { return KLEIDICV_ERROR_NOT_IMPLEMENTED; } @@ -159,8 +161,8 @@ static kleidicv_error_t separable_filter_2d_u8_sc( Rows src_rows{src, src_stride, channels}; Rows dst_rows{dst, dst_stride, channels}; - workspace->process(rect, src_rows, dst_rows, channels, - FixedBorderType::REPLICATE, filter); + workspace->process(rect, src_rows, dst_rows, channels, *fixed_border_type, + filter); return KLEIDICV_OK; } diff --git a/test/api/test_separable_filter_2d.cpp b/test/api/test_separable_filter_2d.cpp index f0a001a64..16a9c6459 100644 --- a/test/api/test_separable_filter_2d.cpp +++ b/test/api/test_separable_filter_2d.cpp @@ -29,6 +29,13 @@ struct SeparableFilter2DKernelTestParams { static constexpr std::array kDefaultBorder = { KLEIDICV_BORDER_TYPE_REPLICATE}; +static constexpr std::array kAllBorders = { + KLEIDICV_BORDER_TYPE_REPLICATE, + KLEIDICV_BORDER_TYPE_REFLECT, + KLEIDICV_BORDER_TYPE_WRAP, + KLEIDICV_BORDER_TYPE_REVERSE, +}; + template std::unique_ptr> make_generator_ptr(IterableType &elements) { @@ -136,7 +143,9 @@ TYPED_TEST(SeparableFilter2D, 5x5) { mask.set(4, 0, { 2, 1, 0, 2, 1}); // clang-format on uint8_t kernel[5] = {2, 1, 0, 2, 1}; - SeparableFilter2DTest{kernel, kernel}.test(mask, 7); + SeparableFilter2DTest{kernel, kernel} + .with_border_types(make_generator_ptr(kAllBorders)) + .test(mask, 7); } TYPED_TEST(SeparableFilter2D, NullPointer) { @@ -323,21 +332,6 @@ TYPED_TEST(SeparableFilter2D, InvalidBorderType) { separable_filter_2d()( src, sizeof(TypeParam), dst, sizeof(TypeParam), validSize, validSize, 1, kernel, 5, kernel, 5, KLEIDICV_BORDER_TYPE_CONSTANT, context)); - EXPECT_EQ( - KLEIDICV_ERROR_NOT_IMPLEMENTED, - separable_filter_2d()( - src, sizeof(TypeParam), dst, sizeof(TypeParam), validSize, validSize, - 1, kernel, 5, kernel, 5, KLEIDICV_BORDER_TYPE_REFLECT, context)); - EXPECT_EQ( - KLEIDICV_ERROR_NOT_IMPLEMENTED, - separable_filter_2d()( - src, sizeof(TypeParam), dst, sizeof(TypeParam), validSize, validSize, - 1, kernel, 5, kernel, 5, KLEIDICV_BORDER_TYPE_WRAP, context)); - EXPECT_EQ( - KLEIDICV_ERROR_NOT_IMPLEMENTED, - separable_filter_2d()( - src, sizeof(TypeParam), dst, sizeof(TypeParam), validSize, validSize, - 1, kernel, 5, kernel, 5, KLEIDICV_BORDER_TYPE_REVERSE, context)); EXPECT_EQ( KLEIDICV_ERROR_NOT_IMPLEMENTED, separable_filter_2d()( -- GitLab From 80e09df55be22fceb20c554b7e7d5918b02b0006 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Tue, 16 Jul 2024 17:04:27 +0200 Subject: [PATCH 5/6] Fix saturation in Separable Filter 2D --- .../opencv/test_separable_filter_2d.cpp | 23 ++-- .../src/filters/separable_filter_2d_neon.cpp | 93 +++++++------- kleidicv/src/filters/separable_filter_2d_sc.h | 116 +++++++++--------- 3 files changed, 118 insertions(+), 114 deletions(-) diff --git a/conformity/opencv/test_separable_filter_2d.cpp b/conformity/opencv/test_separable_filter_2d.cpp index 70ac5cf2a..d56d52d79 100644 --- a/conformity/opencv/test_separable_filter_2d.cpp +++ b/conformity/opencv/test_separable_filter_2d.cpp @@ -63,20 +63,15 @@ bool test_separable_filter_2d(int index, RecreatedMessageQueue& request_queue, std::vector& separable_filter_2d_tests_get() { // clang-format off static std::vector tests = { -// TEST("Separable Filter 2D 5x5, BORDER_REFLECT_101, 1 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT_101, 1>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT_101>)), -// TEST("Separable Filter 2D 5x5, BORDER_REFLECT_101, 2 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT_101, 2>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT_101>)), -// TEST("Separable Filter 2D 5x5, BORDER_REFLECT_101, 3 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT_101, 3>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT_101>)), -// TEST("Separable Filter 2D 5x5, BORDER_REFLECT_101, 4 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT_101, 4>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT_101>)), -// -// TEST("Separable Filter 2D 5x5, BORDER_REFLECT, 1 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT, 1>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT>)), -// TEST("Separable Filter 2D 5x5, BORDER_REFLECT, 2 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT, 2>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT>)), -// TEST("Separable Filter 2D 5x5, BORDER_REFLECT, 3 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT, 3>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT>)), -// TEST("Separable Filter 2D 5x5, BORDER_REFLECT, 4 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT, 4>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT>)), -// -// TEST("Separable Filter 2D 5x5, BORDER_WRAP, 1 channel", (test_separable_filter_2d<5, cv::BORDER_WRAP, 1>), (exec_separable_filter_2d<5, cv::BORDER_WRAP>)), -// TEST("Separable Filter 2D 5x5, BORDER_WRAP, 2 channel", (test_separable_filter_2d<5, cv::BORDER_WRAP, 2>), (exec_separable_filter_2d<5, cv::BORDER_WRAP>)), -// TEST("Separable Filter 2D 5x5, BORDER_WRAP, 3 channel", (test_separable_filter_2d<5, cv::BORDER_WRAP, 3>), (exec_separable_filter_2d<5, cv::BORDER_WRAP>)), -// TEST("Separable Filter 2D 5x5, BORDER_WRAP, 4 channel", (test_separable_filter_2d<5, cv::BORDER_WRAP, 4>), (exec_separable_filter_2d<5, cv::BORDER_WRAP>)), + TEST("Separable Filter 2D 5x5, BORDER_REFLECT_101, 1 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT_101, 1>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT_101>)), + TEST("Separable Filter 2D 5x5, BORDER_REFLECT_101, 2 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT_101, 2>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT_101>)), + TEST("Separable Filter 2D 5x5, BORDER_REFLECT_101, 3 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT_101, 3>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT_101>)), + TEST("Separable Filter 2D 5x5, BORDER_REFLECT_101, 4 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT_101, 4>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT_101>)), + + TEST("Separable Filter 2D 5x5, BORDER_REFLECT, 1 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT, 1>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT>)), + TEST("Separable Filter 2D 5x5, BORDER_REFLECT, 2 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT, 2>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT>)), + TEST("Separable Filter 2D 5x5, BORDER_REFLECT, 3 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT, 3>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT>)), + TEST("Separable Filter 2D 5x5, BORDER_REFLECT, 4 channel", (test_separable_filter_2d<5, cv::BORDER_REFLECT, 4>), (exec_separable_filter_2d<5, cv::BORDER_REFLECT>)), TEST("Separable Filter 2D 5x5, BORDER_REPLICATE, 1 channel", (test_separable_filter_2d<5, cv::BORDER_REPLICATE, 1>), (exec_separable_filter_2d<5, cv::BORDER_REPLICATE>)), TEST("Separable Filter 2D 5x5, BORDER_REPLICATE, 2 channel", (test_separable_filter_2d<5, cv::BORDER_REPLICATE, 2>), (exec_separable_filter_2d<5, cv::BORDER_REPLICATE>)), diff --git a/kleidicv/src/filters/separable_filter_2d_neon.cpp b/kleidicv/src/filters/separable_filter_2d_neon.cpp index 770b2d78d..8d3d0d3ed 100644 --- a/kleidicv/src/filters/separable_filter_2d_neon.cpp +++ b/kleidicv/src/filters/separable_filter_2d_neon.cpp @@ -2,6 +2,8 @@ // // SPDX-License-Identifier: Apache-2.0 +#include + #include "kleidicv/ctypes.h" #include "kleidicv/filters/separable_filter_2d.h" #include "kleidicv/kleidicv.h" @@ -17,20 +19,37 @@ template <> class SeparableFilter2D { public: using SourceType = uint8_t; - using BufferType = uint32_t; + using BufferType = uint8_t; using DestinationType = uint8_t; explicit SeparableFilter2D(const uint8_t *kernel_x, const uint8_t *kernel_y) : kernel_x_(kernel_x), kernel_y_(kernel_y) {} void vertical_vector_path(uint8x16_t src[5], BufferType *dst) const { - uint16x8_t initial_l = vmovl_u8(vget_low_u8(src[0])); - uint16x8_t initial_h = vmovl_u8(vget_high_u8(src[0])); + this->vector_path_with_kernel(src, dst, kernel_y_); + } + + void vertical_scalar_path(const SourceType src[5], BufferType *dst) const { + this->scalar_path_with_kernel(src, dst, kernel_y_); + } + + void horizontal_vector_path(uint8x16_t src[5], DestinationType *dst) const { + this->vector_path_with_kernel(src, dst, kernel_x_); + } + + void horizontal_scalar_path(const BufferType src[5], + DestinationType *dst) const { + this->scalar_path_with_kernel(src, dst, kernel_x_); + } + + private: + void vector_path_with_kernel(uint8x16_t src[5], uint8_t *dst, + const uint8_t *kernel) const { + uint16x8_t acc_l = vmovl_u8(vget_low_u8(src[0])); + uint16x8_t acc_h = vmovl_u8(vget_high_u8(src[0])); - uint32x4_t acc_l_l = vmull_n_u16(vget_low_u16(initial_l), kernel_y_[0]); - uint32x4_t acc_l_h = vmull_n_u16(vget_high_u16(initial_l), kernel_y_[0]); - uint32x4_t acc_h_l = vmull_n_u16(vget_low_u16(initial_h), kernel_y_[0]); - uint32x4_t acc_h_h = vmull_n_u16(vget_high_u16(initial_h), kernel_y_[0]); + acc_l = vmulq_n_u16(acc_l, kernel[0]); + acc_h = vmulq_n_u16(acc_h, kernel[0]); // Optimization to avoid unnecessary branching in vector code. KLEIDICV_FORCE_LOOP_UNROLL @@ -38,51 +57,39 @@ class SeparableFilter2D { uint16x8_t vec_l = vmovl_u8(vget_low_u8(src[i])); uint16x8_t vec_h = vmovl_u8(vget_high_u8(src[i])); - acc_l_l = vmlal_n_u16(acc_l_l, vget_low_u16(vec_l), kernel_y_[i]); - acc_l_h = vmlal_n_u16(acc_l_h, vget_high_u16(vec_l), kernel_y_[i]); - acc_h_l = vmlal_n_u16(acc_h_l, vget_low_u16(vec_h), kernel_y_[i]); - acc_h_h = vmlal_n_u16(acc_h_h, vget_high_u16(vec_h), kernel_y_[i]); + acc_l = vmlaq_n_u16(acc_l, vec_l, kernel[i]); + acc_h = vmlaq_n_u16(acc_h, vec_h, kernel[i]); } - uint32x4x4_t result = {acc_l_l, acc_l_h, acc_h_l, acc_h_h}; + uint8x8_t result_l = vqmovn_u16(acc_l); + uint8x16_t result = vqmovn_high_u16(result_l, acc_h); - vst1q_u32_x4(&dst[0], result); + vst1q_u8(&dst[0], result); } - void vertical_scalar_path(const SourceType src[5], BufferType *dst) const { - uint32_t acc = static_cast(src[0]) * kernel_y_[0] + - static_cast(src[1]) * kernel_y_[1] + - static_cast(src[2]) * kernel_y_[2] + - static_cast(src[3]) * kernel_y_[3] + - static_cast(src[4]) * kernel_y_[4]; - - dst[0] = acc; - } - - void horizontal_vector_path(uint32x4_t src[5], DestinationType *dst) const { - uint32x4_t acc = vmulq_n_u32(src[0], kernel_x_[0]); - acc = vmlaq_n_u32(acc, src[1], kernel_x_[1]); - acc = vmlaq_n_u32(acc, src[2], kernel_x_[2]); - acc = vmlaq_n_u32(acc, src[3], kernel_x_[3]); - acc = vmlaq_n_u32(acc, src[4], kernel_x_[4]); - - uint16x4_t narrowed = vmovn_u32(acc); - uint8x8_t interleaved = - vuzp1_u8(vreinterpret_u8_u16(narrowed), vreinterpret_u8_u16(narrowed)); - uint32_t result = vget_lane_u32(vreinterpret_u32_u8(interleaved), 0); - memcpy(&dst[0], &result, sizeof(result)); - } + void scalar_path_with_kernel(const uint8_t src[5], uint8_t *dst, + const uint8_t *kernel) const { + uint8_t acc; // NOLINT + if (__builtin_mul_overflow(src[0], kernel[0], &acc)) { + dst[0] = std::numeric_limits::max(); + return; + } - void horizontal_scalar_path(const BufferType src[5], - DestinationType *dst) const { - uint32_t acc = src[0] * kernel_x_[0] + src[1] * kernel_x_[1] + - src[2] * kernel_x_[2] + src[3] * kernel_x_[3] + - src[4] * kernel_x_[4]; + for (size_t i = 1; i < 5; i++) { + uint8_t temp; // NOLINT + if (__builtin_mul_overflow(src[i], kernel[i], &temp)) { + dst[0] = std::numeric_limits::max(); + return; + } + if (__builtin_add_overflow(acc, temp, &acc)) { + dst[0] = std::numeric_limits::max(); + return; + } + } - dst[0] = static_cast(acc); + dst[0] = acc; } - private: const uint8_t *kernel_x_; const uint8_t *kernel_y_; }; diff --git a/kleidicv/src/filters/separable_filter_2d_sc.h b/kleidicv/src/filters/separable_filter_2d_sc.h index f280435b3..9ba9c9fb8 100644 --- a/kleidicv/src/filters/separable_filter_2d_sc.h +++ b/kleidicv/src/filters/separable_filter_2d_sc.h @@ -5,6 +5,8 @@ #ifndef KLEIDICV_SEPARABLE_FILTER_2D_SC_H #define KLEIDICV_SEPARABLE_FILTER_2D_SC_H +#include + #include "kleidicv/kleidicv.h" #include "kleidicv/separable_filter_5x5_sc.h" #include "kleidicv/sve2.h" @@ -18,7 +20,7 @@ template <> class SeparableFilter2D { public: using SourceType = uint8_t; - using BufferType = uint32_t; + using BufferType = uint8_t; using DestinationType = uint8_t; explicit SeparableFilter2D(const uint8_t *kernel_x, const uint8_t *kernel_y) @@ -28,86 +30,86 @@ class SeparableFilter2D { svuint8_t src_2, svuint8_t src_3, svuint8_t src_4, BufferType *dst) const KLEIDICV_STREAMING_COMPATIBLE { - // 2 - svuint16_t vec_0_b = svmovlb_u16(src_0); - svuint16_t vec_0_t = svmovlt_u16(src_0); + this->vector_path_with_kernel(pg, src_0, src_1, src_2, src_3, src_4, dst, + kernel_y_); + } + + void horizontal_vector_path(svbool_t pg, svuint8_t src_0, svuint8_t src_1, + svuint8_t src_2, svuint8_t src_3, svuint8_t src_4, + DestinationType *dst) const + KLEIDICV_STREAMING_COMPATIBLE { + this->vector_path_with_kernel(pg, src_0, src_1, src_2, src_3, src_4, dst, + kernel_x_); + } + + void horizontal_scalar_path(const BufferType src[5], DestinationType *dst) + const KLEIDICV_STREAMING_COMPATIBLE { + uint8_t acc; // NOLINT + if (__builtin_mul_overflow(src[0], kernel_x_[0], &acc)) { + dst[0] = std::numeric_limits::max(); + return; + } + + for (size_t i = 1; i < 5; i++) { + uint8_t temp; // NOLINT + if (__builtin_mul_overflow(src[i], kernel_x_[i], &temp)) { + dst[0] = std::numeric_limits::max(); + return; + } + if (__builtin_add_overflow(acc, temp, &acc)) { + dst[0] = std::numeric_limits::max(); + return; + } + } + + dst[0] = acc; + } + + private: + void vector_path_with_kernel( + svbool_t pg, svuint8_t src_0, svuint8_t src_1, svuint8_t src_2, + svuint8_t src_3, svuint8_t src_4, BufferType *dst, + const uint8_t *kernel) const KLEIDICV_STREAMING_COMPATIBLE { + // 0 + svuint16_t acc_b = svmovlb_u16(src_0); + svuint16_t acc_t = svmovlt_u16(src_0); - svuint32_t acc_b_b = svmullb_n_u32(vec_0_b, kernel_y_[0]); - svuint32_t acc_b_t = svmullb_n_u32(vec_0_t, kernel_y_[0]); - svuint32_t acc_t_b = svmullt_n_u32(vec_0_b, kernel_y_[0]); - svuint32_t acc_t_t = svmullt_n_u32(vec_0_t, kernel_y_[0]); + acc_b = svmul_n_u16_x(pg, acc_b, kernel[0]); + acc_t = svmul_n_u16_x(pg, acc_t, kernel[0]); // 1 svuint16_t vec_1_b = svmovlb_u16(src_1); svuint16_t vec_1_t = svmovlt_u16(src_1); - acc_b_b = svmlalb_n_u32(acc_b_b, vec_1_b, kernel_y_[1]); - acc_b_t = svmlalb_n_u32(acc_b_t, vec_1_t, kernel_y_[1]); - acc_t_b = svmlalt_n_u32(acc_t_b, vec_1_b, kernel_y_[1]); - acc_t_t = svmlalt_n_u32(acc_t_t, vec_1_t, kernel_y_[1]); + acc_b = svmla_n_u16_x(pg, acc_b, vec_1_b, kernel[1]); + acc_t = svmla_n_u16_x(pg, acc_t, vec_1_t, kernel[1]); // 2 svuint16_t vec_2_b = svmovlb_u16(src_2); svuint16_t vec_2_t = svmovlt_u16(src_2); - acc_b_b = svmlalb_n_u32(acc_b_b, vec_2_b, kernel_y_[2]); - acc_b_t = svmlalb_n_u32(acc_b_t, vec_2_t, kernel_y_[2]); - acc_t_b = svmlalt_n_u32(acc_t_b, vec_2_b, kernel_y_[2]); - acc_t_t = svmlalt_n_u32(acc_t_t, vec_2_t, kernel_y_[2]); + acc_b = svmla_n_u16_x(pg, acc_b, vec_2_b, kernel[2]); + acc_t = svmla_n_u16_x(pg, acc_t, vec_2_t, kernel[2]); // 3 svuint16_t vec_3_b = svmovlb_u16(src_3); svuint16_t vec_3_t = svmovlt_u16(src_3); - acc_b_b = svmlalb_n_u32(acc_b_b, vec_3_b, kernel_y_[3]); - acc_b_t = svmlalb_n_u32(acc_b_t, vec_3_t, kernel_y_[3]); - acc_t_b = svmlalt_n_u32(acc_t_b, vec_3_b, kernel_y_[3]); - acc_t_t = svmlalt_n_u32(acc_t_t, vec_3_t, kernel_y_[3]); + acc_b = svmla_n_u16_x(pg, acc_b, vec_3_b, kernel[3]); + acc_t = svmla_n_u16_x(pg, acc_t, vec_3_t, kernel[3]); // 4 svuint16_t vec_4_b = svmovlb_u16(src_4); svuint16_t vec_4_t = svmovlt_u16(src_4); - acc_b_b = svmlalb_n_u32(acc_b_b, vec_4_b, kernel_y_[4]); - acc_b_t = svmlalb_n_u32(acc_b_t, vec_4_t, kernel_y_[4]); - acc_t_b = svmlalt_n_u32(acc_t_b, vec_4_b, kernel_y_[4]); - acc_t_t = svmlalt_n_u32(acc_t_t, vec_4_t, kernel_y_[4]); + acc_b = svmla_n_u16_x(pg, acc_b, vec_4_b, kernel[4]); + acc_t = svmla_n_u16_x(pg, acc_t, vec_4_t, kernel[4]); - svuint32x4_t interleaved = svcreate4(acc_b_b, acc_b_t, acc_t_b, acc_t_t); - svst4(pg, &dst[0], interleaved); + svuint8_t result_b = svqxtnb_u16(acc_b); + svuint8_t result = svqxtnt_u16(result_b, acc_t); + svst1_u8(pg, &dst[0], result); } - void horizontal_vector_path(svbool_t pg, svuint32_t src_0, svuint32_t src_1, - svuint32_t src_2, svuint32_t src_3, - svuint32_t src_4, DestinationType *dst) const - KLEIDICV_STREAMING_COMPATIBLE { - // 0 - svuint32_t acc = svmul_n_u32_x(pg, src_0, kernel_x_[0]); - - // 1 - acc = svmla_n_u32_x(pg, acc, src_1, kernel_x_[1]); - - // 2 - acc = svmla_n_u32_x(pg, acc, src_2, kernel_x_[2]); - - // 3 - acc = svmla_n_u32_x(pg, acc, src_3, kernel_x_[3]); - - // 4 - acc = svmla_n_u32_x(pg, acc, src_4, kernel_x_[4]); - - svst1b_u32(pg, &dst[0], acc); - } - - void horizontal_scalar_path(const BufferType src[5], DestinationType *dst) - const KLEIDICV_STREAMING_COMPATIBLE { - uint32_t acc = src[0] * kernel_x_[0] + src[1] * kernel_x_[1] + - src[2] * kernel_x_[2] + src[3] * kernel_x_[3] + - src[4] * kernel_x_[4]; - dst[0] = static_cast(acc); - } - - private: const uint8_t *kernel_x_; const uint8_t *kernel_y_; }; // end of class SeparableFilter2D -- GitLab From 966e91707c51936618dad362af6237277dd73a2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Podgain=C3=B5i?= Date: Wed, 17 Jul 2024 12:23:44 +0200 Subject: [PATCH 6/6] Add 5x5Overflow test to increase test coverage --- test/api/test_separable_filter_2d.cpp | 45 +++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/test/api/test_separable_filter_2d.cpp b/test/api/test_separable_filter_2d.cpp index 16a9c6459..4d440b08c 100644 --- a/test/api/test_separable_filter_2d.cpp +++ b/test/api/test_separable_filter_2d.cpp @@ -148,6 +148,51 @@ TYPED_TEST(SeparableFilter2D, 5x5) { .test(mask, 7); } +TYPED_TEST(SeparableFilter2D, 5x5Overflow) { + kleidicv_filter_context_t *context = nullptr; + ASSERT_EQ(KLEIDICV_OK, + kleidicv_filter_context_create(&context, 1, 5, 5, 5, 5)); + test::Array2D src{5, 5, test::Options::vector_length()}; + // clang-format off + src.set(0, 0, { 1, 2, 3, 4, 5}); + src.set(1, 0, { 2, 3, 4, 5, 6}); + src.set(2, 0, { 3, 4, 5, 6, 7}); + src.set(3, 0, { 4, 5, 6, 7, 8}); + src.set(4, 0, { 5, 6, 7, 8, 9}); + // clang-format on + + test::Array2D kernel_x{5, 1}; + kernel_x.set(0, 0, {9, 9, 9, 9, 9}); + test::Array2D kernel_y{5, 1}; + kernel_y.set(0, 0, {5, 6, 7, 8, 9}); + + test::Array2D dst{5, 5, test::Options::vector_length()}; + EXPECT_EQ(KLEIDICV_OK, separable_filter_2d()( + src.data(), src.stride(), dst.data(), dst.stride(), + 5, 5, 1, kernel_x.data(), 5, kernel_y.data(), 5, + KLEIDICV_BORDER_TYPE_REPLICATE, context)); + + test::Array2D dst_expected{5, 5, test::Options::vector_length()}; + // clang-format off + dst_expected.set(0, 0, { 255, 255, 255, 255, 255}); + dst_expected.set(1, 0, { 255, 255, 255, 255, 255}); + dst_expected.set(2, 0, { 255, 255, 255, 255, 255}); + dst_expected.set(3, 0, { 255, 255, 255, 255, 255}); + dst_expected.set(4, 0, { 255, 255, 255, 255, 255}); + // clang-format on + EXPECT_EQ_ARRAY2D(dst_expected, dst); + + kernel_x.set(0, 0, {0, 1, 2, 3, 4}); + kernel_y.set(0, 0, {9, 9, 9, 9, 9}); + + EXPECT_EQ(KLEIDICV_OK, separable_filter_2d()( + src.data(), src.stride(), dst.data(), dst.stride(), + 5, 5, 1, kernel_x.data(), 5, kernel_y.data(), 5, + KLEIDICV_BORDER_TYPE_REPLICATE, context)); + EXPECT_EQ(KLEIDICV_OK, kleidicv_filter_context_release(context)); + EXPECT_EQ_ARRAY2D(dst_expected, dst); +} + TYPED_TEST(SeparableFilter2D, NullPointer) { using KernelTestParams = SeparableFilter2DKernelTestParams; kleidicv_filter_context_t *context = nullptr; -- GitLab