diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ee73a883828df4ff4c4a9532d4eb235f27bd402..c192adc87e0fa75beb6c875886a8274c120fb5c5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ This changelog aims to follow the guiding principles of - Scale function for float. - Add, subtract, multiply & absdiff enabled in OpenCV HAL. - MinMax enabled in OpenCV HAL, float version added. +- Resize 4x4 for float. +- Resize 0.5x0.5 for uint8_t. ### Fixed diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp index 4f3051f31189ae70275a342edc7f3a5cd4e883c5..23087b731607189dd3ee9816e70d8b15e95c063d 100644 --- a/adapters/opencv/kleidicv_hal.cpp +++ b/adapters/opencv/kleidicv_hal.cpp @@ -463,6 +463,15 @@ int resize(int src_type, const uchar *src_data, size_t src_step, int src_width, return CV_HAL_ERROR_NOT_IMPLEMENTED; } + if (CV_MAT_DEPTH(src_type) == CV_8U && inv_scale_x == 0.5 && + inv_scale_y == 0.5 && + (interpolation == CV_HAL_INTER_LINEAR || + interpolation == CV_HAL_INTER_AREA)) { + return convert_error(kleidicv_resize_to_quarter_u8( + src_data, src_step, src_width, src_height, dst_data, dst_step, + dst_width, dst_height)); + } + if (interpolation != CV_HAL_INTER_LINEAR) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 096207f9472c3d65c0cf84ac91cb3ac817063e6b..9da67136069f93f5a1de36dd216054d14904fb5f 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -172,14 +172,8 @@ BENCH_MIN_MAX(min_max_s32, int32_t); BENCH_MIN_MAX(min_max_f32, float); template -static void resize_linear(F f, size_t scale_x, size_t scale_y, - benchmark::State& state) { - // Setup - size_t src_width = image_width / scale_x; - size_t src_height = image_height / scale_y; - size_t dst_width = src_width * scale_x; - size_t dst_height = src_height * scale_y; - +static void resize(F f, size_t src_width, size_t src_height, size_t dst_width, + size_t dst_height, benchmark::State& state) { 
bench_functor(state, [f, src_width, src_height, dst_width, dst_height]() { (void)f(get_source_buffer_a(), src_width * sizeof(T), src_width, src_height, get_destination_buffer(), dst_width * sizeof(T), @@ -187,23 +181,46 @@ static void resize_linear(F f, size_t scale_x, size_t scale_y, }); } +template +static void resize_upscale(F f, size_t scale_x, size_t scale_y, + benchmark::State& state) { + size_t src_width = image_width / scale_x; + size_t src_height = image_height / scale_y; + resize(f, src_width, src_height, src_width * scale_x, src_height * scale_y, + state); +} + +template +static void resize_downscale(F f, size_t scale_x, size_t scale_y, + benchmark::State& state) { + size_t dst_width = image_width / scale_x; + size_t dst_height = image_height / scale_y; + resize(f, dst_width * scale_x, dst_height * scale_y, dst_width, dst_height, + state); +} + +static void resize_quarter_u8(benchmark::State& state) { + resize_downscale(kleidicv_resize_to_quarter_u8, 2, 2, state); +} +BENCHMARK(resize_quarter_u8); + static void resize_linear_2x2_u8(benchmark::State& state) { - resize_linear(kleidicv_resize_linear_u8, 2, 2, state); + resize_upscale(kleidicv_resize_linear_u8, 2, 2, state); } BENCHMARK(resize_linear_2x2_u8); static void resize_linear_4x4_u8(benchmark::State& state) { - resize_linear(kleidicv_resize_linear_u8, 4, 4, state); + resize_upscale(kleidicv_resize_linear_u8, 4, 4, state); } BENCHMARK(resize_linear_4x4_u8); static void resize_linear_2x2_f32(benchmark::State& state) { - resize_linear(kleidicv_resize_linear_f32, 2, 2, state); + resize_upscale(kleidicv_resize_linear_f32, 2, 2, state); } BENCHMARK(resize_linear_2x2_f32); static void resize_linear_4x4_f32(benchmark::State& state) { - resize_linear(kleidicv_resize_linear_f32, 4, 4, state); + resize_upscale(kleidicv_resize_linear_f32, 4, 4, state); } BENCHMARK(resize_linear_4x4_f32); diff --git a/conformity/opencv/CMakeLists.txt b/conformity/opencv/CMakeLists.txt index 
97103f97ec06f07d3811e54a09cc10aee0eb6d86..6c53bcaea75a48c00665d826b9d8a40775668cc9 100644 --- a/conformity/opencv/CMakeLists.txt +++ b/conformity/opencv/CMakeLists.txt @@ -38,6 +38,7 @@ add_executable( test_float_conv.cpp test_scale.cpp test_min_max.cpp + test_resize.cpp ) target_link_libraries( @@ -81,6 +82,7 @@ add_executable( test_float_conv.cpp test_scale.cpp test_min_max.cpp + test_resize.cpp ) target_link_libraries( diff --git a/conformity/opencv/test_resize.cpp b/conformity/opencv/test_resize.cpp new file mode 100644 index 0000000000000000000000000000000000000000..966ccf922200224d85ec74146cf7460a70c6180d --- /dev/null +++ b/conformity/opencv/test_resize.cpp @@ -0,0 +1,70 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 +#include "test_resize.h" + +#include +#include +#include + +#include "opencv2/core/hal/interface.h" +#include "opencv2/imgproc/hal/interface.h" + +// Factor is interpreted as 1/1000, i.e. 500 for 0.5 +template +cv::Mat exec_resize(cv::Mat& input_mat) { + cv::Mat result(input_mat.size().height * Factor / 1000, + input_mat.size().width * Factor / 1000, input_mat.type()); + resize(input_mat, result, result.size(), 0, 0, Type); + return result; +} + +#if MANAGER +// Resizes random matrices of every size from 5x5 to MaxSize x MaxSize and +// checks each result against the one obtained from the subordinate. +// Returns true as soon as a mismatch is found, false if all sizes match. +template +bool test_resize(int index, RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue) { + cv::RNG rng(0); + + for (size_t x = 5; x <= MaxSize; ++x) { + for (size_t y = 5; y <= MaxSize; ++y) { + cv::Mat input_mat(x, y, Format); + rng.fill(input_mat, cv::RNG::NORMAL, 0.0, 1.0e10); + cv::Mat actual_mat = exec_resize(input_mat); + cv::Mat expected_mat = get_expected_from_subordinate( + index, request_queue, reply_queue, input_mat); + + bool success = + (CV_MAT_DEPTH(Format) == CV_32F && + !are_float_matrices_different(1e-5, actual_mat, + expected_mat)) || + (CV_MAT_DEPTH(Format) == CV_8U && + !are_matrices_different(0, actual_mat, expected_mat)); + if (!success) { + fail_print_matrices(x, y, 
input_mat, actual_mat, expected_mat); + // Propagate the failure so a mismatch is not merely printed and ignored. + return true; + } + } + } + + return false; +} +#endif + +std::vector& resize_tests_get() { + // clang-format off + static std::vector tests = { + TEST("Resize4x4 float32, INTER_LINEAR", (test_resize<4000, CV_HAL_INTER_LINEAR, 16, CV_32FC4>), (exec_resize<4000, CV_HAL_INTER_LINEAR>)), + TEST("Resize2x2 float32, INTER_LINEAR", (test_resize<2000, CV_HAL_INTER_LINEAR, 16, CV_32FC4>), (exec_resize<2000, CV_HAL_INTER_LINEAR>)), + + TEST("Resize0.5x0.5 uint8, INTER_AREA", (test_resize<500, CV_HAL_INTER_AREA, 32, CV_8UC4>), (exec_resize<500, CV_HAL_INTER_AREA>)), + TEST("Resize0.5x0.5 uint8, INTER_LINEAR", (test_resize<500, CV_HAL_INTER_LINEAR, 32, CV_8UC4>), (exec_resize<500, CV_HAL_INTER_LINEAR>)), + TEST("Resize2x2 uint8, INTER_LINEAR", (test_resize<2000, CV_HAL_INTER_LINEAR, 16, CV_8UC4>), (exec_resize<2000, CV_HAL_INTER_LINEAR>)), + TEST("Resize4x4 uint8, INTER_LINEAR", (test_resize<4000, CV_HAL_INTER_LINEAR, 16, CV_8UC4>), (exec_resize<4000, CV_HAL_INTER_LINEAR>)), + }; + // clang-format on + return tests; +} diff --git a/conformity/opencv/test_resize.h b/conformity/opencv/test_resize.h new file mode 100644 index 0000000000000000000000000000000000000000..85387c63a62016c043da1ab126e58463037cbd92 --- /dev/null +++ b/conformity/opencv/test_resize.h @@ -0,0 +1,14 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_OPENCV_CONFORMITY_TEST_RESIZE_H_ +#define KLEIDICV_OPENCV_CONFORMITY_TEST_RESIZE_H_ + +#include + +#include "tests.h" + +std::vector& resize_tests_get(); + +#endif // KLEIDICV_OPENCV_CONFORMITY_TEST_RESIZE_H_ diff --git a/conformity/opencv/tests.cpp b/conformity/opencv/tests.cpp index a24a5ec555f3466bf4ae1071f1cbfc4539658ff3..4924120ffefa66322e91735f5206c92655014671 100644 --- a/conformity/opencv/tests.cpp +++ b/conformity/opencv/tests.cpp @@ -15,6 +15,7 @@ #include "test_float_conv.h" #include "test_gaussian_blur.h" #include "test_min_max.h" +#include 
"test_resize.h" #include "test_rgb2yuv.h" #include "test_scale.h" #include "test_sobel.h" @@ -36,6 +37,7 @@ std::vector all_tests = merge_tests({ sobel_tests_get, exp_tests_get, float_conversion_tests_get, + resize_tests_get, scale_tests_get, min_max_tests_get, }); diff --git a/doc/functionality.md b/doc/functionality.md index 30bb835c24386c885afb669d302502e1e41e2aee..ed0cbc92376c3f20a8d374cc2a3da5ed532a125a 100644 --- a/doc/functionality.md +++ b/doc/functionality.md @@ -71,6 +71,11 @@ See `doc/opencv.md` for details of the functionality available in OpenCV. | Sobel (3x3) | x | | Gaussian Blur (3x3, 5x5, 7x7) | x | +## Resize to quarter +| | u8 | +|-------------|-----| +| 0.5x0.5 | x | + ## Resize with linear interpolation | | u8 | f32 | |-------------|-----|-----| diff --git a/doc/opencv.md b/doc/opencv.md index e8b38fa0ab5359848f6ad3360e7451781607a387..8b2c8c83db314908ea924ae5a94ad26ae639d850 100644 --- a/doc/opencv.md +++ b/doc/opencv.md @@ -146,9 +146,9 @@ Release context set up by [`morphology_init`](#morphology_init). Notes on parameters: * In-place operation not supported. * `src_type` - only supports `CV_8UC1` or, for 2*2 resize only, `CV_32FC1`. -* `dst_width`,`dst_height` - must both be the same multiple of `src_width` and `src_height` respectively, and that multiple must be either 2 or 4. +* `dst_width`,`dst_height` - must both be the same multiple of `src_width` and `src_height` respectively, and that multiple must be either 0.5, 2 or 4. * `inv_scale_x`,`inv_scale_y` - must be 0 or `dst_width / src_width`. -* `border_type` - Must be `INTER_LINEAR`. +* `interpolation` - Must be `INTER_LINEAR` or `INTER_AREA` (0.5 by 0.5 only). ### `sobel` Applies Sobel gradient filter to a given image. 
diff --git a/kleidicv/src/resize/resize_sc.h b/kleidicv/src/resize/resize_sc.h index 43cf2174e1426664b4a88d017c9c989ee21fc360..b67d8bc6610b5fe020cfd965c5598954045c5afe 100644 --- a/kleidicv/src/resize/resize_sc.h +++ b/kleidicv/src/resize/resize_sc.h @@ -23,14 +23,18 @@ static inline svuint8_t resize_parallel_vectors(svbool_t pg, svuint8_t top_row, static inline void parallel_rows_vectors_path_2x( svbool_t pg, Rows src_rows, Rows dst_rows) KLEIDICV_STREAMING_COMPATIBLE { - svuint8_t top_line0 = svld1(pg, &src_rows.at(0)[0]); - svuint8_t bottom_line0 = svld1(pg, &src_rows.at(1)[0]); - svuint8_t top_line1 = svld1_vnum(pg, &src_rows.at(0)[0], 1); - svuint8_t bottom_line1 = svld1_vnum(pg, &src_rows.at(1)[0], 1); - svuint8_t result0 = resize_parallel_vectors(pg, top_line0, bottom_line0); - svuint8_t result1 = resize_parallel_vectors(pg, top_line1, bottom_line1); - svst1b(pg, &dst_rows[0], svreinterpret_u16_u8(result0)); - svst1b_vnum(pg, &dst_rows[0], 1, svreinterpret_u16_u8(result1)); + svuint8_t top_row_0 = svld1(pg, &src_rows.at(0)[0]); + svuint8_t bottom_row_0 = svld1(pg, &src_rows.at(1)[0]); + svuint8_t top_row_1 = svld1_vnum(pg, &src_rows.at(0)[0], 1); + svuint8_t bottom_row_1 = svld1_vnum(pg, &src_rows.at(1)[0], 1); + svuint16_t sum0b = svaddlb(top_row_0, bottom_row_0); + svuint16_t sum0t = svaddlt(top_row_0, bottom_row_0); + svuint16_t sum1b = svaddlb(top_row_1, bottom_row_1); + svuint16_t sum1t = svaddlt(top_row_1, bottom_row_1); + svuint8_t res0 = svrshrnb(svadd_x(pg, sum0b, sum0t), 2); + svuint8_t res1 = svrshrnb(svadd_x(pg, sum1b, sum1t), 2); + svuint8_t result = svuzp1(res0, res1); + svst1(pg, &dst_rows[0], result); } static inline void parallel_rows_vectors_path( diff --git a/scripts/benchmark/run_benchmarks_4K.sh b/scripts/benchmark/run_benchmarks_4K.sh index 09b0d1bc1f92f4460a50870c2dd6795c5dde3a25..592147fc4ca43081fb71b0cffaea6fca959b3ce7 100755 --- a/scripts/benchmark/run_benchmarks_4K.sh +++ b/scripts/benchmark/run_benchmarks_4K.sh @@ -48,6 +48,7 @@ 
RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL Erode5x5 opencv_perf_imgproc '*Erode_big*' '(3840x2160, 8UC1, 5)')") RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL Erode17x17 opencv_perf_imgproc '*Erode_big*' '(3840x2160, 8UC1, 17)')") +RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL Resize_0.5_8b opencv_perf_imgproc '*ResizeAreaFast*' '(8UC1, 3840x2160, 2)')") RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL Resize2x2_8b opencv_perf_imgproc '*resizeUpLinearNonExact*' '(8UC1, (1920x1080, 3840x2160))')") RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL Resize2x2_float opencv_perf_imgproc '*resizeUpLinearNonExact*' '(32FC1, (1920x1080, 3840x2160))')") RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL Resize4x4_8b opencv_perf_imgproc '*resizeUpLinearNonExact*' '(8UC1, (960x540, 3840x2160))')") diff --git a/scripts/benchmark/run_benchmarks_FHD.sh b/scripts/benchmark/run_benchmarks_FHD.sh index 300e677b53b76fa6359e338012b3641d3163f9f3..bfc8430e0b0d45f1764aaa22f83b4e4250feafe4 100755 --- a/scripts/benchmark/run_benchmarks_FHD.sh +++ b/scripts/benchmark/run_benchmarks_FHD.sh @@ -48,6 +48,7 @@ RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL Erode5x5 opencv_perf_imgproc '*Erode_big*' '(1920x1080, 8UC1, 5)')") RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL Erode17x17 opencv_perf_imgproc '*Erode_big*' '(1920x1080, 8UC1, 17)')") +RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL Resize_0.5_8b opencv_perf_imgproc '*ResizeAreaFast*' '(8UC1, 1920x1080, 2)')") RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU 
$THERMAL Resize2x2_8b opencv_perf_imgproc '*resizeUpLinearNonExact*' '(8UC1, (960x540, 1920x1080))')") RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL Resize2x2_float opencv_perf_imgproc '*resizeUpLinearNonExact*' '(32FC1, (960x540, 1920x1080))')") RES+=$(printf "\n$(${DEV_DIR}/perf_test_op.sh $CUSTOM_BUILD_SUFFIX $CPU $THERMAL Resize4x4_8b opencv_perf_imgproc '*resizeUpLinearNonExact*' '(8UC1, (480x270, 1920x1080))')") diff --git a/scripts/ci-opencv.sh b/scripts/ci-opencv.sh index a11bc2370f2eb5653e71dd70d5e9f0b5e5f5890d..bf3afa6597e825992ae9759807cacb6a54e39860 100755 --- a/scripts/ci-opencv.sh +++ b/scripts/ci-opencv.sh @@ -46,6 +46,7 @@ IMGPROC_TEST_PATTERNS=( '*Imgproc_GaussianBlur*' '*Imgproc_Sobel*' '*Imgproc_Canny*' + '*Imgproc_Resize*' ) IMGPROC_TEST_PATTERNS_STR="$(join_strings_with_colon "${IMGPROC_TEST_PATTERNS[*]}")" ../../../conformity/opencv_kleidicv/bin/opencv_test_imgproc \