From f8cc6188519059f6d73bb8ce67e5defe6c9f330d Mon Sep 17 00:00:00 2001 From: Noureldin Abdelfattah Date: Wed, 6 Aug 2025 16:48:45 +0100 Subject: [PATCH 1/2] Add RGBx & BGRx to YUV420 Neon --- adapters/opencv/kleidicv_hal.cpp | 78 ++++ adapters/opencv/kleidicv_hal.h | 33 ++ benchmark/benchmark.cpp | 61 +++ conformity/opencv/test_rgb2yuv420p.cpp | 72 ++++ ...st_cvtcolor.cpp => test_yuv420_to_rgb.cpp} | 11 +- conformity/opencv/tests.cpp | 3 +- conformity/opencv/tests.h | 3 +- doc/functionality.md | 52 +-- doc/opencv.md | 33 ++ .../kleidicv/conversions/rgb_to_yuv_420.h | 155 +++++++ kleidicv/include/kleidicv/kleidicv.h | 111 +++++ kleidicv/src/conversions/rgb_to_yuv420_neon.h | 400 ++++++++++++++++++ .../src/conversions/rgb_to_yuv420p_api.cpp | 59 +++ .../src/conversions/rgb_to_yuv420p_neon.cpp | 72 ++++ .../src/conversions/rgb_to_yuv420sp_api.cpp | 69 +++ .../src/conversions/rgb_to_yuv420sp_neon.cpp | 76 ++++ .../include/kleidicv_thread/kleidicv_thread.h | 65 +++ kleidicv_thread/src/kleidicv_thread.cpp | 97 +++++ scripts/benchmark/benchmarks.txt | 5 + scripts/ci-opencv.sh | 4 + test/api/test_rgb_to_yuv_420_p.cpp | 238 +++++++++++ test/api/test_rgb_to_yuv_420_sp.cpp | 256 +++++++++++ test/api/test_thread_rgb_to_yuv_p.cpp | 65 +++ test/api/test_thread_rgb_to_yuv_sp.cpp | 74 ++++ 24 files changed, 2061 insertions(+), 31 deletions(-) create mode 100644 conformity/opencv/test_rgb2yuv420p.cpp rename conformity/opencv/{test_cvtcolor.cpp => test_yuv420_to_rgb.cpp} (85%) create mode 100644 kleidicv/include/kleidicv/conversions/rgb_to_yuv_420.h create mode 100644 kleidicv/src/conversions/rgb_to_yuv420_neon.h create mode 100644 kleidicv/src/conversions/rgb_to_yuv420p_api.cpp create mode 100644 kleidicv/src/conversions/rgb_to_yuv420p_neon.cpp create mode 100644 kleidicv/src/conversions/rgb_to_yuv420sp_api.cpp create mode 100644 kleidicv/src/conversions/rgb_to_yuv420sp_neon.cpp create mode 100644 test/api/test_rgb_to_yuv_420_p.cpp create mode 100644 
test/api/test_rgb_to_yuv_420_sp.cpp create mode 100644 test/api/test_thread_rgb_to_yuv_p.cpp create mode 100644 test/api/test_thread_rgb_to_yuv_sp.cpp diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp index 6f8af0de2..5a0cc3774 100644 --- a/adapters/opencv/kleidicv_hal.cpp +++ b/adapters/opencv/kleidicv_hal.cpp @@ -317,6 +317,84 @@ int yuv_to_bgr(const uchar *src_data, size_t src_step, uchar *dst_data, return CV_HAL_ERROR_NOT_IMPLEMENTED; } +int bgr_to_yuv420_p(const uchar *src_data, size_t src_step, uchar *dst_data, + size_t dst_step, int width, int height, int scn, + bool swapBlue, int uIdx) { + const bool is_bgr = !swapBlue; + const bool is_nv21 = (uIdx != 1); + auto mt = get_multithreading(); + if (scn == 3) { + if (is_bgr) { + return convert_error(kleidicv_thread_bgr_to_yuv420_p_u8( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, + static_cast(width), static_cast(height), is_nv21, + mt)); + } + return convert_error(kleidicv_thread_rgb_to_yuv420_p_u8( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, + static_cast(width), static_cast(height), is_nv21, mt)); + } + + if (scn == 4) { + if (is_bgr) { + return convert_error(kleidicv_thread_bgra_to_yuv420_p_u8( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, + static_cast(width), static_cast(height), is_nv21, + mt)); + } + return convert_error(kleidicv_thread_rgba_to_yuv420_p_u8( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, + static_cast(width), static_cast(height), is_nv21, mt)); + } + + return CV_HAL_ERROR_NOT_IMPLEMENTED; +} + +int bgr_to_yuv420_sp(const uchar *src_data, size_t src_step, uchar *y_data, + size_t y_step, uchar *uv_data, size_t uv_step, int width, + int height, int scn, bool swapBlue, int uIdx) { + const bool is_bgr = !swapBlue; + const bool is_nv21 = (uIdx != 1); + auto mt = get_multithreading(); + if (scn == 3) { + if (is_bgr) { + return 
convert_error(kleidicv_thread_bgr_to_yuv420_sp_u8( + reinterpret_cast(src_data), src_step, + reinterpret_cast(y_data), y_step, + reinterpret_cast(uv_data), uv_step, + static_cast(width), static_cast(height), is_nv21, + mt)); + } + return convert_error(kleidicv_thread_rgb_to_yuv420_sp_u8( + reinterpret_cast(src_data), src_step, + reinterpret_cast(y_data), y_step, + reinterpret_cast(uv_data), uv_step, + static_cast(width), static_cast(height), is_nv21, mt)); + } + + if (scn == 4) { + if (is_bgr) { + return convert_error(kleidicv_thread_bgra_to_yuv420_sp_u8( + reinterpret_cast(src_data), src_step, + reinterpret_cast(y_data), y_step, + reinterpret_cast(uv_data), uv_step, + static_cast(width), static_cast(height), is_nv21, + mt)); + } + return convert_error(kleidicv_thread_rgba_to_yuv420_sp_u8( + reinterpret_cast(src_data), src_step, + reinterpret_cast(y_data), y_step, + reinterpret_cast(uv_data), uv_step, + static_cast(width), static_cast(height), is_nv21, mt)); + } + + return CV_HAL_ERROR_NOT_IMPLEMENTED; +} + int bgr_to_yuv(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int width, int height, int depth, int scn, bool swapBlue, bool isCbCr) { diff --git a/adapters/opencv/kleidicv_hal.h b/adapters/opencv/kleidicv_hal.h index 4d3c95989..bf4171337 100644 --- a/adapters/opencv/kleidicv_hal.h +++ b/adapters/opencv/kleidicv_hal.h @@ -29,6 +29,14 @@ int bgr_to_bgr(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int width, int height, int depth, int scn, int dcn, bool swapBlue); +int bgr_to_yuv420_p(const uchar *src_data, size_t src_step, uchar *dst_data, + size_t dst_step, int width, int height, int scn, + bool swapBlue, int uIdx); + +int bgr_to_yuv420_sp(const uchar *src_data, size_t src_step, uchar *y_data, + size_t y_step, uchar *uv_data, size_t uv_step, int width, + int height, int scn, bool swapBlue, int uIdx); + int yuv_to_bgr_sp(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int dst_width, 
int dst_height, int dcn, bool swapBlue, int uIdx); @@ -219,6 +227,31 @@ static inline int kleidicv_bgr_to_bgr_with_fallback( #undef cv_hal_cvtBGRtoBGR #define cv_hal_cvtBGRtoBGR kleidicv_bgr_to_bgr_with_fallback +// bgr_to_yuv420_p +static inline int kleidicv_bgr_to_yuv420_with_fallback( + const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, + int width, int height, int scn, bool swapBlue, int uIdx) { + return KLEIDICV_HAL_FALLBACK_FORWARD( + bgr_to_yuv420_p, cv_hal_cvtBGRtoThreePlaneYUV, src_data, src_step, + dst_data, dst_step, width, height, scn, swapBlue, uIdx); +} + +#undef cv_hal_cvtBGRtoThreePlaneYUV +#define cv_hal_cvtBGRtoThreePlaneYUV kleidicv_bgr_to_yuv420_with_fallback + +// bgr_to_yuv420_sp +static inline int kleidicv_bgr_to_yuv420sp_with_fallback( + const uchar *src_data, size_t src_step, uchar *y_data, size_t y_step, + uchar *uv_data, size_t uv_step, int width, int height, int scn, + bool swapBlue, int uIdx) { + return KLEIDICV_HAL_FALLBACK_FORWARD( + bgr_to_yuv420_sp, cv_hal_cvtBGRtoTwoPlaneYUV, src_data, src_step, y_data, + y_step, uv_data, uv_step, width, height, scn, swapBlue, uIdx); +} + +#undef cv_hal_cvtBGRtoTwoPlaneYUV +#define cv_hal_cvtBGRtoTwoPlaneYUV kleidicv_bgr_to_yuv420sp_with_fallback + // yuv_to_bgr_sp static inline int kleidicv_yuv_to_bgr_sp_with_fallback( const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 6ee20f15d..444e38612 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -772,6 +772,67 @@ static void yuv_p_to_bgra(benchmark::State& state) { yuv_p<4>(kleidicv_yuv_p_to_bgra_u8, state); } BENCHMARK(yuv_p_to_bgra); +template +static void rgb_to_yuv420_p_imp(Function f, benchmark::State& state) { + bench_functor(state, [f]() { + (void)f(get_source_buffer_a(), + InChannels * image_width * sizeof(uint8_t), + get_destination_buffer_a(), + image_width * sizeof(uint8_t), image_width, image_height, true); + 
}); +} + +static void rgb_to_yuv_p(benchmark::State& state) { + rgb_to_yuv420_p_imp<3>(kleidicv_rgb_to_yuv420_p_u8, state); +} +BENCHMARK(rgb_to_yuv_p); + +static void rgba_to_yuv_p(benchmark::State& state) { + rgb_to_yuv420_p_imp<4>(kleidicv_rgba_to_yuv420_p_u8, state); +} +BENCHMARK(rgba_to_yuv_p); + +static void bgr_to_yuv_p(benchmark::State& state) { + rgb_to_yuv420_p_imp<3>(kleidicv_bgr_to_yuv420_p_u8, state); +} +BENCHMARK(bgr_to_yuv_p); + +static void bgra_to_yuv_p(benchmark::State& state) { + rgb_to_yuv420_p_imp<4>(kleidicv_bgra_to_yuv420_p_u8, state); +} +BENCHMARK(bgra_to_yuv_p); + +template +static void rgb_to_yuv420_sp_imp(Function f, benchmark::State& state) { + bench_functor(state, [f]() { + (void)f( + get_source_buffer_a(), + InChannels * image_width * sizeof(uint8_t), + get_destination_buffer_a(), image_width * sizeof(uint8_t), + get_destination_buffer_b(), + (image_width / 2) * sizeof(uint8_t), image_width, image_height, true); + }); +} + +static void rgb_to_yuv_sp(benchmark::State& state) { + rgb_to_yuv420_sp_imp<3>(kleidicv_rgb_to_yuv420_sp_u8, state); +} +BENCHMARK(rgb_to_yuv_sp); + +static void rgba_to_yuv_sp(benchmark::State& state) { + rgb_to_yuv420_sp_imp<4>(kleidicv_rgba_to_yuv420_sp_u8, state); +} +BENCHMARK(rgba_to_yuv_sp); + +static void bgr_to_yuv_sp(benchmark::State& state) { + rgb_to_yuv420_sp_imp<3>(kleidicv_bgr_to_yuv420_sp_u8, state); +} +BENCHMARK(bgr_to_yuv_sp); + +static void bgra_to_yuv_sp(benchmark::State& state) { + rgb_to_yuv420_sp_imp<4>(kleidicv_bgra_to_yuv420_sp_u8, state); +} +BENCHMARK(bgra_to_yuv_sp); template static void morphology(Function f, benchmark::State& state) { diff --git a/conformity/opencv/test_rgb2yuv420p.cpp b/conformity/opencv/test_rgb2yuv420p.cpp new file mode 100644 index 000000000..f6a9e7dc7 --- /dev/null +++ b/conformity/opencv/test_rgb2yuv420p.cpp @@ -0,0 +1,72 @@ +// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "tests.h" + 
+template +static cv::Mat exec_cvtcolor(cv::Mat& input) { + cv::Mat result; + cv::cvtColor(input, result, Code); + return result; +} + +#if MANAGER +template +bool test_rgb2yuv420p(int index, RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue) { + cv::RNG rng(0); + + auto check = [&](size_t x, size_t y) -> bool { + cv::Mat input(x, y, CV_8UC(channels)); + rng.fill(input, cv::RNG::UNIFORM, 0, 255); + + cv::Mat actual = exec_cvtcolor(input); + cv::Mat expected = + get_expected_from_subordinate(index, request_queue, reply_queue, input); + + if (are_matrices_different(0, actual, expected)) { + fail_print_matrices(x, y, input, actual, expected); + return true; + } + return false; + }; + + // OpenCV only accepts source images with an even number of columns & rows. + for (size_t x = 4; x <= 32; x += 2) { + for (size_t y = 2; y <= 32; y += 2) { + if (check(x, y)) { + return true; + } + } + } + + // Check taller images - this number of rows was necessary to trigger a bug on + // a machine with 64 cores.
+ if (check(36, 1000)) { + return true; + } + + return false; +} +#endif + +#define CVTCOLOR_TEST(code, channel) \ + TEST(#code, (test_rgb2yuv420p), \ + exec_cvtcolor) + +std::vector& rgb2yuv420_tests_get() { + // clang-format off + static std::vector tests = { + CVTCOLOR_TEST(RGB2YUV_YV12 , 3), + CVTCOLOR_TEST(RGBA2YUV_YV12, 4), + CVTCOLOR_TEST(BGR2YUV_YV12 , 3), + CVTCOLOR_TEST(BGRA2YUV_YV12, 4), + CVTCOLOR_TEST(BGR2YUV_IYUV , 3), + CVTCOLOR_TEST(BGRA2YUV_IYUV, 4), + CVTCOLOR_TEST(RGB2YUV_IYUV , 3), + CVTCOLOR_TEST(RGBA2YUV_IYUV, 4), + }; + // clang-format on + return tests; +} diff --git a/conformity/opencv/test_cvtcolor.cpp b/conformity/opencv/test_yuv420_to_rgb.cpp similarity index 85% rename from conformity/opencv/test_cvtcolor.cpp rename to conformity/opencv/test_yuv420_to_rgb.cpp index 232aa28a7..fbdd6af9b 100644 --- a/conformity/opencv/test_cvtcolor.cpp +++ b/conformity/opencv/test_yuv420_to_rgb.cpp @@ -13,8 +13,8 @@ static cv::Mat exec_cvtcolor(cv::Mat& input) { #if MANAGER template -bool test_cvtcolor(int index, RecreatedMessageQueue& request_queue, - RecreatedMessageQueue& reply_queue) { +bool test_yuv420_to_rgb(int index, RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue) { cv::RNG rng(0); auto check = [&](size_t x, size_t y) -> bool { @@ -53,10 +53,11 @@ bool test_cvtcolor(int index, RecreatedMessageQueue& request_queue, } #endif -#define CVTCOLOR_TEST(code) \ - TEST(#code, test_cvtcolor, exec_cvtcolor) +#define CVTCOLOR_TEST(code) \ + TEST(#code, test_yuv420_to_rgb, \ + exec_cvtcolor) -std::vector& cvtcolor_tests_get() { +std::vector& yuv420_to_rgb_tests_get() { // clang-format off static std::vector tests = { CVTCOLOR_TEST(YUV2BGR_YV12), diff --git a/conformity/opencv/tests.cpp b/conformity/opencv/tests.cpp index 55744af48..a5c792039 100644 --- a/conformity/opencv/tests.cpp +++ b/conformity/opencv/tests.cpp @@ -24,7 +24,7 @@ static std::vector merge_tests( std::vector all_tests = merge_tests({ binary_op_tests_get, - 
cvtcolor_tests_get, + yuv420_to_rgb_tests_get, morphology_tests_get, #if KLEIDICV_ENABLE_ALL_OPENCV_HAL separable_filter_2d_tests_get, @@ -45,6 +45,7 @@ std::vector all_tests = merge_tests({ blur_and_downsample_tests_get, scharr_interleaved_tests_get, median_blur_tests_get, + rgb2yuv420_tests_get, }); #if MANAGER diff --git a/conformity/opencv/tests.h b/conformity/opencv/tests.h index b7d4238b8..ab93fcf52 100644 --- a/conformity/opencv/tests.h +++ b/conformity/opencv/tests.h @@ -10,7 +10,7 @@ #include "utils.h" std::vector& binary_op_tests_get(); -std::vector& cvtcolor_tests_get(); +std::vector& yuv420_to_rgb_tests_get(); std::vector& morphology_tests_get(); std::vector& separable_filter_2d_tests_get(); std::vector& gaussian_blur_tests_get(); @@ -29,5 +29,6 @@ std::vector& warp_perspective_tests_get(); std::vector& blur_and_downsample_tests_get(); std::vector& scharr_interleaved_tests_get(); std::vector& median_blur_tests_get(); +std::vector& rgb2yuv420_tests_get(); #endif // KLEIDICV_OPENCV_CONFORMITY_TESTS_H_ diff --git a/doc/functionality.md b/doc/functionality.md index 765af9c1e..57bf1d463 100644 --- a/doc/functionality.md +++ b/doc/functionality.md @@ -29,30 +29,34 @@ See `doc/opencv.md` for details of the functionality available in OpenCV. 
| Bitwise And | x | ## Color conversions -| | u8 | -|------------------------------|-----| -| Gray-RGB | x | -| Gray-RGBA | x | -| RGB-BGR | x | -| BGR-RGB | x | -| RGBA-BGRA | x | -| BGRA-RGBA | x | -| YUV420 (planar) - BGR | x | -| YUV420 (planar) - BGRA | x | -| YUV420 (planar) - RGB | x | -| YUV420 (planar) - RGBA | x | -| YUV420 (semi-planar) - BGR | x | -| YUV420 (semi-planar) - BGRA | x | -| YUV420 (semi-planar) - RGB | x | -| YUV420 (semi-planar) - RGBA | x | -| YUV-BGR | x | -| YUV-RGB | x | -| YUV-BGRA | x | -| YUV-RGBA | x | -| RGB-YUV | x | -| RGBA-YUV | x | -| BGR-YUV | x | -| BGRA-YUV | x | +| | u8 | +|------------------------------------|-----| +| Gray-RGB | x | +| Gray-RGBA | x | +| RGB-BGR | x | +| BGR-RGB | x | +| RGBA-BGRA | x | +| BGRA-RGBA | x | +| YUV420 (planar) - BGR | x | +| YUV420 (planar) - BGRA | x | +| YUV420 (planar) - RGB | x | +| YUV420 (planar) - RGBA | x | +| YUV420 (semi-planar) - BGR | x | +| YUV420 (semi-planar) - BGRA | x | +| YUV420 (semi-planar) - RGB | x | +| YUV420 (semi-planar) - RGBA | x | +| YUV-BGR | x | +| YUV-RGB | x | +| YUV-BGRA | x | +| YUV-RGBA | x | +| RGB-YUV | x | +| RGBA-YUV | x | +| BGR-YUV | x | +| BGRA-YUV | x | +| RGB-YUV420 (planar & semi-planar) | x | +| RGBA-YUV420 (planar & semi-planar) | x | +| BGR-YUV420 (planar & semi-planar) | x | +| BGRA-YUV420 (planar & semi-planar) | x | ## Data type conversions | | u8 | s8 | f32 | diff --git a/doc/opencv.md b/doc/opencv.md index e5303182a..f3e7e8e29 100644 --- a/doc/opencv.md +++ b/doc/opencv.md @@ -99,6 +99,39 @@ Notes on parameters: * `src.depth()` - only supports `CV_8U` depth. * `src.channels()` - supports 3 for RGB/BGR and 4 for RGBA/BGRA. 
+ +#### [`COLOR_RGB2YUV_IYUV`](https://docs.opencv.org/4.10.0/d8/d01/group__imgproc__color__conversions.html#gga4e0972be5de079fed4e3a10e24ef5ef0ab91f1a5041e1d4b7b0c1f4d0b69479e5:~:text=V%2C%20see%20color_convert_rgb_yuv_42x-,COLOR_RGB2YUV_IYUV%C2%A0,-Python%3A%20cv.COLOR_RGB2YUV_IYUV), [`COLOR_BGR2YUV_IYUV`](https://docs.opencv.org/4.10.0/d8/d01/group__imgproc__color__conversions.html#gga4e0972be5de079fed4e3a10e24ef5ef0ab91f1a5041e1d4b7b0c1f4d0b69479e5:~:text=synonym%20to%20I420-,COLOR_BGR2YUV_IYUV,-Python%3A%20cv.COLOR_BGR2YUV_IYUV), [`COLOR_RGBA2YUV_IYUV`](https://docs.opencv.org/4.10.0/d8/d01/group__imgproc__color__conversions.html#gga4e0972be5de079fed4e3a10e24ef5ef0ab91f1a5041e1d4b7b0c1f4d0b69479e5:~:text=V%2C%20see%20color_convert_rgb_yuv_42x-,COLOR_RGBA2YUV_IYUV,-Python%3A%20cv.COLOR_RGBA2YUV_IYUV), [`COLOR_BGRA2YUV_IYUV`](https://docs.opencv.org/4.10.0/d8/d01/group__imgproc__color__conversions.html#gga4e0972be5de079fed4e3a10e24ef5ef0ab91f1a5041e1d4b7b0c1f4d0b69479e5:~:text=synonym%20to%20I420-,COLOR_BGRA2YUV_IYUV,-Python%3A%20cv.COLOR_BGRA2YUV_IYUV) + +RGB/BGR(A/X) to YUV420 planar (IYUV/YUV420p) conversion. +This transformation outputs three separate planes: Y, U, and V, where chroma channels (U and V) are subsampled by 2 in both horizontal and vertical directions. +RGBX/BGRX and RGBA/BGRA formats are supported, with the last (X or Alpha) channel ignored during the conversion. + +| | RGB | BGR | RGBA | BGRA | RGBX | BGRX | +|---------|-----|-----|------|------|------|------| +| YUV420p | x | x | x | x | x | x | + +**Notes on parameters:** +- `src.depth()` — only supports `CV_8U` depth. +- `src.channels()` — supports 3 (RGB/BGR), 4 (RGBA/BGRA/RGBX/BGRX). For RGBX/BGRX, the X channel is ignored. +- `dst` — output is a single image containing the Y plane followed by U and V planes (IYUV layout). 
+ + +#### [`COLOR_RGB2YUV_YV12`](https://docs.opencv.org/4.10.0/d8/d01/group__imgproc__color__conversions.html#gga4e0972be5de079fed4e3a10e24ef5ef0ab91f1a5041e1d4b7b0c1f4d0b69479e5:~:text=synonym%20to%20I420-,COLOR_RGB2YUV_YV12,-Python%3A%20cv.COLOR_RGB2YUV_YV12), [`COLOR_BGR2YUV_YV12`](https://docs.opencv.org/4.10.0/d8/d01/group__imgproc__color__conversions.html#gga4e0972be5de079fed4e3a10e24ef5ef0ab91f1a5041e1d4b7b0c1f4d0b69479e5:~:text=U%2C%20see%20color_convert_rgb_yuv_42x-,COLOR_BGR2YUV_YV12,-Python%3A%20cv.COLOR_BGR2YUV_YV12), [`COLOR_RGBA2YUV_YV12`](https://docs.opencv.org/4.10.0/d8/d01/group__imgproc__color__conversions.html#gga4e0972be5de079fed4e3a10e24ef5ef0ab91f1a5041e1d4b7b0c1f4d0b69479e5:~:text=U%2C%20see%20color_convert_rgb_yuv_42x-,COLOR_RGBA2YUV_YV12,-Python%3A%20cv.COLOR_RGBA2YUV_YV12), [`COLOR_BGRA2YUV_YV12`](https://docs.opencv.org/4.10.0/d8/d01/group__imgproc__color__conversions.html#gga4e0972be5de079fed4e3a10e24ef5ef0ab91f1a5041e1d4b7b0c1f4d0b69479e5:~:text=U%2C%20see%20color_convert_rgb_yuv_42x-,COLOR_BGRA2YUV_YV12,-Python%3A%20cv.COLOR_BGRA2YUV_YV12) + +RGB/BGR(A/X) to YUV420 planar (YV12) conversion. +Like IYUV, this format stores Y, U, and V planes with 4:2:0 chroma subsampling. The only difference is that U and V planes are **swapped** in memory layout: Y is followed by V then U. +RGBX/BGRX and RGBA/BGRA formats are supported, with the last (X or Alpha) channel ignored during the conversion. + +| | RGB | BGR | RGBA | BGRA | RGBX | BGRX | +|---------|-----|-----|------|------|------|------| +| YUV420p | x | x | x | x | x | x | + +**Notes on parameters:** +- `src.depth()` — only supports `CV_8U` depth. +- `src.channels()` — supports 3 (RGB/BGR), 4 (RGBA/BGRA/RGBX/BGRX). For RGBX/BGRX, the X channel is ignored. +- `dst` — output is a single image containing the Y plane followed by V and U planes (YV12 layout). 
+ + ### [`cv::GaussianBlur()`](https://docs.opencv.org/4.11.0/d4/d86/group__imgproc__filter.html#gae8bdcd9154ed5ca3cbc1766d960f45c1) > ⚠️ **The operation is not bitexact with OpenCV due to rounding differences even if ALGO_HINT_ACCURATE is used as the hint parameter.** diff --git a/kleidicv/include/kleidicv/conversions/rgb_to_yuv_420.h b/kleidicv/include/kleidicv/conversions/rgb_to_yuv_420.h new file mode 100644 index 000000000..393335744 --- /dev/null +++ b/kleidicv/include/kleidicv/conversions/rgb_to_yuv_420.h @@ -0,0 +1,155 @@ +// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_CONVERSIONS_RGB_TO_YUV420_H +#define KLEIDICV_CONVERSIONS_RGB_TO_YUV420_H + +#include + +#include "kleidicv/config.h" +#include "kleidicv/kleidicv.h" +#include "kleidicv/types.h" + +extern "C" { +// For internal use only. See instead `kleidicv_rgb_to_yuv420_sp_u8`. +// Converts a stripe (i.e., a row range) of a RGB image to a planar YUV420 image +// format (I420 or YV12). The stripe is defined by the range [begin, end]. +KLEIDICV_API_DECLARATION(kleidicv_rgb_to_yuv420_sp_stripe_u8, + const uint8_t *src, size_t src_stride, uint8_t *y_dst, + size_t y_stride, uint8_t *uv_dst, size_t uv_stride, + size_t width, size_t height, bool is_nv21, + size_t begin, size_t end); + +// For internal use only. See instead `kleidicv_rgba_to_yuv420_sp_u8`. +// Converts a stripe (i.e., a row range) of a RGBA image to a planar YUV420 +// image format (I420 or YV12). The stripe is defined by the range [begin, end]. +KLEIDICV_API_DECLARATION(kleidicv_rgba_to_yuv420_sp_stripe_u8, + const uint8_t *src, size_t src_stride, uint8_t *y_dst, + size_t y_stride, uint8_t *uv_dst, size_t uv_stride, + size_t width, size_t height, bool is_nv21, + size_t begin, size_t end); + +// For internal use only. See instead `kleidicv_bgr_to_yuv420_sp_u8`. 
+// Converts a stripe (i.e., a row range) of a BGR image to a planar YUV420 image +// format (I420 or YV12). The stripe is defined by the range [begin, end]. +KLEIDICV_API_DECLARATION(kleidicv_bgr_to_yuv420_sp_stripe_u8, + const uint8_t *src, size_t src_stride, uint8_t *y_dst, + size_t y_stride, uint8_t *uv_dst, size_t uv_stride, + size_t width, size_t height, bool is_nv21, + size_t begin, size_t end); + +// For internal use only. See instead `kleidicv_bgra_to_yuv420_sp_u8`. +// Converts a stripe (i.e., a row range) of a BGRA image to a planar YUV420 +// image format (I420 or YV12). The stripe is defined by the range [begin, end]. +KLEIDICV_API_DECLARATION(kleidicv_bgra_to_yuv420_sp_stripe_u8, + const uint8_t *src, size_t src_stride, uint8_t *y_dst, + size_t y_stride, uint8_t *uv_dst, size_t uv_stride, + size_t width, size_t height, bool is_nv21, + size_t begin, size_t end); + +// For internal use only. See instead `kleidicv_rgb_to_yuv420_p_u8`. +// Converts a stripe (i.e., a row range) of a RGB image to a planar YUV420 image +// format (I420 or YV12). The stripe is defined by the range [begin, end]. +KLEIDICV_API_DECLARATION(kleidicv_rgb_to_yuv420_p_stripe_u8, const uint8_t *src, + size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, bool is_nv21, + size_t begin, size_t end); + +// For internal use only. See instead `kleidicv_rgba_to_yuv420_p_u8`. +// Converts a stripe (i.e., a row range) of a RGBA image to a planar YUV420 +// image format (I420 or YV12). The stripe is defined by the range [begin, end]. +KLEIDICV_API_DECLARATION(kleidicv_rgba_to_yuv420_p_stripe_u8, + const uint8_t *src, size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, size_t height, + bool is_nv21, size_t begin, size_t end); + +// For internal use only. See instead `kleidicv_bgr_to_yuv420_p_u8`. +// Converts a stripe (i.e., a row range) of a BGR image to a planar YUV420 image +// format (I420 or YV12). 
The stripe is defined by the range [begin, end]. +KLEIDICV_API_DECLARATION(kleidicv_bgr_to_yuv420_p_stripe_u8, const uint8_t *src, + size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, bool is_nv21, + size_t begin, size_t end); + +// For internal use only. See instead `kleidicv_bgra_to_yuv420_p_u8`. +// Converts a stripe (i.e., a row range) of a BGRA image to a planar YUV420 +// image format (I420 or YV12). The stripe is defined by the range [begin, end]. +KLEIDICV_API_DECLARATION(kleidicv_bgra_to_yuv420_p_stripe_u8, + const uint8_t *src, size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, size_t height, + bool is_nv21, size_t begin, size_t end); +} +namespace kleidicv { + +// Coefficients for RGB to YUV420p conversion +static constexpr int kRYWeight = + 269484; // 0.299055 * (236-16)/256 * (1 << kWeightScale) +static constexpr int kGYWeight = + 528482; // 0.586472 * (236-16)/256 * (1 << kWeightScale) +static constexpr int kBYWeight = + 102760; // 0.114035 * (236-16)/256 * (1 << kWeightScale) +static constexpr int kRUWeight = -155188; // -0.148 * (1 << kWeightScale) +static constexpr int kGUWeight = -305135; // -0.291 * (1 << kWeightScale) +static constexpr int kBUWeight = 460324; // 0.439 * (1 << kWeightScale) +static constexpr int kGVWeight = -385875; // -0.368 * (1 << kWeightScale) +static constexpr int kBVWeight = -74448; // -0.071 * (1 << kWeightScale) + +namespace neon { +kleidicv_error_t rgb_to_yuv420_sp_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *y_dst, + size_t y_stride, uint8_t *uv_dst, + size_t uv_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end); + +kleidicv_error_t rgba_to_yuv420_sp_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *y_dst, + size_t y_stride, uint8_t *uv_dst, + size_t uv_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end); + +kleidicv_error_t bgr_to_yuv420_sp_stripe_u8(const uint8_t
*src, + size_t src_stride, uint8_t *y_dst, + size_t y_stride, uint8_t *uv_dst, + size_t uv_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end); + +kleidicv_error_t bgra_to_yuv420_sp_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *y_dst, + size_t y_stride, uint8_t *uv_dst, + size_t uv_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end); + +kleidicv_error_t rgb_to_yuv420_p_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end); + +kleidicv_error_t rgba_to_yuv420_p_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end); + +kleidicv_error_t bgr_to_yuv420_p_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end); + +kleidicv_error_t bgra_to_yuv420_p_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end); + +} // namespace neon + +} // namespace kleidicv + +#endif // KLEIDICV_CONVERSIONS_RGB_TO_YUV420_H diff --git a/kleidicv/include/kleidicv/kleidicv.h b/kleidicv/include/kleidicv/kleidicv.h index cdb2e0160..ce3cac37d 100644 --- a/kleidicv/include/kleidicv/kleidicv.h +++ b/kleidicv/include/kleidicv/kleidicv.h @@ -868,6 +868,117 @@ KLEIDICV_API_DECLARATION(kleidicv_rgba_to_yuv_u8, const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, size_t width, size_t height); +#define KLEIDICV_OP_RGB_TO_YUV420P(name) \ + kleidicv_error_t name(const uint8_t *src, size_t src_stride, uint8_t *dst, \ + size_t dst_stride, size_t width, size_t height, \ + bool is_nv21); + +/// Converts an interleaved RGB, RGBA, BGR, or BGRA image to planar YUV420 +/// format (I420 or YV12 layout). 
All channels are 8-bit wide. If the input +/// format includes an alpha channel, it is ignored. +/// +/// ### Source format +/// Source data has 3 or 4 interleaved channels per pixel: +/// - R, G, B +/// - B, G, R +/// - R, G, B, Alpha +/// - B, G, R, Alpha +/// +/// One pixel occupies 3 or 4 bytes, depending on the format. There is no +/// padding between pixels. +/// +/// ### Destination format: Planar YUV420 +/// The output buffer consists of three planes concatenated in memory: +/// - Y plane: full resolution, size = width × height +/// - U plane: quarter resolution, size = (width / 2) × (height / 2) +/// - V plane: quarter resolution, size = (width / 2) × (height / 2) +/// +/// If `is_nv21 == false`, the format is **I420**: Y + U + V +/// If `is_nv21 == true`, the format is **YV12**: Y + V + U +/// +/// Width and height refer to the **full image** dimensions. Total number of +/// pixels must not exceed @ref KLEIDICV_MAX_IMAGE_PIXELS. +/// +/// @param src Pointer to the source buffer containing interleaved +/// RGBX/BGRX data. +/// Must be non-null. +/// @param src_stride Byte offset between the start of one source row and the +/// next. Must be at least `(source channel count) * width`, +/// unless the image has only one row. +/// @param dst Pointer to the destination buffer to store Y + U + V or Y +/// + V + U data. Must be non-null. +/// @param dst_stride Stride (in bytes) between rows in the Y plane of the +/// output. +/// Must be at least `width`. +/// @param width Number of pixels in a row. +/// @param height Number of rows in the image. +/// @param is_nv21 If true, use YV12 layout (Y + V + U). Otherwise, +/// use I420 layout (Y + U + V).
+KLEIDICV_OP_RGB_TO_YUV420P(kleidicv_rgb_to_yuv420_p_u8); +/// @copydoc kleidicv_rgb_to_yuv420_p_u8 +KLEIDICV_OP_RGB_TO_YUV420P(kleidicv_rgba_to_yuv420_p_u8); +/// @copydoc kleidicv_rgb_to_yuv420_p_u8 +KLEIDICV_OP_RGB_TO_YUV420P(kleidicv_bgr_to_yuv420_p_u8); +/// @copydoc kleidicv_rgb_to_yuv420_p_u8 +KLEIDICV_OP_RGB_TO_YUV420P(kleidicv_bgra_to_yuv420_p_u8); + +#define KLEIDICV_OP_RGB_TO_YUV420SP(name) \ + kleidicv_error_t name(const uint8_t *src, size_t src_stride, uint8_t *y_dst, \ + size_t y_stride, uint8_t *uv_dst, size_t uv_stride, \ + size_t width, size_t height, bool is_nv21); + +/// Converts an interleaved RGB, RGBA, BGR, or BGRA image to semi-planar +/// YUV420 format (NV12 or NV21 layout). All channels are 8-bit wide. +/// If the input format includes an alpha channel, it is ignored. +/// +/// ### Source Format +/// Source data has 3 or 4 interleaved channels per pixel: +/// - R, G, B +/// - B, G, R +/// - R, G, B, Alpha +/// - B, G, R, Alpha +/// +/// Each pixel occupies 3 or 4 bytes, depending on the format. There is no +/// padding between pixels. +/// +/// ### Destination Format: Semi-Planar YUV420 +/// The output consists of two planes: +/// - Y plane: full resolution, size = width × height +/// - UV plane: interleaved chroma (U and V) at quarter resolution, +/// size = (width / 2) × (height / 2) × 2 bytes per chroma sample +/// pair +/// +/// If `is_nv21 == false`, the format is **NV12**: Y + interleaved UV +/// If `is_nv21 == true`, the format is **NV21**: Y + interleaved VU +/// +/// Width and height refer to the full image dimensions. +/// Total number of pixels must not exceed @ref KLEIDICV_MAX_IMAGE_PIXELS. +/// +/// @param src Pointer to the source buffer containing interleaved +/// RGBX/BGRX data. Must be non-null. +/// @param src_stride Byte offset between the start of one source row and the +/// next. Must be at least `(source channel count) * width`, +/// unless the image has only one row. 
+/// @param y_dst Pointer to the destination Y plane. Must be non-null. +/// @param y_stride Byte offset between the start of one Y row and the next. +/// Must be at least `width`. +/// @param uv_dst Pointer to the destination UV plane (interleaved). +/// Must be non-null. +/// @param uv_stride Byte offset between the start of one UV row and the +/// next. +/// Must be at least `__builtin_align_up(width, 2)`. +/// @param width Number of pixels per row in the image. +/// @param height Number of rows in the image. +/// @param is_nv21 If true, UV plane is written in VU order (NV21). +/// Otherwise, UV plane is written in UV order (NV12). +KLEIDICV_OP_RGB_TO_YUV420SP(kleidicv_rgb_to_yuv420_sp_u8); +/// @copydoc kleidicv_rgb_to_yuv420_sp_u8 +KLEIDICV_OP_RGB_TO_YUV420SP(kleidicv_rgba_to_yuv420_sp_u8); +/// @copydoc kleidicv_rgb_to_yuv420_sp_u8 +KLEIDICV_OP_RGB_TO_YUV420SP(kleidicv_bgr_to_yuv420_sp_u8); +/// @copydoc kleidicv_rgb_to_yuv420_sp_u8 +KLEIDICV_OP_RGB_TO_YUV420SP(kleidicv_bgra_to_yuv420_sp_u8); + /// Performs a comparison of each element's value in `src` with respect to a /// caller defined threshold. The strictly larger elements are set to /// `value` and the rest to 0. 
diff --git a/kleidicv/src/conversions/rgb_to_yuv420_neon.h b/kleidicv/src/conversions/rgb_to_yuv420_neon.h new file mode 100644 index 000000000..4c3c9d95b --- /dev/null +++ b/kleidicv/src/conversions/rgb_to_yuv420_neon.h @@ -0,0 +1,400 @@ +// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_RGB_TO_YUV420_H +#define KLEIDICV_RGB_TO_YUV420_H + +#include +#include + +#include "kleidicv/conversions/rgb_to_yuv_420.h" +#include "kleidicv/kleidicv.h" +#include "kleidicv/neon.h" + +namespace kleidicv::neon { + +// Coefficients for RGB to YUV420 conversion +// kWeightScale is the fixed-point shift: every weighted sum is shifted right +// by kWeightScale bits after a rounding term of 1 << (kWeightScale - 1) has +// been added. The k*Weight coefficients are declared elsewhere (presumably +// pre-scaled by 2^kWeightScale - confirm). +static const int kWeightScale = 20; +// Converts interleaved RGB/BGR rows (optionally with alpha) to YUV420. +// The body refers to three compile-time flags: RGB (channel order of the +// source), ALPHA (4 instead of 3 source channels) and INTERLEAVE (chroma +// written as interleaved pairs instead of separate U and V planes). +// Luma is (kRY*R + kGY*G + kBY*B + rounding + (16 << kWeightScale)) >> +// kWeightScale for every pixel; chroma uses a (128 << kWeightScale) offset and +// is produced only for pixels in even rows and even columns. +template +class RGBxorBGRxToYUV420 { + public: + // Converts rows [begin * 2, min(height, end * 2)), so `begin` and `end` + // count pairs of rows. Used for both chroma layouts despite the `420p` in + // its name. Always returns KLEIDICV_OK. + static kleidicv_error_t rgb2yuv420p_operation( + const uint8_t *src, size_t src_stride, uint8_t *y_dst, size_t y_stride, + uint8_t *uv_dst, size_t uv_stride, size_t width, size_t height, + bool is_nv21, size_t begin, size_t end) { + size_t row_begin = begin * 2; + size_t row_end = std::min(height, end * 2); + + const uint8_t *src_row = nullptr; + uint8_t *y_row = nullptr; + uint8_t *u_row = nullptr; + uint8_t *v_row = nullptr; + for (size_t h = row_begin; h < row_end; h++) { + src_row = src + src_stride * h; + y_row = y_dst + y_stride * h; + + bool evenRow = (h & 1) == 0; + + if (evenRow) { + if constexpr (INTERLEAVE) { + u_row = uv_dst + uv_stride * (h / 2); + } else { + // Two chroma rows of width / 2 bytes are packed into each + // uv_stride-sized row of the chroma region. + // NOTE(review): for odd `width` the scalar tail stores to offset + // width / 2, where the second packed row begins - confirm that odd + // widths are handled as intended. + u_row = uv_dst + uv_stride * (h / 4) + ((h / 2) % 2) * (width / 2); + // Pointer to the start of the V plane. + // The V plane follows the U plane. Both U and V planes are + // subsampled at a 2:1 vertical ratio (i.e., each has height / 2 + // rows), and are often stored in a single contiguous chroma region in + // memory. Depending on image height and stride, the starting offset + // of V may require adjustment to maintain correct alignment. In + // particular, the chroma rows may not align perfectly, so a + // fractional offset (in rows) is applied to calculate the V plane + // position.
The formula used here accounts for this by adjusting + // based on row parity, assuming consistent memory layout across the + // Y, U, and V planes. + v_row = uv_dst + uv_stride * ((h + height + 1) / 4) + + (((h + height + 1) / 2) % 2) * (width / 2); + } + } + + const size_t vsize = 16; + LoopUnroll2 loop{width, vsize}; + + loop.unroll_twice([&](size_t index) { + vector_path(src_row, y_row, u_row, v_row, is_nv21, index, evenRow); + }); + + loop.remaining([&](size_t index, size_t length) { + scalar_path(src_row, y_row, u_row, v_row, is_nv21, index, length, + evenRow); + }); + } + + return KLEIDICV_OK; + } + + private: + // Converts the 32 pixels starting at `index`: stores 32 Y bytes and, on + // even rows, 16 chroma pairs taken from the even-indexed pixels. With + // is_nv21 set, the two chroma vectors are swapped before being stored. + static void vector_path(const uint8_t *src_row, uint8_t *y_row, + uint8_t *u_row, uint8_t *v_row, bool &is_nv21, + size_t &index, bool &evenRow) { + const size_t vsize = 16; + // processing (2*vsize) pixels at once + uint8x16_t b0, b1, g0, g1, r0, r1; + load_rgb(b0, b1, g0, g1, r0, r1, src_row, index); + // Convert both vectors to luminance (Y channel) + uint8x16_t y0 = rgb_to_y(r0, g0, b0); + uint8x16_t y1 = rgb_to_y(r1, g1, b1); + + // Store Y values: 32 pixels total (two vectors) + vst1q_u8(y_row + index, y0); + vst1q_u8(y_row + index + vsize, y1); + + if (evenRow) { + uint8x16x2_t uv; + rgb_to_uv(r0, r1, g0, g1, b0, b1, uv.val[0], uv.val[1]); + if (is_nv21) { + std::swap(uv.val[0], uv.val[1]); + } + if constexpr (INTERLEAVE) { + vst2q_u8(u_row + index, uv); + } else { + vst1q_u8(u_row + (index >> 1), uv.val[0]); + vst1q_u8(v_row + (index >> 1), uv.val[1]); + } + } + } + + // Scalar tail: converts pixels [index, length) one at a time. Chroma is + // written only for even rows and even columns; the interleaved layout + // stores two bytes at u_row[index] and u_row[index + 1]. + static void scalar_path(const uint8_t *src_row, uint8_t *y_row, + uint8_t *u_row, uint8_t *v_row, bool is_nv21, + size_t &index, size_t &length, bool evenRow) { + const size_t u_index_ = is_nv21 ? 1 : 0; + const size_t v_index_ = is_nv21 ? 0 : 1; + + for (; index < length; index += 1) { + uint8_t b0{}, g0{}, r0{}; + constexpr size_t scn = ALPHA ?
4 : 3; + bool evenCol = (index & 1) == 0; + b0 = src_row[index * scn + b_index_]; + g0 = src_row[index * scn + g_index_]; + r0 = src_row[index * scn + r_index_]; + + uint8_t y0 = rgb_to_y(r0, g0, b0); + y_row[index] = y0; + + if (evenRow && evenCol) { + uint8_t uv[2] = {0, 0}; + rgb_to_uv(r0, g0, b0, uv); + if constexpr (INTERLEAVE) { + u_row[index] = uv[u_index_]; + u_row[index + 1] = uv[v_index_]; + } else { + u_row[index >> 1] = uv[u_index_]; + v_row[index >> 1] = uv[v_index_]; + } + } + } + } + + static uint8_t rgb_to_y(uint8_t &r, uint8_t &g, uint8_t &b) { + const int shifted16 = (16 << kWeightScale); + const int halfShift = (1 << (kWeightScale - 1)); + int yy = + kRYWeight * r + kGYWeight * g + kBYWeight * b + halfShift + shifted16; + + return std::clamp(yy >> kWeightScale, 0, 0xff); + } + + static uint8x16_t rgb_to_y(const uint8x16_t &r, const uint8x16_t &g, + const uint8x16_t &b) { + const int shifted16 = (16 << kWeightScale); + const int halfShift = (1 << (kWeightScale - 1)); + + // Table-lookup indices that place source byte k into the low byte of a + // 32-bit lane. 0xff is out of range for vqtbl1q_u8, which yields 0 for such + // indices, so each 8-bit value ends up zero-extended to 32 bits. + // Unlike the scalar overload, this path applies no final clamp: the low + // byte of every 32-bit result is taken as the Y value. + uint8x16_t index_lo_lo = {0, 0xff, 0xff, 0xff, 1, 0xff, 0xff, 0xff, + 2, 0xff, 0xff, 0xff, 3, 0xff, 0xff, 0xff}; + + uint8x16_t index_lo_hi = {4, 0xff, 0xff, 0xff, 5, 0xff, 0xff, 0xff, + 6, 0xff, 0xff, 0xff, 7, 0xff, 0xff, 0xff}; + + uint8x16_t index_hi_lo = {8, 0xff, 0xff, 0xff, 9, 0xff, 0xff, 0xff, + 10, 0xff, 0xff, 0xff, 11, 0xff, 0xff, 0xff}; + + uint8x16_t index_hi_hi = {12, 0xff, 0xff, 0xff, 13, 0xff, 0xff, 0xff, + 14, 0xff, 0xff, 0xff, 15, 0xff, 0xff, 0xff}; + + // Expand each 8-bit channel into 32-bit vectors using table lookup and + // reinterpret + uint32x4_t r_lo_lo = vreinterpretq_u32_u8(vqtbl1q_u8(r, index_lo_lo)); + uint32x4_t r_lo_hi = vreinterpretq_u32_u8(vqtbl1q_u8(r, index_lo_hi)); + uint32x4_t r_hi_lo = vreinterpretq_u32_u8(vqtbl1q_u8(r, index_hi_lo)); + uint32x4_t r_hi_hi = vreinterpretq_u32_u8(vqtbl1q_u8(r,
index_hi_hi)); + + uint32x4_t g_lo_lo = vreinterpretq_u32_u8(vqtbl1q_u8(g, index_lo_lo)); + uint32x4_t g_lo_hi = vreinterpretq_u32_u8(vqtbl1q_u8(g, index_lo_hi)); + uint32x4_t g_hi_lo = vreinterpretq_u32_u8(vqtbl1q_u8(g, index_hi_lo)); + uint32x4_t g_hi_hi = vreinterpretq_u32_u8(vqtbl1q_u8(g, index_hi_hi)); + + uint32x4_t b_lo_lo = vreinterpretq_u32_u8(vqtbl1q_u8(b, index_lo_lo)); + uint32x4_t b_lo_hi = vreinterpretq_u32_u8(vqtbl1q_u8(b, index_lo_hi)); + uint32x4_t b_hi_lo = vreinterpretq_u32_u8(vqtbl1q_u8(b, index_hi_lo)); + uint32x4_t b_hi_hi = vreinterpretq_u32_u8(vqtbl1q_u8(b, index_hi_hi)); + + // Prepare constants for fixed-point MAC (multiply-accumulate) + uint32x4_t v_kRYWeight = vdupq_n_u32(kRYWeight); + uint32x4_t v_kGYWeight = vdupq_n_u32(kGYWeight); + uint32x4_t v_kBYWeight = vdupq_n_u32(kBYWeight); + uint32x4_t y_lo_lo = vdupq_n_u32(halfShift + shifted16); + uint32x4_t y_lo_hi = vdupq_n_u32(halfShift + shifted16); + uint32x4_t y_hi_lo = vdupq_n_u32(halfShift + shifted16); + uint32x4_t y_hi_hi = vdupq_n_u32(halfShift + shifted16); + + // Apply Y = kR*R + kG*G + kB*B + bias; the accumulators above already hold + // the bias (rounding term plus 16 << kWeightScale) + y_lo_lo = vmlaq_u32(y_lo_lo, r_lo_lo, v_kRYWeight); + y_lo_hi = vmlaq_u32(y_lo_hi, r_lo_hi, v_kRYWeight); + y_hi_lo = vmlaq_u32(y_hi_lo, r_hi_lo, v_kRYWeight); + y_hi_hi = vmlaq_u32(y_hi_hi, r_hi_hi, v_kRYWeight); + + y_lo_lo = vmlaq_u32(y_lo_lo, g_lo_lo, v_kGYWeight); + y_lo_hi = vmlaq_u32(y_lo_hi, g_lo_hi, v_kGYWeight); + y_hi_lo = vmlaq_u32(y_hi_lo, g_hi_lo, v_kGYWeight); + y_hi_hi = vmlaq_u32(y_hi_hi, g_hi_hi, v_kGYWeight); + + y_lo_lo = vmlaq_u32(y_lo_lo, b_lo_lo, v_kBYWeight); + y_lo_hi = vmlaq_u32(y_lo_hi, b_lo_hi, v_kBYWeight); + y_hi_lo = vmlaq_u32(y_hi_lo, b_hi_lo, v_kBYWeight); + y_hi_hi = vmlaq_u32(y_hi_hi, b_hi_hi, v_kBYWeight); + + // Normalize down by right-shifting the fixed-point result + y_lo_lo = vshrq_n_u32(y_lo_lo, kWeightScale); + y_lo_hi = vshrq_n_u32(y_lo_hi, kWeightScale); + y_hi_lo = vshrq_n_u32(y_hi_lo, kWeightScale); + y_hi_hi =
vshrq_n_u32(y_hi_hi, kWeightScale); + + // Pack the result into 8-bit vector lanes + uint8x16x4_t y; + y.val[0] = vreinterpretq_u8_u32(y_lo_lo); + y.val[1] = vreinterpretq_u8_u32(y_lo_hi); + y.val[2] = vreinterpretq_u8_u32(y_hi_lo); + y.val[3] = vreinterpretq_u8_u32(y_hi_hi); + + // Final shuffle to extract the first byte of each lane into a flat vector + uint8x16_t index = {0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60}; + uint8x16_t output = vqtbl4q_u8(y, index); + + return output; + } + + // Scalar chroma: uv[0] receives U and uv[1] receives V, both clamped to + // [0, 255]. + // NOTE(review): the R term of V uses kBUWeight - presumably the R-to-V + // weight equals the B-to-U weight; confirm against the weight definitions. + static void rgb_to_uv(uint8_t &r, uint8_t &g, uint8_t &b, uint8_t *uv) { + const int halfShift = (1 << (kWeightScale - 1)); + const int shifted128 = (128 << kWeightScale); + int uu = + kRUWeight * r + kGUWeight * g + kBUWeight * b + halfShift + shifted128; + int vv = + kBUWeight * r + kGVWeight * g + kBVWeight * b + halfShift + shifted128; + + uv[0] = std::clamp(uu >> kWeightScale, 0, 0xff); + uv[1] = std::clamp(vv >> kWeightScale, 0, 0xff); + } + + // Vector chroma: r0/g0/b0 and r1/g1/b1 hold two consecutive groups of 16 + // pixels; U and V are computed for the even-indexed pixels of both groups, + // giving 16 values each in `u` and `v`. + static void rgb_to_uv(const uint8x16_t &r0, const uint8x16_t &r1, + const uint8x16_t &g0, const uint8x16_t &g1, + const uint8x16_t &b0, const uint8x16_t &b1, + uint8x16_t &u, uint8x16_t &v) { + // NEON lookup indices to extract even-indexed bytes into 32-bit lanes + uint8x16_t index_lo = {0, 0xff, 0xff, 0xff, 2, 0xff, 0xff, 0xff, + 4, 0xff, 0xff, 0xff, 6, 0xff, 0xff, 0xff}; + + uint8x16_t index_hi = {8, 0xff, 0xff, 0xff, 10, 0xff, 0xff, 0xff, + 12, 0xff, 0xff, 0xff, 14, 0xff, 0xff, 0xff}; + + // Extend RGB0 and RGB1 from uint8 to int32 using table lookups and + // reinterpret casts + int32x4_t r_lo_lo = vreinterpretq_s32_u8(vqtbl1q_u8(r0, index_lo)); + int32x4_t r_lo_hi = vreinterpretq_s32_u8(vqtbl1q_u8(r0, index_hi)); + int32x4_t r_hi_lo = vreinterpretq_s32_u8(vqtbl1q_u8(r1, index_lo)); + int32x4_t r_hi_hi = vreinterpretq_s32_u8(vqtbl1q_u8(r1, index_hi)); + + int32x4_t g_lo_lo = vreinterpretq_s32_u8(vqtbl1q_u8(g0, index_lo)); + int32x4_t g_lo_hi = vreinterpretq_s32_u8(vqtbl1q_u8(g0, index_hi)); + int32x4_t g_hi_lo =
vreinterpretq_s32_u8(vqtbl1q_u8(g1, index_lo)); + int32x4_t g_hi_hi = vreinterpretq_s32_u8(vqtbl1q_u8(g1, index_hi)); + + int32x4_t b_lo_lo = vreinterpretq_s32_u8(vqtbl1q_u8(b0, index_lo)); + int32x4_t b_lo_hi = vreinterpretq_s32_u8(vqtbl1q_u8(b0, index_hi)); + int32x4_t b_hi_lo = vreinterpretq_s32_u8(vqtbl1q_u8(b1, index_lo)); + int32x4_t b_hi_hi = vreinterpretq_s32_u8(vqtbl1q_u8(b1, index_hi)); + + // Constants for U/V calculation + const int halfShift = (1 << (kWeightScale - 1)); + const int shifted128 = (128 << kWeightScale); + + // ---------------- U (Cb) Component ---------------- + int32x4_t v_kRUWeight = vdupq_n_s32(kRUWeight); + int32x4_t v_kGUWeight = vdupq_n_s32(kGUWeight); + int32x4_t v_kBUWeight = vdupq_n_s32(kBUWeight); + + // Initialize accumulation with bias + int32x4_t u_lo_lo = vdupq_n_s32(halfShift + shifted128); + int32x4_t u_lo_hi = vdupq_n_s32(halfShift + shifted128); + int32x4_t u_hi_lo = vdupq_n_s32(halfShift + shifted128); + int32x4_t u_hi_hi = vdupq_n_s32(halfShift + shifted128); + + // U = R * kRU + G * kGU + B * kBU + bias + u_lo_lo = vmlaq_s32(u_lo_lo, r_lo_lo, v_kRUWeight); + u_lo_hi = vmlaq_s32(u_lo_hi, r_lo_hi, v_kRUWeight); + u_hi_lo = vmlaq_s32(u_hi_lo, r_hi_lo, v_kRUWeight); + u_hi_hi = vmlaq_s32(u_hi_hi, r_hi_hi, v_kRUWeight); + + u_lo_lo = vmlaq_s32(u_lo_lo, g_lo_lo, v_kGUWeight); + u_lo_hi = vmlaq_s32(u_lo_hi, g_lo_hi, v_kGUWeight); + u_hi_lo = vmlaq_s32(u_hi_lo, g_hi_lo, v_kGUWeight); + u_hi_hi = vmlaq_s32(u_hi_hi, g_hi_hi, v_kGUWeight); + + u_lo_lo = vmlaq_s32(u_lo_lo, b_lo_lo, v_kBUWeight); + u_lo_hi = vmlaq_s32(u_lo_hi, b_lo_hi, v_kBUWeight); + u_hi_lo = vmlaq_s32(u_hi_lo, b_hi_lo, v_kBUWeight); + u_hi_hi = vmlaq_s32(u_hi_hi, b_hi_hi, v_kBUWeight); + + // Normalize to 8-bit by shifting + u_lo_lo = vshrq_n_s32(u_lo_lo, kWeightScale); + u_lo_hi = vshrq_n_s32(u_lo_hi, kWeightScale); + u_hi_lo = vshrq_n_s32(u_hi_lo, kWeightScale); + u_hi_hi = vshrq_n_s32(u_hi_hi, kWeightScale); + + // Pack into single 16-byte vector +
uint8x16x4_t output; + output.val[0] = vreinterpretq_u8_s32(u_lo_lo); + output.val[1] = vreinterpretq_u8_s32(u_lo_hi); + output.val[2] = vreinterpretq_u8_s32(u_hi_lo); + output.val[3] = vreinterpretq_u8_s32(u_hi_hi); + + // Index vector for reordering bytes into linear output + uint8x16_t index = {0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60}; + + u = vqtbl4q_u8(output, index); + + // ---------------- V (Cr) Component ---------------- + int32x4_t v_kGVWeight = vdupq_n_s32(kGVWeight); + int32x4_t v_kBVWeight = vdupq_n_s32(kBVWeight); + // v_kRUWeight is reused to hold kBUWeight, the weight applied to R in the + // V formula below. + v_kRUWeight = vdupq_n_s32(kBUWeight); + + int32x4_t v_lo_lo = vdupq_n_s32(halfShift + shifted128); + int32x4_t v_lo_hi = vdupq_n_s32(halfShift + shifted128); + int32x4_t v_hi_lo = vdupq_n_s32(halfShift + shifted128); + int32x4_t v_hi_hi = vdupq_n_s32(halfShift + shifted128); + + // V = R * kBU + G * kGV + B * kBV + bias + v_lo_lo = vmlaq_s32(v_lo_lo, r_lo_lo, v_kRUWeight); + v_lo_hi = vmlaq_s32(v_lo_hi, r_lo_hi, v_kRUWeight); + v_hi_lo = vmlaq_s32(v_hi_lo, r_hi_lo, v_kRUWeight); + v_hi_hi = vmlaq_s32(v_hi_hi, r_hi_hi, v_kRUWeight); + + v_lo_lo = vmlaq_s32(v_lo_lo, g_lo_lo, v_kGVWeight); + v_lo_hi = vmlaq_s32(v_lo_hi, g_lo_hi, v_kGVWeight); + v_hi_lo = vmlaq_s32(v_hi_lo, g_hi_lo, v_kGVWeight); + v_hi_hi = vmlaq_s32(v_hi_hi, g_hi_hi, v_kGVWeight); + + v_lo_lo = vmlaq_s32(v_lo_lo, b_lo_lo, v_kBVWeight); + v_lo_hi = vmlaq_s32(v_lo_hi, b_lo_hi, v_kBVWeight); + v_hi_lo = vmlaq_s32(v_hi_lo, b_hi_lo, v_kBVWeight); + v_hi_hi = vmlaq_s32(v_hi_hi, b_hi_hi, v_kBVWeight); + + // Normalize + v_lo_lo = vshrq_n_s32(v_lo_lo, kWeightScale); + v_lo_hi = vshrq_n_s32(v_lo_hi, kWeightScale); + v_hi_lo = vshrq_n_s32(v_hi_lo, kWeightScale); + v_hi_hi = vshrq_n_s32(v_hi_hi, kWeightScale); + + // Pack and shuffle + output.val[0] = vreinterpretq_u8_s32(v_lo_lo); + output.val[1] = vreinterpretq_u8_s32(v_lo_hi); + output.val[2] = vreinterpretq_u8_s32(v_hi_lo); + output.val[3] = vreinterpretq_u8_s32(v_hi_hi); + + v = vqtbl4q_u8(output, index); + }
+ + static void load_rgb(uint8x16_t &b0, uint8x16_t &b1, uint8x16_t &g0, + uint8x16_t &g1, uint8x16_t &r0, uint8x16_t &r1, + const uint8_t *src_row, size_t &index) { + // Load 32 pixels starting at `index` as two de-interleaved groups of 16; + // only the B, G and R vectors are handed back (alpha, if present, is + // dropped) + const size_t vsize = 16; + constexpr size_t scn = ALPHA ? 4 : 3; + if constexpr (ALPHA) { + // 4-channel input (e.g., RGBA or BGRA) + uint8x16x4_t vsrc0 = vld4q_u8(src_row + scn * index); + uint8x16x4_t vsrc1 = vld4q_u8(src_row + scn * index + scn * vsize); + + b0 = vsrc0.val[b_index_]; + g0 = vsrc0.val[g_index_]; + r0 = vsrc0.val[r_index_]; + + b1 = vsrc1.val[b_index_]; + g1 = vsrc1.val[g_index_]; + r1 = vsrc1.val[r_index_]; + } else { + // 3-channel input (e.g., RGB or BGR) + uint8x16x3_t vsrc0 = vld3q_u8(src_row + scn * index); + uint8x16x3_t vsrc1 = vld3q_u8(src_row + scn * index + scn * vsize); + + b0 = vsrc0.val[b_index_]; + g0 = vsrc0.val[g_index_]; + r0 = vsrc0.val[r_index_]; + + b1 = vsrc1.val[b_index_]; + g1 = vsrc1.val[g_index_]; + r1 = vsrc1.val[r_index_]; + } + } + + // Position of each colour channel within a source pixel. + static constexpr size_t r_index_ = RGB ? 0 : 2; + static constexpr size_t g_index_ = 1; + static constexpr size_t b_index_ = RGB ?
2 : 0; +}; + +} // namespace kleidicv::neon + +#endif // KLEIDICV_RGB_TO_YUV420_H diff --git a/kleidicv/src/conversions/rgb_to_yuv420p_api.cpp b/kleidicv/src/conversions/rgb_to_yuv420p_api.cpp new file mode 100644 index 000000000..cbdbc587a --- /dev/null +++ b/kleidicv/src/conversions/rgb_to_yuv420p_api.cpp @@ -0,0 +1,59 @@ +// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "kleidicv/conversions/rgb_to_yuv_420.h" +#include "kleidicv/dispatch.h" +#include "kleidicv/kleidicv.h" + +// Binds a public stripe symbol to its implementation in kleidicv::neon; the +// remaining KLEIDICV_MULTIVERSION_C_API arguments are nullptr. +#define KLEIDICV_DEFINE_C_API(name, partialname) \ + KLEIDICV_MULTIVERSION_C_API(name, &kleidicv::neon::partialname, nullptr, \ + nullptr, nullptr) + +KLEIDICV_DEFINE_C_API(kleidicv_rgb_to_yuv420_p_stripe_u8, + rgb_to_yuv420_p_stripe_u8); + +KLEIDICV_DEFINE_C_API(kleidicv_rgba_to_yuv420_p_stripe_u8, + rgba_to_yuv420_p_stripe_u8); + +KLEIDICV_DEFINE_C_API(kleidicv_bgr_to_yuv420_p_stripe_u8, + bgr_to_yuv420_p_stripe_u8); + +KLEIDICV_DEFINE_C_API(kleidicv_bgra_to_yuv420_p_stripe_u8, + bgra_to_yuv420_p_stripe_u8); + +// Whole-image entry points: each forwards to the matching stripe function +// with begin = 0 and end = height. +extern "C" { + +kleidicv_error_t kleidicv_rgb_to_yuv420_p_u8(const uint8_t *src, + size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, + size_t height, bool is_nv21) { + return kleidicv_rgb_to_yuv420_p_stripe_u8(src, src_stride, dst, dst_stride, + width, height, is_nv21, 0, height); +} + +kleidicv_error_t kleidicv_rgba_to_yuv420_p_u8(const uint8_t *src, + size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, + size_t height, bool is_nv21) { + return kleidicv_rgba_to_yuv420_p_stripe_u8(src, src_stride, dst, dst_stride, + width, height, is_nv21, 0, height); +} + +kleidicv_error_t kleidicv_bgr_to_yuv420_p_u8(const uint8_t *src, + size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, + size_t height, bool is_nv21) { + return kleidicv_bgr_to_yuv420_p_stripe_u8(src, src_stride, dst, dst_stride, + width, height, is_nv21, 0, height); +} + +kleidicv_error_t
kleidicv_bgra_to_yuv420_p_u8(const uint8_t *src, + size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, + size_t height, bool is_nv21) { + return kleidicv_bgra_to_yuv420_p_stripe_u8(src, src_stride, dst, dst_stride, + width, height, is_nv21, 0, height); +} + +} // extern "C" diff --git a/kleidicv/src/conversions/rgb_to_yuv420p_neon.cpp b/kleidicv/src/conversions/rgb_to_yuv420p_neon.cpp new file mode 100644 index 000000000..30878b9af --- /dev/null +++ b/kleidicv/src/conversions/rgb_to_yuv420p_neon.cpp @@ -0,0 +1,72 @@ +// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "kleidicv/conversions/rgb_to_yuv_420.h" +#include "kleidicv/kleidicv.h" +#include "kleidicv/neon.h" +#include "rgb_to_yuv420_neon.h" + +namespace kleidicv::neon { + +// Planar stripe converters. `dst` holds `height` Y rows followed by the chroma +// region, which starts at dst + dst_stride * height and uses the same stride; +// dst is validated for (height * 3 + 1) / 2 rows in total. +// `begin` and `end` are forwarded unchanged to rgb2yuv420p_operation. +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t rgb_to_yuv420_p_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, (height * 3 + 1) / 2); + CHECK_IMAGE_SIZE(width, height); + uint8_t *uv_dst = dst + dst_stride * height; + return RGBxorBGRxToYUV420::rgb2yuv420p_operation( + src, src_stride, dst, dst_stride, uv_dst, dst_stride, width, height, + is_nv21, begin, end); +} + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t rgba_to_yuv420_p_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, (height * 3 + 1) / 2); + CHECK_IMAGE_SIZE(width, height); + uint8_t *uv_dst = dst + dst_stride * height; + return RGBxorBGRxToYUV420::rgb2yuv420p_operation( + src, src_stride, dst, dst_stride, uv_dst, dst_stride, width, height, + is_nv21, begin, end); +} +
+KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t bgr_to_yuv420_p_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, (height * 3 + 1) / 2); + CHECK_IMAGE_SIZE(width, height); + uint8_t *uv_dst = dst + dst_stride * height; + return RGBxorBGRxToYUV420::rgb2yuv420p_operation( + src, src_stride, dst, dst_stride, uv_dst, dst_stride, width, height, + is_nv21, begin, end); +} + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t bgra_to_yuv420_p_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, (height * 3 + 1) / 2); + CHECK_IMAGE_SIZE(width, height); + uint8_t *uv_dst = dst + dst_stride * height; + return RGBxorBGRxToYUV420::rgb2yuv420p_operation( + src, src_stride, dst, dst_stride, uv_dst, dst_stride, width, height, + is_nv21, begin, end); +} + +} // namespace kleidicv::neon diff --git a/kleidicv/src/conversions/rgb_to_yuv420sp_api.cpp b/kleidicv/src/conversions/rgb_to_yuv420sp_api.cpp new file mode 100644 index 000000000..dfc1769eb --- /dev/null +++ b/kleidicv/src/conversions/rgb_to_yuv420sp_api.cpp @@ -0,0 +1,69 @@ +// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "kleidicv/conversions/rgb_to_yuv_420.h" +#include "kleidicv/dispatch.h" +#include "kleidicv/kleidicv.h" + +// Semi-planar C entry points. The stripe symbols are bound to their +// kleidicv::neon implementations (the remaining KLEIDICV_MULTIVERSION_C_API +// arguments are nullptr); each whole-image function in the extern "C" block +// forwards to its stripe variant with begin = 0 and end = height. +#define KLEIDICV_DEFINE_C_API(name, partialname) \ + KLEIDICV_MULTIVERSION_C_API(name, &kleidicv::neon::partialname, nullptr, \ + nullptr, nullptr) + +KLEIDICV_DEFINE_C_API(kleidicv_rgb_to_yuv420_sp_stripe_u8, + rgb_to_yuv420_sp_stripe_u8); + +KLEIDICV_DEFINE_C_API(kleidicv_rgba_to_yuv420_sp_stripe_u8, + rgba_to_yuv420_sp_stripe_u8); +
+KLEIDICV_DEFINE_C_API(kleidicv_bgr_to_yuv420_sp_stripe_u8, + bgr_to_yuv420_sp_stripe_u8); + +KLEIDICV_DEFINE_C_API(kleidicv_bgra_to_yuv420_sp_stripe_u8, + bgra_to_yuv420_sp_stripe_u8); + +extern "C" { + +kleidicv_error_t kleidicv_rgb_to_yuv420_sp_u8(const uint8_t *src, + size_t src_stride, uint8_t *y_dst, + size_t y_stride, uint8_t *uv_dst, + size_t uv_stride, size_t width, + size_t height, bool is_nv21) { + return kleidicv_rgb_to_yuv420_sp_stripe_u8(src, src_stride, y_dst, y_stride, + uv_dst, uv_stride, width, height, + is_nv21, 0, height); +} + +kleidicv_error_t kleidicv_rgba_to_yuv420_sp_u8(const uint8_t *src, + size_t src_stride, + uint8_t *y_dst, size_t y_stride, + uint8_t *uv_dst, + size_t uv_stride, size_t width, + size_t height, bool is_nv21) { + return kleidicv_rgba_to_yuv420_sp_stripe_u8(src, src_stride, y_dst, y_stride, + uv_dst, uv_stride, width, height, + is_nv21, 0, height); +} + +kleidicv_error_t kleidicv_bgr_to_yuv420_sp_u8(const uint8_t *src, + size_t src_stride, uint8_t *y_dst, + size_t y_stride, uint8_t *uv_dst, + size_t uv_stride, size_t width, + size_t height, bool is_nv21) { + return kleidicv_bgr_to_yuv420_sp_stripe_u8(src, src_stride, y_dst, y_stride, + uv_dst, uv_stride, width, height, + is_nv21, 0, height); +} + +kleidicv_error_t kleidicv_bgra_to_yuv420_sp_u8(const uint8_t *src, + size_t src_stride, + uint8_t *y_dst, size_t y_stride, + uint8_t *uv_dst, + size_t uv_stride, size_t width, + size_t height, bool is_nv21) { + return kleidicv_bgra_to_yuv420_sp_stripe_u8(src, src_stride, y_dst, y_stride, + uv_dst, uv_stride, width, height, + is_nv21, 0, height); +} + +} // extern "C" diff --git a/kleidicv/src/conversions/rgb_to_yuv420sp_neon.cpp b/kleidicv/src/conversions/rgb_to_yuv420sp_neon.cpp new file mode 100644 index 000000000..5821ff488 --- /dev/null +++ b/kleidicv/src/conversions/rgb_to_yuv420sp_neon.cpp @@ -0,0 +1,76 @@ +// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +// Semi-planar NEON stripe converters: each validates src and y_dst for +// `height` rows and uv_dst for (height + 1) / 2 rows, then calls +// rgb2yuv420p_operation with `begin` and `end` forwarded unchanged. +
+#include "kleidicv/conversions/rgb_to_yuv_420.h" +#include "kleidicv/kleidicv.h" +#include "kleidicv/neon.h" +#include "rgb_to_yuv420_neon.h" + +namespace kleidicv::neon { + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t rgb_to_yuv420_sp_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *y_dst, + size_t y_stride, uint8_t *uv_dst, + size_t uv_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(y_dst, y_stride, height); + CHECK_POINTER_AND_STRIDE(uv_dst, uv_stride, (height + 1) / 2); + CHECK_IMAGE_SIZE(width, height); + return RGBxorBGRxToYUV420::rgb2yuv420p_operation( + src, src_stride, y_dst, y_stride, uv_dst, uv_stride, width, height, + is_nv21, begin, end); +} + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t rgba_to_yuv420_sp_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *y_dst, + size_t y_stride, uint8_t *uv_dst, + size_t uv_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(y_dst, y_stride, height); + CHECK_POINTER_AND_STRIDE(uv_dst, uv_stride, (height + 1) / 2); + CHECK_IMAGE_SIZE(width, height); + return RGBxorBGRxToYUV420::rgb2yuv420p_operation( + src, src_stride, y_dst, y_stride, uv_dst, uv_stride, width, height, + is_nv21, begin, end); +} + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t bgr_to_yuv420_sp_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *y_dst, + size_t y_stride, uint8_t *uv_dst, + size_t uv_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(y_dst, y_stride, height); + CHECK_POINTER_AND_STRIDE(uv_dst, uv_stride, (height + 1) / 2); + CHECK_IMAGE_SIZE(width, height); + return RGBxorBGRxToYUV420::rgb2yuv420p_operation( + src, src_stride, y_dst, y_stride, uv_dst, uv_stride, width, height, +
is_nv21, begin, end); +} + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t bgra_to_yuv420_sp_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *y_dst, + size_t y_stride, uint8_t *uv_dst, + size_t uv_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(y_dst, y_stride, height); + CHECK_POINTER_AND_STRIDE(uv_dst, uv_stride, (height + 1) / 2); + CHECK_IMAGE_SIZE(width, height); + return RGBxorBGRxToYUV420::rgb2yuv420p_operation( + src, src_stride, y_dst, y_stride, uv_dst, uv_stride, width, height, + is_nv21, begin, end); +} + +} // namespace kleidicv::neon diff --git a/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h b/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h index df51c3068..1b2c7f941 100644 --- a/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h +++ b/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h @@ -107,6 +107,71 @@ kleidicv_error_t kleidicv_thread_yuv_p_to_rgb_u8( kleidicv_error_t kleidicv_thread_yuv_p_to_rgba_u8( const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, size_t width, size_t height, bool v_first, kleidicv_thread_multithreading); + +/// Internal - not part of the public API and its direct use is not supported. +/// +/// Multithreaded implementation of kleidicv_rgb_to_yuv420_p_u8 - see the +/// documentation of that function for more details. +kleidicv_error_t kleidicv_thread_rgb_to_yuv420_p_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, bool is_nv21, kleidicv_thread_multithreading); + +/// Internal - not part of the public API and its direct use is not supported. +/// +/// Multithreaded implementation of kleidicv_rgba_to_yuv420_p_u8 - see the +/// documentation of that function for more details.
+kleidicv_error_t kleidicv_thread_rgba_to_yuv420_p_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, bool is_nv21, kleidicv_thread_multithreading); + +/// Internal - not part of the public API and its direct use is not supported. +/// +/// Multithreaded implementation of kleidicv_bgr_to_yuv420_p_u8 - see the +/// documentation of that function for more details. +kleidicv_error_t kleidicv_thread_bgr_to_yuv420_p_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, bool is_nv21, kleidicv_thread_multithreading); + +/// Internal - not part of the public API and its direct use is not supported. +/// +/// Multithreaded implementation of kleidicv_bgra_to_yuv420_p_u8 - see the +/// documentation of that function for more details. +kleidicv_error_t kleidicv_thread_bgra_to_yuv420_p_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, bool is_nv21, kleidicv_thread_multithreading); + +/// Internal - not part of the public API and its direct use is not supported. +/// +/// Multithreaded implementation of kleidicv_rgb_to_yuv420_sp_u8 - see the +/// documentation of that function for more details. +kleidicv_error_t kleidicv_thread_rgb_to_yuv420_sp_u8( + const uint8_t *src, size_t src_stride, uint8_t *y_dst, size_t y_stride, + uint8_t *uv_dst, size_t uv_stride, size_t width, size_t height, + bool is_nv21, kleidicv_thread_multithreading); + +/// Internal - not part of the public API and its direct use is not supported. +/// +/// Multithreaded implementation of kleidicv_rgba_to_yuv420_sp_u8 - see the +/// documentation of that function for more details.
+kleidicv_error_t kleidicv_thread_rgba_to_yuv420_sp_u8( + const uint8_t *src, size_t src_stride, uint8_t *y_dst, size_t y_stride, + uint8_t *uv_dst, size_t uv_stride, size_t width, size_t height, + bool is_nv21, kleidicv_thread_multithreading); + +/// Internal - not part of the public API and its direct use is not supported. +/// +/// Multithreaded implementation of kleidicv_bgr_to_yuv420_sp_u8 - see the +/// documentation of that function for more details. +kleidicv_error_t kleidicv_thread_bgr_to_yuv420_sp_u8( + const uint8_t *src, size_t src_stride, uint8_t *y_dst, size_t y_stride, + uint8_t *uv_dst, size_t uv_stride, size_t width, size_t height, + bool is_nv21, kleidicv_thread_multithreading); + +/// Internal - not part of the public API and its direct use is not supported. +/// +/// Multithreaded implementation of kleidicv_bgra_to_yuv420_sp_u8 - see the +/// documentation of that function for more details. +kleidicv_error_t kleidicv_thread_bgra_to_yuv420_sp_u8( + const uint8_t *src, size_t src_stride, uint8_t *y_dst, size_t y_stride, + uint8_t *uv_dst, size_t uv_stride, size_t width, size_t height, + bool is_nv21, kleidicv_thread_multithreading); /// Internal - not part of the public API and its direct use is not supported.
/// diff --git a/kleidicv_thread/src/kleidicv_thread.cpp b/kleidicv_thread/src/kleidicv_thread.cpp index 875091a99..869df8af4 100644 --- a/kleidicv_thread/src/kleidicv_thread.cpp +++ b/kleidicv_thread/src/kleidicv_thread.cpp @@ -13,6 +13,7 @@ #include "kleidicv/arithmetics/rotate.h" #include "kleidicv/arithmetics/scale.h" +#include "kleidicv/conversions/rgb_to_yuv_420.h" #include "kleidicv/conversions/yuv_420_to_rgb.h" #include "kleidicv/ctypes.h" #include "kleidicv/filters/blur_and_downsample.h" @@ -299,6 +300,102 @@ kleidicv_error_t kleidicv_thread_yuv_p_to_rgba_u8( return parallel_batches(callback, mt, (height + 1) / 2); } +kleidicv_error_t kleidicv_thread_rgb_to_yuv420_p_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, bool is_nv21, + kleidicv_thread_multithreading mt) { + auto callback = [=](unsigned begin, unsigned end) { + return kleidicv_rgb_to_yuv420_p_stripe_u8( + src, src_stride, dst, dst_stride, width, height, is_nv21, + static_cast(begin), static_cast(end)); + }; + return parallel_batches(callback, mt, (height + 1) / 2); +} + +kleidicv_error_t kleidicv_thread_rgba_to_yuv420_p_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, bool is_nv21, + kleidicv_thread_multithreading mt) { + auto callback = [=](unsigned begin, unsigned end) { + return kleidicv_rgba_to_yuv420_p_stripe_u8( + src, src_stride, dst, dst_stride, width, height, is_nv21, + static_cast(begin), static_cast(end)); + }; + return parallel_batches(callback, mt, (height + 1) / 2); +} + +kleidicv_error_t kleidicv_thread_bgr_to_yuv420_p_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, bool is_nv21, + kleidicv_thread_multithreading mt) { + auto callback = [=](unsigned begin, unsigned end) { + return kleidicv_bgr_to_yuv420_p_stripe_u8( + src, src_stride, dst, dst_stride, width, height, is_nv21, + static_cast(begin), 
static_cast(end)); + }; + return parallel_batches(callback, mt, (height + 1) / 2); +} + +kleidicv_error_t kleidicv_thread_bgra_to_yuv420_p_u8( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height, bool is_nv21, + kleidicv_thread_multithreading mt) { + auto callback = [=](unsigned begin, unsigned end) { + return kleidicv_bgra_to_yuv420_p_stripe_u8( + src, src_stride, dst, dst_stride, width, height, is_nv21, + static_cast(begin), static_cast(end)); + }; + return parallel_batches(callback, mt, (height + 1) / 2); +} + +kleidicv_error_t kleidicv_thread_rgb_to_yuv420_sp_u8( + const uint8_t *src, size_t src_stride, uint8_t *y_dst, size_t y_stride, + uint8_t *uv_dst, size_t uv_stride, size_t width, size_t height, + bool is_nv21, kleidicv_thread_multithreading mt) { + auto callback = [=](unsigned begin, unsigned end) { + return kleidicv_rgb_to_yuv420_sp_stripe_u8( + src, src_stride, y_dst, y_stride, uv_dst, uv_stride, width, height, + is_nv21, static_cast(begin), static_cast(end)); + }; + return parallel_batches(callback, mt, (height + 1) / 2); +} + +kleidicv_error_t kleidicv_thread_rgba_to_yuv420_sp_u8( + const uint8_t *src, size_t src_stride, uint8_t *y_dst, size_t y_stride, + uint8_t *uv_dst, size_t uv_stride, size_t width, size_t height, + bool is_nv21, kleidicv_thread_multithreading mt) { + auto callback = [=](unsigned begin, unsigned end) { + return kleidicv_rgba_to_yuv420_sp_stripe_u8( + src, src_stride, y_dst, y_stride, uv_dst, uv_stride, width, height, + is_nv21, static_cast(begin), static_cast(end)); + }; + return parallel_batches(callback, mt, (height + 1) / 2); +} + +kleidicv_error_t kleidicv_thread_bgr_to_yuv420_sp_u8( + const uint8_t *src, size_t src_stride, uint8_t *y_dst, size_t y_stride, + uint8_t *uv_dst, size_t uv_stride, size_t width, size_t height, + bool is_nv21, kleidicv_thread_multithreading mt) { + auto callback = [=](unsigned begin, unsigned end) { + return kleidicv_bgr_to_yuv420_sp_stripe_u8( + 
src, src_stride, y_dst, y_stride, uv_dst, uv_stride, width, height, + is_nv21, static_cast(begin), static_cast(end)); + }; + return parallel_batches(callback, mt, (height + 1) / 2); +} + +kleidicv_error_t kleidicv_thread_bgra_to_yuv420_sp_u8( + const uint8_t *src, size_t src_stride, uint8_t *y_dst, size_t y_stride, + uint8_t *uv_dst, size_t uv_stride, size_t width, size_t height, + bool is_nv21, kleidicv_thread_multithreading mt) { + auto callback = [=](unsigned begin, unsigned end) { + return kleidicv_bgra_to_yuv420_sp_stripe_u8( + src, src_stride, y_dst, y_stride, uv_dst, uv_stride, width, height, + is_nv21, static_cast(begin), static_cast(end)); + }; + return parallel_batches(callback, mt, (height + 1) / 2); +} + template inline kleidicv_error_t kleidicv_thread_yuv_sp_to_rgb_u8_impl( F f, const uint8_t *src_y, size_t src_y_stride, const uint8_t *src_uv, diff --git a/scripts/benchmark/benchmarks.txt b/scripts/benchmark/benchmarks.txt index 1606bc649..859851491 100755 --- a/scripts/benchmark/benchmarks.txt +++ b/scripts/benchmark/benchmarks.txt @@ -21,6 +21,11 @@ YUVP2BGRA: opencv_perf_imgproc '*cvtColorYUV420/*' '($PIXEL_FORMAT, COLOR_YUV2B YUVP2RGB: opencv_perf_imgproc '*cvtColorYUV420/*' '($PIXEL_FORMAT, COLOR_YUV2RGB_YV12)' YUVP2RGBA: opencv_perf_imgproc '*cvtColorYUV420/*' '($PIXEL_FORMAT, COLOR_YUV2RGBA_YV12)' +RGB2YUVP: opencv_perf_imgproc '*cvtColorRGB2YUV420p/*' '($PIXEL_FORMAT, COLOR_RGB2YUV_YV12)' +RGBA2YUVP: opencv_perf_imgproc '*cvtColorRGB2YUV420p/*' '($PIXEL_FORMAT, COLOR_RGBA2YUV_YV12)' +BGR2YUVP: opencv_perf_imgproc '*cvtColorRGB2YUV420p/*' '($PIXEL_FORMAT, COLOR_BGR2YUV_YV12)' +BGRA2YUVP: opencv_perf_imgproc '*cvtColorRGB2YUV420p/*' '($PIXEL_FORMAT, COLOR_BGRA2YUV_YV12)' + RGB2YUV: opencv_perf_imgproc '*cvtColor8u/*' '($PIXEL_FORMAT, COLOR_RGB2YUV)' BGR2YUV: opencv_perf_imgproc '*cvtColor8u/*' '($PIXEL_FORMAT, COLOR_BGR2YUV)' RGBA2YUV: opencv_perf_imgproc '*cvtColor8u/*' '($PIXEL_FORMAT, CX_RGBA2YUV)' diff --git a/scripts/ci-opencv.sh 
b/scripts/ci-opencv.sh index db777ecc3..98a49f83d 100755 --- a/scripts/ci-opencv.sh +++ b/scripts/ci-opencv.sh @@ -106,6 +106,10 @@ IMGPROC_TEST_PATTERNS=( '*Imgproc_ColorYUV*' '*Imgproc_cvtColor_BE.COLOR_YUV*' '*Imgproc_cvtColor_BE.COLOR_RGB2YUV' + '*Imgproc_cvtColor_BE.COLOR_RGB2YUV_YV12' + '*Imgproc_cvtColor_BE.COLOR_BGR2YUV_YV12' + '*Imgproc_cvtColor_BE.COLOR_RGBA2YUV_YV12' + '*Imgproc_cvtColor_BE.COLOR_BGRA2YUV_YV12' '*Imgproc_Threshold*' '*Imgproc_Morphology*' '*Imgproc_GaussianBlur*' diff --git a/test/api/test_rgb_to_yuv_420_p.cpp b/test/api/test_rgb_to_yuv_420_p.cpp new file mode 100644 index 000000000..c351699ef --- /dev/null +++ b/test/api/test_rgb_to_yuv_420_p.cpp @@ -0,0 +1,238 @@ +// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include +#include +#include + +#include "framework/array.h" +#include "framework/generator.h" +#include "kleidicv/kleidicv.h" +#include "test_config.h" + +class RGB2YUV420pTest : public testing::Test { + public: + struct TestParams { + size_t width; + size_t src_padding; + size_t dst_padding; + size_t height; + size_t channels; + bool is_nv21; + bool is_rgb; + }; + + static std::vector generate_test_cases( + const std::vector& widths, + const std::vector& src_paddings, + const std::vector& dst_paddings, + const std::vector& heights, const std::vector& channels, + const std::vector& uv_cases, + const std::vector& output_image_case) { + std::vector cases; + + for (size_t w : widths) { + for (size_t src_pad : src_paddings) { + for (size_t dst_pad : dst_paddings) { + for (size_t h : heights) { + for (size_t c : channels) { + for (bool uv_case : uv_cases) { + for (bool is_rgb : output_image_case) { + cases.push_back({w, src_pad, dst_pad, h, c, uv_case, is_rgb}); + } + } + } + } + } + } + } + + return cases; + } + + static std::vector get_test_cases() { + std::vector widths = {1, 2, 4, 6, 18, 27, 32, 64, 3}; + std::vector src_paddings = {0}; + std::vector 
dst_paddings = {0}; + std::vector heights = {2, 5, 11, 16}; + std::vector channels = {3, 4}; + std::vector uv_cases = {true, false}; + std::vector output_image_case = {true, false}; + return generate_test_cases(widths, src_paddings, dst_paddings, heights, + channels, uv_cases, output_image_case); + } + + void run_test_case(const TestParams& params) { + test::Array2D src{params.width * params.channels, params.height, + params.src_padding, params.channels}; + + test::Array2D expected_dst{ + params.width, (params.height * 3 + 1) / 2, params.dst_padding}; + + test::Array2D dst{params.width, (params.height * 3 + 1) / 2, + params.dst_padding}; + + test::PseudoRandomNumberGenerator input_value_random_range; + src.fill(input_value_random_range); + + calculate_reference(src.data(), src.stride(), expected_dst.data(), + expected_dst.stride(), params.width, params.height, + params.is_nv21, params.is_rgb, params.channels); + + auto status = KLEIDICV_OK; + + if (params.channels == 3) { + if (params.is_rgb) { + status = kleidicv_rgb_to_yuv420_p_u8( + src.data(), src.stride(), dst.data(), dst.stride(), params.width, + params.height, params.is_nv21); + } else { + status = kleidicv_bgr_to_yuv420_p_u8( + src.data(), src.stride(), dst.data(), dst.stride(), params.width, + params.height, params.is_nv21); + } + } + + if (params.channels == 4) { + if (params.is_rgb) { + status = kleidicv_rgba_to_yuv420_p_u8( + src.data(), src.stride(), dst.data(), dst.stride(), params.width, + params.height, params.is_nv21); + } else { + status = kleidicv_bgra_to_yuv420_p_u8( + src.data(), src.stride(), dst.data(), dst.stride(), params.width, + params.height, params.is_nv21); + } + } + + EXPECT_EQ(KLEIDICV_OK, status); + EXPECT_EQ_ARRAY2D(expected_dst, dst); + } + + template + void run_unsupported(Func impl, size_t channels, bool is_nv21) { + test::Array2D src{20 * channels, 10, 0, channels}; + test::Array2D dst{20, (10 * 3 + 1) / 2}; + + test::test_null_args(impl, src.data(), src.stride(), dst.data(), + 
dst.stride(), dst.width(), dst.height(), is_nv21); + + EXPECT_EQ(KLEIDICV_OK, impl(src.data(), src.stride(), dst.data(), + dst.stride(), 0, 1, is_nv21)); + + EXPECT_EQ(KLEIDICV_OK, impl(src.data(), src.stride(), dst.data(), + dst.stride(), 1, 0, is_nv21)); + + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + impl(src.data(), src.stride(), dst.data(), dst.stride(), + KLEIDICV_MAX_IMAGE_PIXELS + 1, 1, is_nv21)); + EXPECT_EQ( + KLEIDICV_ERROR_RANGE, + impl(src.data(), src.stride(), dst.data(), dst.stride(), + KLEIDICV_MAX_IMAGE_PIXELS, KLEIDICV_MAX_IMAGE_PIXELS, is_nv21)); + } + + private: + // Fixed-point RGB to YUV420 weights; kBUWeight doubles as the R->V weight + static const int kWeightScale = 20; + static const int kRYWeight = + 269484; // 0.299055 * (236-16)/256 * (1 << kWeightScale) + static const int kGYWeight = + 528482; // 0.586472 * (236-16)/256 * (1 << kWeightScale) + static const int kBYWeight = + 102760; // 0.114035 * (236-16)/256 * (1 << kWeightScale) + static const int kRUWeight = -155188; // -0.148 * (1 << kWeightScale) + static const int kGUWeight = -305135; // -0.291 * (1 << kWeightScale) + static const int kBUWeight = 460324; // 0.439 * (1 << kWeightScale) + static const int kGVWeight = -385875; // -0.368 * (1 << kWeightScale) + static const int kBVWeight = -74448; // -0.071 * (1 << kWeightScale) + static uint8_t saturate_cast_s32_to_u8(int32_t rhs) { + return static_cast( + std::min(std::max(0, rhs), + static_cast(std::numeric_limits::max()))); + } + uint8_t rgb_to_y(uint8_t r, uint8_t g, uint8_t b) { + const int shifted16 = (16 << kWeightScale); + const int halfShift = (1 << (kWeightScale - 1)); + int yy = + kRYWeight * r + kGYWeight * g + kBYWeight * b + halfShift + shifted16; + + return std::clamp(yy >> kWeightScale, 0, 0xff); + } + + static void rgb_to_uv(uint8_t r, uint8_t g, uint8_t b, uint8_t& u, + uint8_t& v) { + const int halfShift = (1 << (kWeightScale - 1)); + const int shifted128 = (128 << kWeightScale); + int uu = + kRUWeight * r + kGUWeight * g + kBUWeight 
* b + halfShift + shifted128; + int vv = + kBUWeight * r + kGVWeight * g + kBVWeight * b + halfShift + shifted128; + + u = std::clamp(uu >> kWeightScale, 0, 0xff); + v = std::clamp(vv >> kWeightScale, 0, 0xff); + } + void calculate_reference(const uint8_t* src, size_t src_stride, uint8_t* dst, + size_t dst_stride, size_t width, size_t height, + bool is_nv21, bool RGB, size_t channels) { + const uint8_t* src_row = nullptr; + uint8_t* uv_data = dst + dst_stride * height; + uint8_t* y_row = nullptr; + uint8_t* u_row = nullptr; + uint8_t* v_row = nullptr; + + for (size_t h = 0; h < height; h++) { + src_row = src + src_stride * h; + y_row = dst + dst_stride * h; + + bool evenRow = (h % 2) == 0; + if (evenRow) { + u_row = uv_data + dst_stride * (h / 4) + ((h / 2) % 2) * (width / 2); + v_row = uv_data + dst_stride * ((h + height + 1) / 4) + + (((h + height + 1) / 2) % 2) * (width / 2); + } + + for (size_t w = 0; w < width; w++) { + uint8_t b0{}, g0{}, r0{}; + b0 = src_row[w * channels + 0]; + g0 = src_row[w * channels + 1]; + r0 = src_row[w * channels + 2]; + if (RGB) { + std::swap(b0, r0); + } + uint8_t y0 = rgb_to_y(r0, g0, b0); + y_row[w] = y0; + bool evenCol = (w % 2) == 0; + if (evenRow && evenCol) { + uint8_t uu{}, vv{}; + rgb_to_uv(r0, g0, b0, uu, vv); + if (is_nv21) { + std::swap(uu, vv); + } + u_row[w >> 1] = uu; + v_row[w >> 1] = vv; + } + } + } + } +}; + +TEST_F(RGB2YUV420pTest, ConvertspaddedInputsWithAllParamCombinations) { + for (const auto& params : get_test_cases()) { + run_test_case(params); + } +} + +TEST_F(RGB2YUV420pTest, ReturnsErrorForUnsupportedCombinations) { + run_unsupported(kleidicv_rgb_to_yuv420_p_u8, 3, true); + run_unsupported(kleidicv_rgba_to_yuv420_p_u8, 4, true); + run_unsupported(kleidicv_bgr_to_yuv420_p_u8, 3, true); + run_unsupported(kleidicv_bgra_to_yuv420_p_u8, 4, true); + run_unsupported(kleidicv_rgb_to_yuv420_p_u8, 3, false); + run_unsupported(kleidicv_rgba_to_yuv420_p_u8, 4, false); + run_unsupported(kleidicv_bgr_to_yuv420_p_u8, 3, 
false); + run_unsupported(kleidicv_bgra_to_yuv420_p_u8, 4, false); +} diff --git a/test/api/test_rgb_to_yuv_420_sp.cpp b/test/api/test_rgb_to_yuv_420_sp.cpp new file mode 100644 index 000000000..83f7b1723 --- /dev/null +++ b/test/api/test_rgb_to_yuv_420_sp.cpp @@ -0,0 +1,256 @@ +// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include +#include +#include + +#include "framework/array.h" +#include "framework/generator.h" +#include "framework/utils.h" +#include "kleidicv/kleidicv.h" +#include "kleidicv/utils.h" +#include "test_config.h" + +class RGB2YUV420SpTest : public testing::Test { + public: + struct TestParams { + size_t width; + size_t src_padding; + size_t dst_padding; + size_t height; + size_t channels; + bool is_nv21; + bool is_rgb; + }; + + static std::vector generate_test_cases( + const std::vector& widths, + const std::vector& src_paddings, + const std::vector& dst_paddings, + const std::vector& heights, const std::vector& channels, + const std::vector& uv_cases, + const std::vector& output_image_case) { + std::vector cases; + + for (size_t w : widths) { + for (size_t src_pad : src_paddings) { + for (size_t dst_pad : dst_paddings) { + for (size_t h : heights) { + for (size_t c : channels) { + for (bool uv_case : uv_cases) { + for (bool is_rgb : output_image_case) { + cases.push_back({w, src_pad, dst_pad, h, c, uv_case, is_rgb}); + } + } + } + } + } + } + } + + return cases; + } + + static std::vector get_test_cases() { + std::vector widths = {1, 2, 4, 6, 18, 27, 32, 64, 3}; + std::vector src_paddings = {0}; + std::vector dst_paddings = {0}; + std::vector heights = {2, 5, 11, 16}; + std::vector channels = {3, 4}; + std::vector uv_cases = {true, false}; + std::vector output_image_case = {true, false}; + return generate_test_cases(widths, src_paddings, dst_paddings, heights, + channels, uv_cases, output_image_case); + } + + void run_test_case(const TestParams& params) { + 
test::Array2D src{params.width * params.channels, params.height, + params.src_padding, params.channels}; + + test::Array2D expected_y_dst{params.width, params.height, + params.dst_padding}; + + test::Array2D expected_uv_dst{ + KLEIDICV_TARGET_NAMESPACE::align_up(params.width, 2), + (params.height + 1) / 2, params.dst_padding}; + + test::Array2D y_dst{params.width, params.height, + params.dst_padding}; + + test::Array2D uv_dst{ + KLEIDICV_TARGET_NAMESPACE::align_up(params.width, 2), + (params.height + 1) / 2, params.dst_padding}; + + test::PseudoRandomNumberGenerator input_value_random_range; + src.fill(input_value_random_range); + + calculate_reference(src.data(), src.stride(), expected_y_dst.data(), + expected_y_dst.stride(), expected_uv_dst.data(), + expected_uv_dst.stride(), params.width, params.height, + params.is_nv21, params.is_rgb, params.channels); + + auto status = KLEIDICV_OK; + + if (params.channels == 3) { + if (params.is_rgb) { + status = kleidicv_rgb_to_yuv420_sp_u8( + src.data(), src.stride(), y_dst.data(), y_dst.stride(), + uv_dst.data(), uv_dst.stride(), params.width, params.height, + params.is_nv21); + } else { + status = kleidicv_bgr_to_yuv420_sp_u8( + src.data(), src.stride(), y_dst.data(), y_dst.stride(), + uv_dst.data(), uv_dst.stride(), params.width, params.height, + params.is_nv21); + } + } + + if (params.channels == 4) { + if (params.is_rgb) { + status = kleidicv_rgba_to_yuv420_sp_u8( + src.data(), src.stride(), y_dst.data(), y_dst.stride(), + uv_dst.data(), uv_dst.stride(), params.width, params.height, + params.is_nv21); + } else { + status = kleidicv_bgra_to_yuv420_sp_u8( + src.data(), src.stride(), y_dst.data(), y_dst.stride(), + uv_dst.data(), uv_dst.stride(), params.width, params.height, + params.is_nv21); + } + } + + EXPECT_EQ(KLEIDICV_OK, status); + EXPECT_EQ_ARRAY2D(expected_y_dst, y_dst); + EXPECT_EQ_ARRAY2D(expected_uv_dst, uv_dst); + } + + template + void run_unsupported(Func impl, size_t channels, bool is_nv21) { + test::Array2D 
src{20 * channels, 10, 0, channels}; + test::Array2D y_dst{20, 10}; + test::Array2D uv_dst{20, 5}; + + test::test_null_args(impl, src.data(), src.stride(), y_dst.data(), + y_dst.stride(), uv_dst.data(), uv_dst.stride(), + src.width(), src.height(), is_nv21); + + EXPECT_EQ(KLEIDICV_OK, + impl(src.data(), src.stride(), y_dst.data(), y_dst.stride(), + uv_dst.data(), uv_dst.stride(), 0, 1, is_nv21)); + + EXPECT_EQ(KLEIDICV_OK, + impl(src.data(), src.stride(), y_dst.data(), y_dst.stride(), + uv_dst.data(), uv_dst.stride(), 1, 0, is_nv21)); + + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + impl(src.data(), src.stride(), y_dst.data(), y_dst.stride(), + uv_dst.data(), uv_dst.stride(), + KLEIDICV_MAX_IMAGE_PIXELS + 1, 1, is_nv21)); + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + impl(src.data(), src.stride(), y_dst.data(), y_dst.stride(), + uv_dst.data(), uv_dst.stride(), KLEIDICV_MAX_IMAGE_PIXELS, + KLEIDICV_MAX_IMAGE_PIXELS, is_nv21)); + } + + private: + // Fixed-point RGB to YUV420 weights; kBUWeight doubles as the R->V weight + static const int kWeightScale = 20; + static const int kRYWeight = + 269484; // 0.299055 * (236-16)/256 * (1 << kWeightScale) + static const int kGYWeight = + 528482; // 0.586472 * (236-16)/256 * (1 << kWeightScale) + static const int kBYWeight = + 102760; // 0.114035 * (236-16)/256 * (1 << kWeightScale) + static const int kRUWeight = -155188; // -0.148 * (1 << kWeightScale) + static const int kGUWeight = -305135; // -0.291 * (1 << kWeightScale) + static const int kBUWeight = 460324; // 0.439 * (1 << kWeightScale) + static const int kGVWeight = -385875; // -0.368 * (1 << kWeightScale) + static const int kBVWeight = -74448; // -0.071 * (1 << kWeightScale) + static uint8_t saturate_cast_s32_to_u8(int32_t rhs) { + return static_cast( + std::min(std::max(0, rhs), + static_cast(std::numeric_limits::max()))); + } + uint8_t rgb_to_y(uint8_t r, uint8_t g, uint8_t b) { + const int shifted16 = (16 << kWeightScale); + const int halfShift = (1 << (kWeightScale - 1)); + int yy = + kRYWeight 
* r + kGYWeight * g + kBYWeight * b + halfShift + shifted16; + + return std::clamp(yy >> kWeightScale, 0, 0xff); + } + + static void rgb_to_uv(uint8_t r, uint8_t g, uint8_t b, uint8_t& u, + uint8_t& v) { + const int halfShift = (1 << (kWeightScale - 1)); + const int shifted128 = (128 << kWeightScale); + int uu = + kRUWeight * r + kGUWeight * g + kBUWeight * b + halfShift + shifted128; + int vv = + kBUWeight * r + kGVWeight * g + kBVWeight * b + halfShift + shifted128; + + u = std::clamp(uu >> kWeightScale, 0, 0xff); + v = std::clamp(vv >> kWeightScale, 0, 0xff); + } + void calculate_reference(const uint8_t* src, size_t src_stride, + uint8_t* y_dst, size_t y_stride, uint8_t* uv_dst, + size_t uv_stride, size_t width, size_t height, + bool is_nv21, bool RGB, size_t channels) { + const uint8_t* src_row = nullptr; + uint8_t* y_row = nullptr; + uint8_t* u_row = nullptr; + + for (size_t h = 0; h < height; h++) { + src_row = src + src_stride * h; + y_row = y_dst + y_stride * h; + + bool evenRow = (h % 2) == 0; + if (evenRow) { + u_row = uv_dst + uv_stride * (h / 2); + } + + for (size_t w = 0; w < width; w++) { + uint8_t b0{}, g0{}, r0{}; + b0 = src_row[w * channels + 0]; + g0 = src_row[w * channels + 1]; + r0 = src_row[w * channels + 2]; + if (RGB) { + std::swap(b0, r0); + } + uint8_t y0 = rgb_to_y(r0, g0, b0); + y_row[w] = y0; + bool evenCol = (w % 2) == 0; + if (evenRow && evenCol) { + uint8_t uu{}, vv{}; + rgb_to_uv(r0, g0, b0, uu, vv); + if (is_nv21) { + std::swap(uu, vv); + } + u_row[w + 0] = uu; + u_row[w + 1] = vv; + } + } + } + } +}; + +TEST_F(RGB2YUV420SpTest, ConvertspaddedInputsWithAllParamCombinations) { + for (const auto& params : get_test_cases()) { + run_test_case(params); + } +} + +TEST_F(RGB2YUV420SpTest, ReturnsErrorForUnsupportedCombinations) { + run_unsupported(kleidicv_rgb_to_yuv420_sp_u8, 3, true); + run_unsupported(kleidicv_rgba_to_yuv420_sp_u8, 4, true); + run_unsupported(kleidicv_bgr_to_yuv420_sp_u8, 3, true); + 
run_unsupported(kleidicv_bgra_to_yuv420_sp_u8, 4, true); + run_unsupported(kleidicv_rgb_to_yuv420_sp_u8, 3, false); + run_unsupported(kleidicv_rgba_to_yuv420_sp_u8, 4, false); + run_unsupported(kleidicv_bgr_to_yuv420_sp_u8, 3, false); + run_unsupported(kleidicv_bgra_to_yuv420_sp_u8, 4, false); +} diff --git a/test/api/test_thread_rgb_to_yuv_p.cpp b/test/api/test_thread_rgb_to_yuv_p.cpp new file mode 100644 index 000000000..84e64a806 --- /dev/null +++ b/test/api/test_thread_rgb_to_yuv_p.cpp @@ -0,0 +1,65 @@ +// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include + +#include "framework/array.h" +#include "framework/generator.h" +#include "kleidicv/kleidicv.h" +#include "kleidicv_thread/kleidicv_thread.h" +#include "multithreading_fake.h" + +// Tuple of width, height, thread count. +typedef std::tuple P; + +class RgbToYuv420Thread : public testing::TestWithParam

{ + public: + template + void check(SingleThreadedFunc single_threaded_func, + MultithreadedFunc multithreaded_func, size_t channels) { + unsigned width = 0, height = 0, thread_count = 0; + std::tie(width, height, thread_count) = GetParam(); + test::Array2D src(size_t{width} * channels, height), + dst_single(width, (height * 3 + 1) / 2), + dst_multi(width, (height * 3 + 1) / 2); + + test::PseudoRandomNumberGenerator generator; + src.fill(generator); + + kleidicv_error_t single_result = + single_threaded_func(src.data(), src.stride(), dst_single.data(), + dst_single.stride(), width, height, false); + + kleidicv_error_t multi_result = multithreaded_func( + src.data(), src.stride(), dst_multi.data(), dst_multi.stride(), width, + height, false, get_multithreading_fake(thread_count)); + + EXPECT_EQ(KLEIDICV_OK, single_result); + EXPECT_EQ(KLEIDICV_OK, multi_result); + EXPECT_EQ_ARRAY2D(dst_multi, dst_single); + } +}; + +TEST_P(RgbToYuv420Thread, FromBGR) { + check(kleidicv_bgr_to_yuv420_p_u8, kleidicv_thread_bgr_to_yuv420_p_u8, 3); +} +TEST_P(RgbToYuv420Thread, FromBGRA) { + check(kleidicv_bgra_to_yuv420_p_u8, kleidicv_thread_bgra_to_yuv420_p_u8, 4); +} +TEST_P(RgbToYuv420Thread, FromRGB) { + check(kleidicv_rgb_to_yuv420_p_u8, kleidicv_thread_rgb_to_yuv420_p_u8, 3); +} +TEST_P(RgbToYuv420Thread, FromRGBA) { + check(kleidicv_rgba_to_yuv420_p_u8, kleidicv_thread_rgba_to_yuv420_p_u8, 4); +} + +INSTANTIATE_TEST_SUITE_P(, RgbToYuv420Thread, + testing::Values(P{1, 1, 1}, P{1, 2, 1}, P{1, 2, 2}, + P{2, 1, 2}, P{2, 2, 1}, P{1, 3, 2}, + P{2, 3, 1}, P{6, 4, 1}, P{4, 5, 2}, + P{2, 6, 3}, P{1, 7, 4}, P{12, 34, 5}, + P{12, 37, 5}, P{2, 1000, 2})); diff --git a/test/api/test_thread_rgb_to_yuv_sp.cpp b/test/api/test_thread_rgb_to_yuv_sp.cpp new file mode 100644 index 000000000..30b35bb48 --- /dev/null +++ b/test/api/test_thread_rgb_to_yuv_sp.cpp @@ -0,0 +1,74 @@ +// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include 
+#include + +#include + +#include "framework/array.h" +#include "framework/generator.h" +#include "framework/utils.h" +#include "kleidicv/kleidicv.h" +#include "kleidicv/utils.h" +#include "kleidicv_thread/kleidicv_thread.h" +#include "multithreading_fake.h" +#include "test_config.h" + +// Tuple of width, height, thread count. +typedef std::tuple P; + +class RgbToYuv420SpThread : public testing::TestWithParam

{ + public: + template + void check(SingleThreadedFunc single_threaded_func, + MultithreadedFunc multithreaded_func, size_t channels) { + unsigned width = 0, height = 0, thread_count = 0; + std::tie(width, height, thread_count) = GetParam(); + test::Array2D src(size_t{width} * channels, height), + y_dst_single(width, height), + uv_dst_single(KLEIDICV_TARGET_NAMESPACE::align_up(width, 2), + (height + 1) / 2), + y_dst_multi(width, height), + uv_dst_multi(KLEIDICV_TARGET_NAMESPACE::align_up(width, 2), + (height + 1) / 2); + + test::PseudoRandomNumberGenerator generator; + src.fill(generator); + + kleidicv_error_t single_result = single_threaded_func( + src.data(), src.stride(), y_dst_single.data(), y_dst_single.stride(), + uv_dst_single.data(), uv_dst_single.stride(), width, height, false); + + kleidicv_error_t multi_result = multithreaded_func( + src.data(), src.stride(), y_dst_multi.data(), y_dst_multi.stride(), + uv_dst_multi.data(), uv_dst_multi.stride(), width, height, false, + get_multithreading_fake(thread_count)); + + EXPECT_EQ(KLEIDICV_OK, single_result); + EXPECT_EQ(KLEIDICV_OK, multi_result); + EXPECT_EQ_ARRAY2D(y_dst_multi, y_dst_single); + EXPECT_EQ_ARRAY2D(uv_dst_multi, uv_dst_single); + } +}; + +TEST_P(RgbToYuv420SpThread, FromBGR) { + check(kleidicv_bgr_to_yuv420_sp_u8, kleidicv_thread_bgr_to_yuv420_sp_u8, 3); +} +TEST_P(RgbToYuv420SpThread, FromBGRA) { + check(kleidicv_bgra_to_yuv420_sp_u8, kleidicv_thread_bgra_to_yuv420_sp_u8, 4); +} +TEST_P(RgbToYuv420SpThread, FromRGB) { + check(kleidicv_rgb_to_yuv420_sp_u8, kleidicv_thread_rgb_to_yuv420_sp_u8, 3); +} +TEST_P(RgbToYuv420SpThread, FromRGBA) { + check(kleidicv_rgba_to_yuv420_sp_u8, kleidicv_thread_rgba_to_yuv420_sp_u8, 4); +} + +INSTANTIATE_TEST_SUITE_P(, RgbToYuv420SpThread, + testing::Values(P{1, 1, 1}, P{1, 2, 1}, P{1, 2, 2}, + P{2, 1, 2}, P{2, 2, 1}, P{1, 3, 2}, + P{2, 3, 1}, P{6, 4, 1}, P{4, 5, 2}, + P{2, 6, 3}, P{1, 7, 4}, P{12, 34, 5}, + P{12, 37, 5}, P{2, 1000, 2})); -- GitLab From 
16b6c4ba8e3a676d3a87033e4ac0bc0db7cb4de1 Mon Sep 17 00:00:00 2001 From: Noureldin Abdelfattah Date: Wed, 6 Aug 2025 16:53:44 +0100 Subject: [PATCH 2/2] Add RGBx & BGRx to YUV420 --- .../kleidicv/conversions/rgb_to_yuv_420.h | 110 ++++++ kleidicv/src/conversions/rgb_to_yuv420_neon.h | 16 +- kleidicv/src/conversions/rgb_to_yuv420_sc.h | 326 ++++++++++++++++++ .../src/conversions/rgb_to_yuv420p_api.cpp | 7 +- .../src/conversions/rgb_to_yuv420p_neon.cpp | 8 +- .../src/conversions/rgb_to_yuv420p_sme.cpp | 62 ++++ .../src/conversions/rgb_to_yuv420p_sve2.cpp | 70 ++++ .../src/conversions/rgb_to_yuv420sp_api.cpp | 7 +- .../src/conversions/rgb_to_yuv420sp_neon.cpp | 8 +- .../src/conversions/rgb_to_yuv420sp_sme.cpp | 66 ++++ .../src/conversions/rgb_to_yuv420sp_sve2.cpp | 74 ++++ kleidicv/src/conversions/yuv420_to_rgb_neon.h | 2 - kleidicv/src/conversions/yuv_p_to_rgb_sc.h | 7 +- test/api/test_rgb_to_yuv_420_p.cpp | 8 +- test/api/test_rgb_to_yuv_420_sp.cpp | 8 +- test/api/test_thread_rgb_to_yuv_p.cpp | 2 +- test/api/test_thread_rgb_to_yuv_sp.cpp | 2 +- 17 files changed, 743 insertions(+), 40 deletions(-) create mode 100644 kleidicv/src/conversions/rgb_to_yuv420_sc.h create mode 100644 kleidicv/src/conversions/rgb_to_yuv420p_sme.cpp create mode 100644 kleidicv/src/conversions/rgb_to_yuv420p_sve2.cpp create mode 100644 kleidicv/src/conversions/rgb_to_yuv420sp_sme.cpp create mode 100644 kleidicv/src/conversions/rgb_to_yuv420sp_sve2.cpp diff --git a/kleidicv/include/kleidicv/conversions/rgb_to_yuv_420.h b/kleidicv/include/kleidicv/conversions/rgb_to_yuv_420.h index 393335744..c5c7ad3ea 100644 --- a/kleidicv/include/kleidicv/conversions/rgb_to_yuv_420.h +++ b/kleidicv/include/kleidicv/conversions/rgb_to_yuv_420.h @@ -150,6 +150,116 @@ kleidicv_error_t bgra_to_yuv420_p_stripe_u8(const uint8_t *src, } // namespace neon +namespace sve2 { +kleidicv_error_t rgb_to_yuv420_sp_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *y_dst, + size_t y_stride, uint8_t *uv_dst, + size_t 
uv_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end); + +kleidicv_error_t rgba_to_yuv420_sp_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *y_dst, + size_t y_stride, uint8_t *uv_dst, + size_t uv_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end); + +kleidicv_error_t bgr_to_yuv420_sp_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *y_dst, + size_t y_stride, uint8_t *uv_dst, + size_t uv_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end); + +kleidicv_error_t bgra_to_yuv420_sp_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *y_dst, + size_t y_stride, uint8_t *uv_dst, + size_t uv_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end); + +kleidicv_error_t rgb_to_yuv420_p_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end); + +kleidicv_error_t rgba_to_yuv420_p_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end); + +kleidicv_error_t bgr_to_yuv420_p_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end); + +kleidicv_error_t bgra_to_yuv420_p_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end); + +} // namespace sve2 + +namespace sme { +kleidicv_error_t rgb_to_yuv420_sp_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *y_dst, + size_t y_stride, uint8_t *uv_dst, + size_t uv_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end); + +kleidicv_error_t rgba_to_yuv420_sp_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *y_dst, + size_t y_stride, uint8_t *uv_dst, + size_t 
uv_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end); + +kleidicv_error_t bgr_to_yuv420_sp_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *y_dst, + size_t y_stride, uint8_t *uv_dst, + size_t uv_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end); + +kleidicv_error_t bgra_to_yuv420_sp_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *y_dst, + size_t y_stride, uint8_t *uv_dst, + size_t uv_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end); + +kleidicv_error_t rgb_to_yuv420_p_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end); + +kleidicv_error_t rgba_to_yuv420_p_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end); + +kleidicv_error_t bgr_to_yuv420_p_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end); + +kleidicv_error_t bgra_to_yuv420_p_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end); + +} // namespace sme + } // namespace kleidicv #endif // KLEIDICV_CONVERSIONS_RGB_TO_YUV420_H diff --git a/kleidicv/src/conversions/rgb_to_yuv420_neon.h b/kleidicv/src/conversions/rgb_to_yuv420_neon.h index 4c3c9d95b..66f176765 100644 --- a/kleidicv/src/conversions/rgb_to_yuv420_neon.h +++ b/kleidicv/src/conversions/rgb_to_yuv420_neon.h @@ -19,7 +19,7 @@ static const int kWeightScale = 20; template class RGBxorBGRxToYUV420 { public: - static kleidicv_error_t rgb2yuv420p_operation( + static kleidicv_error_t rgb2yuv420_operation( const uint8_t *src, size_t src_stride, uint8_t *y_dst, size_t y_stride, uint8_t *uv_dst, size_t uv_stride, size_t width, 
size_t height, bool is_nv21, size_t begin, size_t end) { @@ -78,23 +78,23 @@ class RGBxorBGRxToYUV420 { uint8_t *u_row, uint8_t *v_row, bool &is_nv21, size_t &index, bool &evenRow) { const size_t vsize = 16; - // processing (2*vsize) pixels at once uint8x16_t b0, b1, g0, g1, r0, r1; load_rgb(b0, b1, g0, g1, r0, r1, src_row, index); - // Convert both vectors to luminance (Y channel) + uint8x16_t y0 = rgb_to_y(r0, g0, b0); uint8x16_t y1 = rgb_to_y(r1, g1, b1); - // Store Y values: 32 pixels total (two vectors) vst1q_u8(y_row + index, y0); vst1q_u8(y_row + index + vsize, y1); if (evenRow) { uint8x16x2_t uv; rgb_to_uv(r0, r1, g0, g1, b0, b1, uv.val[0], uv.val[1]); + if (is_nv21) { std::swap(uv.val[0], uv.val[1]); } + if constexpr (INTERLEAVE) { vst2q_u8(u_row + index, uv); } else { @@ -180,7 +180,7 @@ class RGBxorBGRxToYUV420 { uint32x4_t b_hi_lo = vreinterpretq_u32_u8(vqtbl1q_u8(b, index_hi_lo)); uint32x4_t b_hi_hi = vreinterpretq_u32_u8(vqtbl1q_u8(b, index_hi_hi)); - // Prepare constants for fixed-point MAC (multiply-accumulate) + // Y = kR*R + kG*G + kB*B + rounding bias uint32x4_t v_kRYWeight = vdupq_n_u32(kRYWeight); uint32x4_t v_kGYWeight = vdupq_n_u32(kGYWeight); uint32x4_t v_kBYWeight = vdupq_n_u32(kBYWeight); @@ -189,7 +189,6 @@ class RGBxorBGRxToYUV420 { uint32x4_t y_hi_lo = vdupq_n_u32(halfShift + shifted16); uint32x4_t y_hi_hi = vdupq_n_u32(halfShift + shifted16); - // Apply Y = kR*R + kG*G + kB*B + rounding bias y_lo_lo = vmlaq_u32(y_lo_lo, r_lo_lo, v_kRYWeight); y_lo_hi = vmlaq_u32(y_lo_hi, r_lo_hi, v_kRYWeight); y_hi_lo = vmlaq_u32(y_hi_lo, r_hi_lo, v_kRYWeight); @@ -275,13 +274,12 @@ class RGBxorBGRxToYUV420 { int32x4_t v_kGUWeight = vdupq_n_s32(kGUWeight); int32x4_t v_kBUWeight = vdupq_n_s32(kBUWeight); - // Initialize accumulation with bias + // U = R * kRU + G * kGU + B * kBU + bias int32x4_t u_lo_lo = vdupq_n_s32(halfShift + shifted128); int32x4_t u_lo_hi = vdupq_n_s32(halfShift + shifted128); int32x4_t u_hi_lo = vdupq_n_s32(halfShift + shifted128); 
int32x4_t u_hi_hi = vdupq_n_s32(halfShift + shifted128); - // U = R * kRU + G * kGU + B * kBU + bias u_lo_lo = vmlaq_s32(u_lo_lo, r_lo_lo, v_kRUWeight); u_lo_hi = vmlaq_s32(u_lo_hi, r_lo_hi, v_kRUWeight); u_hi_lo = vmlaq_s32(u_hi_lo, r_hi_lo, v_kRUWeight); @@ -321,12 +319,12 @@ class RGBxorBGRxToYUV420 { int32x4_t v_kBVWeight = vdupq_n_s32(kBVWeight); v_kRUWeight = vdupq_n_s32(kBUWeight); + // V = R * kBU + G * kGV + B * kBV + bias int32x4_t v_lo_lo = vdupq_n_s32(halfShift + shifted128); int32x4_t v_lo_hi = vdupq_n_s32(halfShift + shifted128); int32x4_t v_hi_lo = vdupq_n_s32(halfShift + shifted128); int32x4_t v_hi_hi = vdupq_n_s32(halfShift + shifted128); - // V = R * kBU + G * kGV + B * kBV + bias v_lo_lo = vmlaq_s32(v_lo_lo, r_lo_lo, v_kRUWeight); v_lo_hi = vmlaq_s32(v_lo_hi, r_lo_hi, v_kRUWeight); v_hi_lo = vmlaq_s32(v_hi_lo, r_hi_lo, v_kRUWeight); diff --git a/kleidicv/src/conversions/rgb_to_yuv420_sc.h b/kleidicv/src/conversions/rgb_to_yuv420_sc.h new file mode 100644 index 000000000..4a228ba0c --- /dev/null +++ b/kleidicv/src/conversions/rgb_to_yuv420_sc.h @@ -0,0 +1,326 @@ +// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_RGB_TO_YUV420_SC_H +#define KLEIDICV_RGB_TO_YUV420_SC_H + +#include +#include + +#include "kleidicv/conversions/rgb_to_yuv_420.h" +#include "kleidicv/kleidicv.h" +#include "kleidicv/sve2.h" + +namespace KLEIDICV_TARGET_NAMESPACE { + +// Coefficients for RGB to YUV420 conversion +static const int kWeightScale = 20; + +template +class RGBxorBGRxToYUV420 { + public: + static kleidicv_error_t rgb2yuv420_operation_sc( + const uint8_t *src, size_t src_stride, uint8_t *y_dst, size_t y_stride, + uint8_t *uv_dst, size_t uv_stride, size_t width, size_t height, + bool is_nv21, size_t begin, size_t end) KLEIDICV_STREAMING { + size_t row_begin = begin * 2; + size_t row_end = std::min(height, end * 2); + const uint8_t *src_row = nullptr; + uint8_t *y_row = nullptr; + uint8_t 
*u_row = nullptr; + uint8_t *v_row = nullptr; + for (size_t h = row_begin; h < row_end; h++) { + src_row = src + src_stride * h; + y_row = y_dst + y_stride * h; + bool evenRow = (h & 1) == 0; + if (evenRow) { + if constexpr (INTERLEAVE) { + u_row = uv_dst + uv_stride * (h / 2); + } else { + u_row = + uv_dst + uv_stride * (h / 4) + ((h / 2) % 2) * ((width + 1) / 2); + // Pointer to the start of the V plane. + // The V plane follows the U plane. Both U and V planes are + // subsampled at a 2:1 vertical ratio (i.e., each has height / 2 + // rows), and are often stored in a single contiguous chroma region in + // memory. Depending on image height and stride, the starting offset + // of V may require adjustment to maintain correct alignment. In + // particular, the chroma rows may not align perfectly, so a + // fractional offset (in rows) is applied to calculate the V plane + // position. The formula used here accounts for this by adjusting + // based on row parity, assuming consistent memory layout across the + // Y, U, and V planes. 
+ v_row = uv_dst + uv_stride * ((h + height + 1) / 4) + + (((h + height + 1) / 2) % 2) * ((width + 1) / 2); + } + } + + const size_t vsize = svcntb(); + LoopUnroll2 loop{width, vsize}; + + loop.unroll_twice([&](size_t index) KLEIDICV_STREAMING { + svbool_t pg = svptrue_b8(); + + vector_path(src_row, y_row, u_row, v_row, is_nv21, index, evenRow, pg, + pg, pg); + }); + + loop.remaining([&](size_t index, size_t length) KLEIDICV_STREAMING { + svbool_t pg_w0 = svwhilelt_b8(index, length); + svbool_t pg_w1 = svwhilelt_b8(index + vsize, length); + svbool_t pg_half_width = + svwhilelt_b8((index + 1) >> 1, (length + 1) >> 1); + + vector_path(src_row, y_row, u_row, v_row, is_nv21, index, evenRow, + pg_w0, pg_w1, pg_half_width); + }); + } + return KLEIDICV_OK; + } + + private: + static void vector_path(const uint8_t *src_row, uint8_t *y_row, + uint8_t *u_row, uint8_t *v_row, bool is_nv21, + size_t &index, bool evenRow, svbool_t &pg_w0, + svbool_t &pg_w1, + svbool_t &pg_half_width) KLEIDICV_STREAMING { + const size_t vsize = svcntb(); + svuint8_t b0, b1, g0, g1, r0, r1; + load_rgb(b0, b1, g0, g1, r0, r1, src_row, index, pg_w0, pg_w1); + + svuint8_t y0 = rgb_to_y(r0, g0, b0); + svuint8_t y1 = rgb_to_y(r1, g1, b1); + + svst1(pg_w0, y_row + index, y0); + svst1(pg_w1, y_row + index + vsize, y1); + + if (evenRow) { + svuint8_t u, v; + rgb_to_uv(r0, r1, g0, g1, b0, b1, u, v); + + if (is_nv21) { + swap_scalable(u, v); + } + + if constexpr (INTERLEAVE) { + svuint8x2_t uv = svcreate2(u, v); + svst2_u8(pg_half_width, u_row + index, uv); + } else { + svst1(pg_half_width, u_row + (index >> 1), u); + svst1(pg_half_width, v_row + (index >> 1), v); + } + } + } + + static svuint8_t rgb_to_y(const svuint8_t &r, const svuint8_t &g, + const svuint8_t &b) KLEIDICV_STREAMING { + svuint16_t r_lo = svunpklo(r); + svuint16_t r_hi = svunpkhi(r); + svuint32_t r_lo_lo = svunpklo(r_lo); + svuint32_t r_lo_hi = svunpkhi(r_lo); + svuint32_t r_hi_lo = svunpklo(r_hi); + svuint32_t r_hi_hi = svunpkhi(r_hi); + 
+ svuint16_t g_lo = svunpklo(g); + svuint16_t g_hi = svunpkhi(g); + svuint32_t g_lo_lo = svunpklo(g_lo); + svuint32_t g_lo_hi = svunpkhi(g_lo); + svuint32_t g_hi_lo = svunpklo(g_hi); + svuint32_t g_hi_hi = svunpkhi(g_hi); + + svuint16_t b_lo = svunpklo(b); + svuint16_t b_hi = svunpkhi(b); + svuint32_t b_lo_lo = svunpklo(b_lo); + svuint32_t b_lo_hi = svunpkhi(b_lo); + svuint32_t b_hi_lo = svunpklo(b_hi); + svuint32_t b_hi_hi = svunpkhi(b_hi); + + const uint32_t shifted16 = (16 << kWeightScale); + const uint32_t halfShift = (1 << (kWeightScale - 1)); + + svbool_t pg = svptrue_b32(); + svuint32_t v_kRYWeight = svdup_u32(kRYWeight); + svuint32_t v_kGYWeight = svdup_u32(kGYWeight); + svuint32_t v_kBYWeight = svdup_u32(kBYWeight); + + // Y = kR*R + kG*G + kB*B + rounding bias + svuint32_t y_lo_lo = svdup_u32(halfShift + shifted16); + svuint32_t y_lo_hi = svdup_u32(halfShift + shifted16); + svuint32_t y_hi_lo = svdup_u32(halfShift + shifted16); + svuint32_t y_hi_hi = svdup_u32(halfShift + shifted16); + + y_lo_lo = svmla_x(pg, y_lo_lo, r_lo_lo, v_kRYWeight); + y_lo_hi = svmla_x(pg, y_lo_hi, r_lo_hi, v_kRYWeight); + y_hi_lo = svmla_x(pg, y_hi_lo, r_hi_lo, v_kRYWeight); + y_hi_hi = svmla_x(pg, y_hi_hi, r_hi_hi, v_kRYWeight); + + y_lo_lo = svmla_x(pg, y_lo_lo, g_lo_lo, v_kGYWeight); + y_lo_hi = svmla_x(pg, y_lo_hi, g_lo_hi, v_kGYWeight); + y_hi_lo = svmla_x(pg, y_hi_lo, g_hi_lo, v_kGYWeight); + y_hi_hi = svmla_x(pg, y_hi_hi, g_hi_hi, v_kGYWeight); + + y_lo_lo = svmla_x(pg, y_lo_lo, b_lo_lo, v_kBYWeight); + y_lo_hi = svmla_x(pg, y_lo_hi, b_lo_hi, v_kBYWeight); + y_hi_lo = svmla_x(pg, y_hi_lo, b_hi_lo, v_kBYWeight); + y_hi_hi = svmla_x(pg, y_hi_hi, b_hi_hi, v_kBYWeight); + + svuint16_t y_b = + svuzp2_u16(svreinterpret_u16(y_lo_lo), svreinterpret_u16(y_lo_hi)); + svuint16_t y_t = + svuzp2_u16(svreinterpret_u16(y_hi_lo), svreinterpret_u16(y_hi_hi)); + + y_b = svlsr_n_u16_x(pg, y_b, 4); + y_t = svlsr_n_u16_x(pg, y_t, 4); + + svuint8_t y = svuzp1_u8(svreinterpret_u8(y_b), 
svreinterpret_u8(y_t)); + + return y; + } + + static void rgb_to_uv(const svuint8_t &r0, const svuint8_t &r1, + const svuint8_t &g0, const svuint8_t &g1, + const svuint8_t &b0, const svuint8_t &b1, svuint8_t &u, + svuint8_t &v) KLEIDICV_STREAMING { + svint16_t r0_even = svreinterpret_s16(svmovlb(r0)); + svint16_t r1_even = svreinterpret_s16(svmovlb(r1)); + + svint32_t r_lo_lo = svunpklo(r0_even); + svint32_t r_lo_hi = svunpkhi(r0_even); + svint32_t r_hi_lo = svunpklo(r1_even); + svint32_t r_hi_hi = svunpkhi(r1_even); + + svint16_t g0_even = svreinterpret_s16(svmovlb(g0)); + svint16_t g1_even = svreinterpret_s16(svmovlb(g1)); + + svint32_t g_lo_lo = svunpklo(g0_even); + svint32_t g_lo_hi = svunpkhi(g0_even); + svint32_t g_hi_lo = svunpklo(g1_even); + svint32_t g_hi_hi = svunpkhi(g1_even); + + svint16_t b0_even = svreinterpret_s16(svmovlb(b0)); + svint16_t b1_even = svreinterpret_s16(svmovlb(b1)); + + svint32_t b_lo_lo = svunpklo(b0_even); + svint32_t b_lo_hi = svunpkhi(b0_even); + svint32_t b_hi_lo = svunpklo(b1_even); + svint32_t b_hi_hi = svunpkhi(b1_even); + + svbool_t pg = svptrue_b32(); + const int halfShift = (1 << (kWeightScale - 1)); + const int shifted128 = (128 << kWeightScale); + + // ---------------- U (Cb) Component ---------------- + svint32_t v_kRUWeight = svdup_s32(kRUWeight); + svint32_t v_kGUWeight = svdup_s32(kGUWeight); + svint32_t v_kBUWeight = svdup_s32(kBUWeight); + + // U = R * kRU + G * kGU + B * kBU + bias + svint32_t u_lo_lo = svdup_s32(halfShift + shifted128); + svint32_t u_lo_hi = svdup_s32(halfShift + shifted128); + svint32_t u_hi_lo = svdup_s32(halfShift + shifted128); + svint32_t u_hi_hi = svdup_s32(halfShift + shifted128); + + u_lo_lo = svmla_x(pg, u_lo_lo, r_lo_lo, v_kRUWeight); + u_lo_hi = svmla_x(pg, u_lo_hi, r_lo_hi, v_kRUWeight); + u_hi_lo = svmla_x(pg, u_hi_lo, r_hi_lo, v_kRUWeight); + u_hi_hi = svmla_x(pg, u_hi_hi, r_hi_hi, v_kRUWeight); + + u_lo_lo = svmla_x(pg, u_lo_lo, g_lo_lo, v_kGUWeight); + u_lo_hi = svmla_x(pg, u_lo_hi, 
g_lo_hi, v_kGUWeight); + u_hi_lo = svmla_x(pg, u_hi_lo, g_hi_lo, v_kGUWeight); + u_hi_hi = svmla_x(pg, u_hi_hi, g_hi_hi, v_kGUWeight); + + u_lo_lo = svmla_x(pg, u_lo_lo, b_lo_lo, v_kBUWeight); + u_lo_hi = svmla_x(pg, u_lo_hi, b_lo_hi, v_kBUWeight); + u_hi_lo = svmla_x(pg, u_hi_lo, b_hi_lo, v_kBUWeight); + u_hi_hi = svmla_x(pg, u_hi_hi, b_hi_hi, v_kBUWeight); + + svint16_t u_b = + svuzp2_s16(svreinterpret_s16(u_lo_lo), svreinterpret_s16(u_lo_hi)); + svint16_t u_t = + svuzp2_s16(svreinterpret_s16(u_hi_lo), svreinterpret_s16(u_hi_hi)); + + u_b = svasr_n_s16_x(pg, u_b, 4); + u_t = svasr_n_s16_x(pg, u_t, 4); + + u = svuzp1_u8(svreinterpret_u8(u_b), svreinterpret_u8(u_t)); + + // ---------------- V (Cr) Component ---------------- + svint32_t v_kGVWeight = svdup_s32(kGVWeight); + svint32_t v_kBVWeight = svdup_s32(kBVWeight); + + // V = R * kBU + G * kGV + B * kBV + bias + svint32_t v_lo_lo = svdup_s32(halfShift + shifted128); + svint32_t v_lo_hi = svdup_s32(halfShift + shifted128); + svint32_t v_hi_lo = svdup_s32(halfShift + shifted128); + svint32_t v_hi_hi = svdup_s32(halfShift + shifted128); + + v_lo_lo = svmla_x(pg, v_lo_lo, r_lo_lo, v_kBUWeight); + v_lo_hi = svmla_x(pg, v_lo_hi, r_lo_hi, v_kBUWeight); + v_hi_lo = svmla_x(pg, v_hi_lo, r_hi_lo, v_kBUWeight); + v_hi_hi = svmla_x(pg, v_hi_hi, r_hi_hi, v_kBUWeight); + + v_lo_lo = svmla_x(pg, v_lo_lo, g_lo_lo, v_kGVWeight); + v_lo_hi = svmla_x(pg, v_lo_hi, g_lo_hi, v_kGVWeight); + v_hi_lo = svmla_x(pg, v_hi_lo, g_hi_lo, v_kGVWeight); + v_hi_hi = svmla_x(pg, v_hi_hi, g_hi_hi, v_kGVWeight); + + v_lo_lo = svmla_x(pg, v_lo_lo, b_lo_lo, v_kBVWeight); + v_lo_hi = svmla_x(pg, v_lo_hi, b_lo_hi, v_kBVWeight); + v_hi_lo = svmla_x(pg, v_hi_lo, b_hi_lo, v_kBVWeight); + v_hi_hi = svmla_x(pg, v_hi_hi, b_hi_hi, v_kBVWeight); + + svint16_t v_b = + svuzp2_s16(svreinterpret_s16(v_lo_lo), svreinterpret_s16(v_lo_hi)); + svint16_t v_t = + svuzp2_s16(svreinterpret_s16(v_hi_lo), svreinterpret_s16(v_hi_hi)); + + v_b = svasr_n_s16_x(pg, v_b, 4); + 
v_t = svasr_n_s16_x(pg, v_t, 4); + + v = svuzp1_u8(svreinterpret_u8(v_b), svreinterpret_u8(v_t)); + } + + static void load_rgb(svuint8_t &b0, svuint8_t &b1, svuint8_t &g0, + svuint8_t &g1, svuint8_t &r0, svuint8_t &r1, + const uint8_t *src_row, size_t w, svbool_t &pg_w0, + svbool_t &pg_w1) KLEIDICV_STREAMING { + const size_t vsize = svcntb(); + constexpr size_t scn = ALPHA ? 4 : 3; + if constexpr (ALPHA) { + // 4-channel input (e.g., RGBA or BGRA) + svuint8x4_t vsrc0 = svld4(pg_w0, src_row + scn * w); + svuint8x4_t vsrc1 = svld4(pg_w1, src_row + scn * w + scn * vsize); + + b0 = svget4(vsrc0, b_index); + g0 = svget4(vsrc0, g_index); + r0 = svget4(vsrc0, r_index); + + b1 = svget4(vsrc1, b_index); + g1 = svget4(vsrc1, g_index); + r1 = svget4(vsrc1, r_index); + + } else { + // 3-channel input (e.g., RGB or BGR) + svuint8x3_t vsrc0 = svld3(pg_w0, src_row + scn * w); + svuint8x3_t vsrc1 = svld3(pg_w1, src_row + scn * w + scn * vsize); + + b0 = svget3(vsrc0, b_index); + g0 = svget3(vsrc0, g_index); + r0 = svget3(vsrc0, r_index); + + b1 = svget3(vsrc1, b_index); + g1 = svget3(vsrc1, g_index); + r1 = svget3(vsrc1, r_index); + } + } + + static constexpr int b_index = RGB ? 2 : 0; + static constexpr int g_index = 1; + static constexpr int r_index = RGB ? 
0 : 2; +}; + +} // namespace KLEIDICV_TARGET_NAMESPACE + +#endif // KLEIDICV_RGB_TO_YUV420_SC_H diff --git a/kleidicv/src/conversions/rgb_to_yuv420p_api.cpp b/kleidicv/src/conversions/rgb_to_yuv420p_api.cpp index cbdbc587a..dbaf15d36 100644 --- a/kleidicv/src/conversions/rgb_to_yuv420p_api.cpp +++ b/kleidicv/src/conversions/rgb_to_yuv420p_api.cpp @@ -6,9 +6,10 @@ #include "kleidicv/dispatch.h" #include "kleidicv/kleidicv.h" -#define KLEIDICV_DEFINE_C_API(name, partialname) \ - KLEIDICV_MULTIVERSION_C_API(name, &kleidicv::neon::partialname, nullptr, \ - nullptr, nullptr) +#define KLEIDICV_DEFINE_C_API(name, partialname) \ + KLEIDICV_MULTIVERSION_C_API(name, &kleidicv::neon::partialname, \ + &kleidicv::sve2::partialname, \ + &kleidicv::sme::partialname, nullptr) KLEIDICV_DEFINE_C_API(kleidicv_rgb_to_yuv420_p_stripe_u8, rgb_to_yuv420_p_stripe_u8); diff --git a/kleidicv/src/conversions/rgb_to_yuv420p_neon.cpp b/kleidicv/src/conversions/rgb_to_yuv420p_neon.cpp index 30878b9af..25d045279 100644 --- a/kleidicv/src/conversions/rgb_to_yuv420p_neon.cpp +++ b/kleidicv/src/conversions/rgb_to_yuv420p_neon.cpp @@ -19,7 +19,7 @@ kleidicv_error_t rgb_to_yuv420_p_stripe_u8(const uint8_t *src, CHECK_POINTER_AND_STRIDE(dst, dst_stride, (height * 3 + 1) / 2); CHECK_IMAGE_SIZE(width, height); uint8_t *uv_dst = dst + dst_stride * height; - return RGBxorBGRxToYUV420::rgb2yuv420p_operation( + return RGBxorBGRxToYUV420::rgb2yuv420_operation( src, src_stride, dst, dst_stride, uv_dst, dst_stride, width, height, is_nv21, begin, end); } @@ -34,7 +34,7 @@ kleidicv_error_t rgba_to_yuv420_p_stripe_u8(const uint8_t *src, CHECK_POINTER_AND_STRIDE(dst, dst_stride, (height * 3 + 1) / 2); CHECK_IMAGE_SIZE(width, height); uint8_t *uv_dst = dst + dst_stride * height; - return RGBxorBGRxToYUV420::rgb2yuv420p_operation( + return RGBxorBGRxToYUV420::rgb2yuv420_operation( src, src_stride, dst, dst_stride, uv_dst, dst_stride, width, height, is_nv21, begin, end); } @@ -49,7 +49,7 @@ kleidicv_error_t 
bgr_to_yuv420_p_stripe_u8(const uint8_t *src, CHECK_POINTER_AND_STRIDE(dst, dst_stride, (height * 3 + 1) / 2); CHECK_IMAGE_SIZE(width, height); uint8_t *uv_dst = dst + dst_stride * height; - return RGBxorBGRxToYUV420::rgb2yuv420p_operation( + return RGBxorBGRxToYUV420::rgb2yuv420_operation( src, src_stride, dst, dst_stride, uv_dst, dst_stride, width, height, is_nv21, begin, end); } @@ -64,7 +64,7 @@ kleidicv_error_t bgra_to_yuv420_p_stripe_u8(const uint8_t *src, CHECK_POINTER_AND_STRIDE(dst, dst_stride, (height * 3 + 1) / 2); CHECK_IMAGE_SIZE(width, height); uint8_t *uv_dst = dst + dst_stride * height; - return RGBxorBGRxToYUV420::rgb2yuv420p_operation( + return RGBxorBGRxToYUV420::rgb2yuv420_operation( src, src_stride, dst, dst_stride, uv_dst, dst_stride, width, height, is_nv21, begin, end); } diff --git a/kleidicv/src/conversions/rgb_to_yuv420p_sme.cpp b/kleidicv/src/conversions/rgb_to_yuv420p_sme.cpp new file mode 100644 index 000000000..251262579 --- /dev/null +++ b/kleidicv/src/conversions/rgb_to_yuv420p_sme.cpp @@ -0,0 +1,62 @@ +// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "kleidicv/conversions/rgb_to_yuv_420.h" +#include "rgb_to_yuv420_sc.h" + +namespace kleidicv::sme { + +KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t +rgb_to_yuv420_p_stripe_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, size_t height, + bool is_nv21, size_t begin, size_t end) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, (height * 3 + 1) / 2); + CHECK_IMAGE_SIZE(width, height); + uint8_t *uv_dst = dst + dst_stride * height; + return RGBxorBGRxToYUV420::rgb2yuv420_operation_sc( + src, src_stride, dst, dst_stride, uv_dst, dst_stride, width, height, + is_nv21, begin, end); +} + +KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t +rgba_to_yuv420_p_stripe_u8(const uint8_t *src, size_t 
src_stride, uint8_t *dst, + size_t dst_stride, size_t width, size_t height, + bool is_nv21, size_t begin, size_t end) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, (height * 3 + 1) / 2); + CHECK_IMAGE_SIZE(width, height); + uint8_t *uv_dst = dst + dst_stride * height; + return RGBxorBGRxToYUV420::rgb2yuv420_operation_sc( + src, src_stride, dst, dst_stride, uv_dst, dst_stride, width, height, + is_nv21, begin, end); +} + +KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t +bgr_to_yuv420_p_stripe_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, size_t height, + bool is_nv21, size_t begin, size_t end) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, (height * 3 + 1) / 2); + CHECK_IMAGE_SIZE(width, height); + uint8_t *uv_dst = dst + dst_stride * height; + return RGBxorBGRxToYUV420::rgb2yuv420_operation_sc( + src, src_stride, dst, dst_stride, uv_dst, dst_stride, width, height, + is_nv21, begin, end); +} + +KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t +bgra_to_yuv420_p_stripe_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, size_t height, + bool is_nv21, size_t begin, size_t end) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, (height * 3 + 1) / 2); + CHECK_IMAGE_SIZE(width, height); + uint8_t *uv_dst = dst + dst_stride * height; + return RGBxorBGRxToYUV420::rgb2yuv420_operation_sc( + src, src_stride, dst, dst_stride, uv_dst, dst_stride, width, height, + is_nv21, begin, end); +} + +} // namespace kleidicv::sme diff --git a/kleidicv/src/conversions/rgb_to_yuv420p_sve2.cpp b/kleidicv/src/conversions/rgb_to_yuv420p_sve2.cpp new file mode 100644 index 000000000..4f89b5d21 --- /dev/null +++ b/kleidicv/src/conversions/rgb_to_yuv420p_sve2.cpp @@ -0,0 +1,70 @@ +// SPDX-FileCopyrightText: 2025 Arm Limited 
and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "kleidicv/conversions/rgb_to_yuv_420.h" +#include "rgb_to_yuv420_sc.h" + +namespace kleidicv::sve2 { + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t rgb_to_yuv420_p_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, (height * 3 + 1) / 2); + CHECK_IMAGE_SIZE(width, height); + uint8_t *uv_dst = dst + dst_stride * height; + return RGBxorBGRxToYUV420::rgb2yuv420_operation_sc( + src, src_stride, dst, dst_stride, uv_dst, dst_stride, width, height, + is_nv21, begin, end); +} + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t rgba_to_yuv420_p_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, (height * 3 + 1) / 2); + CHECK_IMAGE_SIZE(width, height); + uint8_t *uv_dst = dst + dst_stride * height; + return RGBxorBGRxToYUV420::rgb2yuv420_operation_sc( + src, src_stride, dst, dst_stride, uv_dst, dst_stride, width, height, + is_nv21, begin, end); +} + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t bgr_to_yuv420_p_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, (height * 3 + 1) / 2); + CHECK_IMAGE_SIZE(width, height); + uint8_t *uv_dst = dst + dst_stride * height; + return RGBxorBGRxToYUV420::rgb2yuv420_operation_sc( + src, src_stride, dst, dst_stride, uv_dst, dst_stride, width, height, + is_nv21, begin, end); +} + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t bgra_to_yuv420_p_stripe_u8(const uint8_t 
*src, + size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(dst, dst_stride, (height * 3 + 1) / 2); + CHECK_IMAGE_SIZE(width, height); + uint8_t *uv_dst = dst + dst_stride * height; + return RGBxorBGRxToYUV420::rgb2yuv420_operation_sc( + src, src_stride, dst, dst_stride, uv_dst, dst_stride, width, height, + is_nv21, begin, end); +} + +} // namespace kleidicv::sve2 diff --git a/kleidicv/src/conversions/rgb_to_yuv420sp_api.cpp b/kleidicv/src/conversions/rgb_to_yuv420sp_api.cpp index dfc1769eb..15cba9e10 100644 --- a/kleidicv/src/conversions/rgb_to_yuv420sp_api.cpp +++ b/kleidicv/src/conversions/rgb_to_yuv420sp_api.cpp @@ -6,9 +6,10 @@ #include "kleidicv/dispatch.h" #include "kleidicv/kleidicv.h" -#define KLEIDICV_DEFINE_C_API(name, partialname) \ - KLEIDICV_MULTIVERSION_C_API(name, &kleidicv::neon::partialname, nullptr, \ - nullptr, nullptr) +#define KLEIDICV_DEFINE_C_API(name, partialname) \ + KLEIDICV_MULTIVERSION_C_API(name, &kleidicv::neon::partialname, \ + &kleidicv::sve2::partialname, \ + &kleidicv::sme::partialname, nullptr) KLEIDICV_DEFINE_C_API(kleidicv_rgb_to_yuv420_sp_stripe_u8, rgb_to_yuv420_sp_stripe_u8); diff --git a/kleidicv/src/conversions/rgb_to_yuv420sp_neon.cpp b/kleidicv/src/conversions/rgb_to_yuv420sp_neon.cpp index 5821ff488..5ee55dab4 100644 --- a/kleidicv/src/conversions/rgb_to_yuv420sp_neon.cpp +++ b/kleidicv/src/conversions/rgb_to_yuv420sp_neon.cpp @@ -20,7 +20,7 @@ kleidicv_error_t rgb_to_yuv420_sp_stripe_u8(const uint8_t *src, CHECK_POINTER_AND_STRIDE(y_dst, y_stride, height); CHECK_POINTER_AND_STRIDE(uv_dst, uv_stride, (height + 1) / 2); CHECK_IMAGE_SIZE(width, height); - return RGBxorBGRxToYUV420::rgb2yuv420p_operation( + return RGBxorBGRxToYUV420::rgb2yuv420_operation( src, src_stride, y_dst, y_stride, uv_dst, uv_stride, width, height, is_nv21, begin, end); } @@ -36,7 +36,7 @@ 
kleidicv_error_t rgba_to_yuv420_sp_stripe_u8(const uint8_t *src, CHECK_POINTER_AND_STRIDE(y_dst, y_stride, height); CHECK_POINTER_AND_STRIDE(uv_dst, uv_stride, (height + 1) / 2); CHECK_IMAGE_SIZE(width, height); - return RGBxorBGRxToYUV420::rgb2yuv420p_operation( + return RGBxorBGRxToYUV420::rgb2yuv420_operation( src, src_stride, y_dst, y_stride, uv_dst, uv_stride, width, height, is_nv21, begin, end); } @@ -52,7 +52,7 @@ kleidicv_error_t bgr_to_yuv420_sp_stripe_u8(const uint8_t *src, CHECK_POINTER_AND_STRIDE(y_dst, y_stride, height); CHECK_POINTER_AND_STRIDE(uv_dst, uv_stride, (height + 1) / 2); CHECK_IMAGE_SIZE(width, height); - return RGBxorBGRxToYUV420::rgb2yuv420p_operation( + return RGBxorBGRxToYUV420::rgb2yuv420_operation( src, src_stride, y_dst, y_stride, uv_dst, uv_stride, width, height, is_nv21, begin, end); } @@ -68,7 +68,7 @@ kleidicv_error_t bgra_to_yuv420_sp_stripe_u8(const uint8_t *src, CHECK_POINTER_AND_STRIDE(y_dst, y_stride, height); CHECK_POINTER_AND_STRIDE(uv_dst, uv_stride, (height + 1) / 2); CHECK_IMAGE_SIZE(width, height); - return RGBxorBGRxToYUV420::rgb2yuv420p_operation( + return RGBxorBGRxToYUV420::rgb2yuv420_operation( src, src_stride, y_dst, y_stride, uv_dst, uv_stride, width, height, is_nv21, begin, end); } diff --git a/kleidicv/src/conversions/rgb_to_yuv420sp_sme.cpp b/kleidicv/src/conversions/rgb_to_yuv420sp_sme.cpp new file mode 100644 index 000000000..b43e93b1e --- /dev/null +++ b/kleidicv/src/conversions/rgb_to_yuv420sp_sme.cpp @@ -0,0 +1,66 @@ +// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "kleidicv/conversions/rgb_to_yuv_420.h" +#include "rgb_to_yuv420_sc.h" + +namespace kleidicv::sme { + +KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t +rgb_to_yuv420_sp_stripe_u8(const uint8_t *src, size_t src_stride, + uint8_t *y_dst, size_t y_stride, uint8_t *uv_dst, + size_t uv_stride, size_t width, size_t height, + bool is_nv21, size_t begin, 
size_t end) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(y_dst, y_stride, height); + CHECK_POINTER_AND_STRIDE(uv_dst, uv_stride, (height + 1) / 2); + CHECK_IMAGE_SIZE(width, height); + return RGBxorBGRxToYUV420::rgb2yuv420_operation_sc( + src, src_stride, y_dst, y_stride, uv_dst, uv_stride, width, height, + is_nv21, begin, end); +} + +KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t +rgba_to_yuv420_sp_stripe_u8(const uint8_t *src, size_t src_stride, + uint8_t *y_dst, size_t y_stride, uint8_t *uv_dst, + size_t uv_stride, size_t width, size_t height, + bool is_nv21, size_t begin, size_t end) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(y_dst, y_stride, height); + CHECK_POINTER_AND_STRIDE(uv_dst, uv_stride, (height + 1) / 2); + CHECK_IMAGE_SIZE(width, height); + return RGBxorBGRxToYUV420::rgb2yuv420_operation_sc( + src, src_stride, y_dst, y_stride, uv_dst, uv_stride, width, height, + is_nv21, begin, end); +} + +KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t +bgr_to_yuv420_sp_stripe_u8(const uint8_t *src, size_t src_stride, + uint8_t *y_dst, size_t y_stride, uint8_t *uv_dst, + size_t uv_stride, size_t width, size_t height, + bool is_nv21, size_t begin, size_t end) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(y_dst, y_stride, height); + CHECK_POINTER_AND_STRIDE(uv_dst, uv_stride, (height + 1) / 2); + CHECK_IMAGE_SIZE(width, height); + return RGBxorBGRxToYUV420::rgb2yuv420_operation_sc( + src, src_stride, y_dst, y_stride, uv_dst, uv_stride, width, height, + is_nv21, begin, end); +} + +KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t +bgra_to_yuv420_sp_stripe_u8(const uint8_t *src, size_t src_stride, + uint8_t *y_dst, size_t y_stride, uint8_t *uv_dst, + size_t uv_stride, size_t width, size_t height, + bool is_nv21, size_t begin, size_t end) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + 
CHECK_POINTER_AND_STRIDE(y_dst, y_stride, height); + CHECK_POINTER_AND_STRIDE(uv_dst, uv_stride, (height + 1) / 2); + CHECK_IMAGE_SIZE(width, height); + return RGBxorBGRxToYUV420::rgb2yuv420_operation_sc( + src, src_stride, y_dst, y_stride, uv_dst, uv_stride, width, height, + is_nv21, begin, end); +} + +} // namespace kleidicv::sme diff --git a/kleidicv/src/conversions/rgb_to_yuv420sp_sve2.cpp b/kleidicv/src/conversions/rgb_to_yuv420sp_sve2.cpp new file mode 100644 index 000000000..99759fcb1 --- /dev/null +++ b/kleidicv/src/conversions/rgb_to_yuv420sp_sve2.cpp @@ -0,0 +1,74 @@ +// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "kleidicv/conversions/rgb_to_yuv_420.h" +#include "rgb_to_yuv420_sc.h" + +namespace kleidicv::sve2 { + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t rgb_to_yuv420_sp_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *y_dst, + size_t y_stride, uint8_t *uv_dst, + size_t uv_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(y_dst, y_stride, height); + CHECK_POINTER_AND_STRIDE(uv_dst, uv_stride, (height + 1) / 2); + CHECK_IMAGE_SIZE(width, height); + return RGBxorBGRxToYUV420::rgb2yuv420_operation_sc( + src, src_stride, y_dst, y_stride, uv_dst, uv_stride, width, height, + is_nv21, begin, end); +} + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t rgba_to_yuv420_sp_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *y_dst, + size_t y_stride, uint8_t *uv_dst, + size_t uv_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(y_dst, y_stride, height); + CHECK_POINTER_AND_STRIDE(uv_dst, uv_stride, (height + 1) / 2); + CHECK_IMAGE_SIZE(width, height); + return RGBxorBGRxToYUV420::rgb2yuv420_operation_sc( + src, src_stride, y_dst, y_stride, uv_dst, 
uv_stride, width, height, + is_nv21, begin, end); +} + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t bgr_to_yuv420_sp_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *y_dst, + size_t y_stride, uint8_t *uv_dst, + size_t uv_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(y_dst, y_stride, height); + CHECK_POINTER_AND_STRIDE(uv_dst, uv_stride, (height + 1) / 2); + CHECK_IMAGE_SIZE(width, height); + return RGBxorBGRxToYUV420::rgb2yuv420_operation_sc( + src, src_stride, y_dst, y_stride, uv_dst, uv_stride, width, height, + is_nv21, begin, end); +} + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t bgra_to_yuv420_sp_stripe_u8(const uint8_t *src, + size_t src_stride, uint8_t *y_dst, + size_t y_stride, uint8_t *uv_dst, + size_t uv_stride, size_t width, + size_t height, bool is_nv21, + size_t begin, size_t end) { + CHECK_POINTER_AND_STRIDE(src, src_stride, height); + CHECK_POINTER_AND_STRIDE(y_dst, y_stride, height); + CHECK_POINTER_AND_STRIDE(uv_dst, uv_stride, (height + 1) / 2); + CHECK_IMAGE_SIZE(width, height); + return RGBxorBGRxToYUV420::rgb2yuv420_operation_sc( + src, src_stride, y_dst, y_stride, uv_dst, uv_stride, width, height, + is_nv21, begin, end); +} + +} // namespace kleidicv::sve2 diff --git a/kleidicv/src/conversions/yuv420_to_rgb_neon.h b/kleidicv/src/conversions/yuv420_to_rgb_neon.h index 685cd8cd4..5bb47c6c6 100644 --- a/kleidicv/src/conversions/yuv420_to_rgb_neon.h +++ b/kleidicv/src/conversions/yuv420_to_rgb_neon.h @@ -5,8 +5,6 @@ #ifndef KLEIDICV_YUV420_TO_RGB_NEON_H #define KLEIDICV_YUV420_TO_RGB_NEON_H -#include - #include #include diff --git a/kleidicv/src/conversions/yuv_p_to_rgb_sc.h b/kleidicv/src/conversions/yuv_p_to_rgb_sc.h index 2395a0c45..f66ccbaa8 100644 --- a/kleidicv/src/conversions/yuv_p_to_rgb_sc.h +++ b/kleidicv/src/conversions/yuv_p_to_rgb_sc.h @@ -161,11 +161,8 @@ kleidicv_error_t yuv2rgbx_operation(OperationType 
&operation, }); loop.remaining([&](size_t index, size_t length) KLEIDICV_STREAMING { - size_t min_width = length - index; - size_t half_min_width = (min_width + 1) / 2; - svbool_t pg = svwhilelt_b8(int64_t(0), static_cast(min_width)); - svbool_t pg_half = - svwhilelt_b8(int64_t(0), static_cast(half_min_width)); + svbool_t pg = svwhilelt_b8(index, length); + svbool_t pg_half = svwhilelt_b8((index + 1) >> 1, (length + 1) >> 1); svuint8_t u8_vec = svld1(pg_half, u + (index >> 1)); svint16_t u_vec_lo = svreinterpret_s16_u16(svunpklo_u16(u8_vec)); diff --git a/test/api/test_rgb_to_yuv_420_p.cpp b/test/api/test_rgb_to_yuv_420_p.cpp index c351699ef..08476abaf 100644 --- a/test/api/test_rgb_to_yuv_420_p.cpp +++ b/test/api/test_rgb_to_yuv_420_p.cpp @@ -54,10 +54,10 @@ class RGB2YUV420pTest : public testing::Test { } static std::vector get_test_cases() { - std::vector widths = {1, 2, 4, 6, 18, 27, 32, 64, 3}; - std::vector src_paddings = {0}; - std::vector dst_paddings = {0}; - std::vector heights = {2, 5, 11, 16}; + std::vector widths = {18, 32, 64}; + std::vector src_paddings = {3}; + std::vector dst_paddings = {2}; + std::vector heights = {11, 16}; std::vector channels = {3, 4}; std::vector uv_cases = {true, false}; std::vector output_image_case = {true, false}; diff --git a/test/api/test_rgb_to_yuv_420_sp.cpp b/test/api/test_rgb_to_yuv_420_sp.cpp index 83f7b1723..81f134670 100644 --- a/test/api/test_rgb_to_yuv_420_sp.cpp +++ b/test/api/test_rgb_to_yuv_420_sp.cpp @@ -56,10 +56,10 @@ class RGB2YUV420SpTest : public testing::Test { } static std::vector get_test_cases() { - std::vector widths = {1, 2, 4, 6, 18, 27, 32, 64, 3}; - std::vector src_paddings = {0}; - std::vector dst_paddings = {0}; - std::vector heights = {2, 5, 11, 16}; + std::vector widths = {3, 27, 32, 64}; + std::vector src_paddings = {2}; + std::vector dst_paddings = {3}; + std::vector heights = {11, 16}; std::vector channels = {3, 4}; std::vector uv_cases = {true, false}; std::vector output_image_case = 
{true, false}; diff --git a/test/api/test_thread_rgb_to_yuv_p.cpp b/test/api/test_thread_rgb_to_yuv_p.cpp index 84e64a806..5bf8f49e5 100644 --- a/test/api/test_thread_rgb_to_yuv_p.cpp +++ b/test/api/test_thread_rgb_to_yuv_p.cpp @@ -23,7 +23,7 @@ class RgbToYuv420Thread : public testing::TestWithParam

{ MultithreadedFunc multithreaded_func, size_t channels) { unsigned width = 0, height = 0, thread_count = 0; std::tie(width, height, thread_count) = GetParam(); - test::Array2D src(size_t{width} * channels, height), + test::Array2D src(size_t{width} * channels, height, channels), dst_single(width, (height * 3 + 1) / 2), dst_multi(width, (height * 3 + 1) / 2); diff --git a/test/api/test_thread_rgb_to_yuv_sp.cpp b/test/api/test_thread_rgb_to_yuv_sp.cpp index 30b35bb48..ee1bd7522 100644 --- a/test/api/test_thread_rgb_to_yuv_sp.cpp +++ b/test/api/test_thread_rgb_to_yuv_sp.cpp @@ -26,7 +26,7 @@ class RgbToYuv420SpThread : public testing::TestWithParam

{ MultithreadedFunc multithreaded_func, size_t channels) { unsigned width = 0, height = 0, thread_count = 0; std::tie(width, height, thread_count) = GetParam(); - test::Array2D src(size_t{width} * channels, height), + test::Array2D src(size_t{width} * channels, height, channels), y_dst_single(width, height), uv_dst_single(KLEIDICV_TARGET_NAMESPACE::align_up(width, 2), (height + 1) / 2), -- GitLab