From a371173bff761c457a8c0f201ef865622b330044 Mon Sep 17 00:00:00 2001 From: Denes Tarjan Date: Fri, 26 Apr 2024 07:23:20 +0000 Subject: [PATCH] Implement RGBToYUV conversion for NEON --- adapters/opencv/kleidicv_hal.cpp | 38 ++++ adapters/opencv/kleidicv_hal.h | 15 ++ benchmark/benchmark.cpp | 30 +++ conformity/opencv/CMakeLists.txt | 6 +- conformity/opencv/test_rgb2yuv.cpp | 56 +++++ conformity/opencv/test_rgb2yuv.h | 14 ++ conformity/opencv/tests.cpp | 4 +- .../include/kleidicv/conversions/rgb_to_yuv.h | 63 +++++ .../include/kleidicv/conversions/yuv_to_rgb.h | 8 +- kleidicv/include/kleidicv/kleidicv.h | 58 ++++- kleidicv/src/conversions/rgb_to_yuv_api.cpp | 16 ++ kleidicv/src/conversions/rgb_to_yuv_neon.cpp | 215 ++++++++++++++++++ test/api/test_rgb_to_yuv.cpp | 149 ++++++++++++ 13 files changed, 659 insertions(+), 13 deletions(-) create mode 100644 conformity/opencv/test_rgb2yuv.cpp create mode 100644 conformity/opencv/test_rgb2yuv.h create mode 100644 kleidicv/include/kleidicv/conversions/rgb_to_yuv.h create mode 100644 kleidicv/src/conversions/rgb_to_yuv_api.cpp create mode 100644 kleidicv/src/conversions/rgb_to_yuv_neon.cpp create mode 100644 test/api/test_rgb_to_yuv.cpp diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp index 84ed53f61..9cb32ae4d 100644 --- a/adapters/opencv/kleidicv_hal.cpp +++ b/adapters/opencv/kleidicv_hal.cpp @@ -173,6 +173,44 @@ int yuv_to_bgr_ex(const uchar *y_data, size_t y_step, const uchar *uv_data, return CV_HAL_ERROR_NOT_IMPLEMENTED; } +int bgr_to_yuv(const uchar *src_data, size_t src_step, uchar *dst_data, + size_t dst_step, int width, int height, int depth, int scn, + bool swapBlue, bool isCbCr) { + const bool is_bgr = !swapBlue; + + if (depth != CV_8U || isCbCr) { + return CV_HAL_ERROR_NOT_IMPLEMENTED; + } + + if (scn == 3) { + if (is_bgr) { + return convert_error(kleidicv_bgr_to_yuv_u8( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, + static_cast(width), static_cast(height))); + } + return convert_error(kleidicv_rgb_to_yuv_u8( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, + static_cast(width), static_cast(height))); + } + + if (scn == 4) { + if (is_bgr) { + return convert_error(kleidicv_bgra_to_yuv_u8( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, + static_cast(width), static_cast(height))); + } + return convert_error(kleidicv_rgba_to_yuv_u8( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, + static_cast(width), static_cast(height))); + } + + return CV_HAL_ERROR_NOT_IMPLEMENTED; +} + int threshold(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int width, int height, int depth, int cn, double thresh, double maxValue, int thresholdType) { diff --git a/adapters/opencv/kleidicv_hal.h b/adapters/opencv/kleidicv_hal.h index fbf6720c3..a25be7719 100644 --- a/adapters/opencv/kleidicv_hal.h +++ b/adapters/opencv/kleidicv_hal.h @@ -37,6 +37,10 @@ int yuv_to_bgr_ex(const uchar *y_data, size_t y_step, const uchar *uv_data, int dst_width, int dst_height, int dcn, bool swapBlue, int uIdx); +int bgr_to_yuv(const uchar *src_data, size_t src_step, uchar *dst_data, + size_t dst_step, int width, int height, int depth, int scn, + bool swapBlue, bool isCbCr); + int threshold(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int width, int height, int depth, int cn, double thresh, double maxValue, int thresholdType); @@ -151,6 +155,17 @@ static inline int kleidicv_yuv_to_bgr_ex_with_fallback( #undef cv_hal_cvtTwoPlaneYUVtoBGREx #define cv_hal_cvtTwoPlaneYUVtoBGREx kleidicv_yuv_to_bgr_ex_with_fallback +// bgr_to_yuv +static inline int kleidicv_bgr_to_yuv_with_fallback( + const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, + int width, int height, int depth, int scn, bool swapBlue, bool isCbCr) { + return KLEIDICV_HAL_FALLBACK_FORWARD(bgr_to_yuv, cv_hal_cvtBGRtoYUV, src_data, + src_step, dst_data, dst_step, width, + height, depth, scn, swapBlue, isCbCr); +} +#undef cv_hal_cvtBGRtoYUV +#define cv_hal_cvtBGRtoYUV kleidicv_bgr_to_yuv_with_fallback + // threshold static inline int kleidicv_threshold_with_fallback( const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 0286d6339..b66e4d7be 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -41,6 +41,36 @@ BENCH_BINARY_OP(saturating_add_s8, int8_t); BENCH_BINARY_OP(saturating_sub_u16, uint16_t); BENCH_BINARY_OP(saturating_absdiff_s32, int32_t); +template +static void bench_unary_op(Function f, size_t channels, + benchmark::State& state) { + // Setup + std::vector src, dst; + src.resize(image_width * image_height * channels); + dst.resize(image_width * image_height * channels); + + std::mt19937 generator; + std::generate(src.begin(), src.end(), generator); + + for (auto _ : state) { + // This code gets benchmarked + auto unused = f(src.data(), image_width, dst.data(), image_width, + image_width, image_height); + (void)unused; + } +} + +#define BENCH_UNARY_OP(name, channels, type) \ + static void name(benchmark::State& state) { \ + bench_unary_op(kleidicv_##name, channels, state); \ + } \ + BENCHMARK(name) + +BENCH_UNARY_OP(rgb_to_yuv_u8, 3, uint8_t); +BENCH_UNARY_OP(rgba_to_yuv_u8, 4, uint8_t); +BENCH_UNARY_OP(bgr_to_yuv_u8, 3, uint8_t); +BENCH_UNARY_OP(bgra_to_yuv_u8, 4, uint8_t); + static void min_max_loc_u8(benchmark::State& state) { // Setup std::vector src; diff --git a/conformity/opencv/CMakeLists.txt b/conformity/opencv/CMakeLists.txt index 360b44619..74ea42d6e 100644 --- a/conformity/opencv/CMakeLists.txt +++ b/conformity/opencv/CMakeLists.txt @@ -30,8 +30,9 @@ add_executable( manager manager.cpp tests.cpp - test_sobel.cpp test_gaussian_blur.cpp + test_rgb2yuv.cpp + test_sobel.cpp ) target_link_libraries( @@ -66,8 +67,9 @@ add_executable( subordinate subordinate.cpp tests.cpp - test_sobel.cpp test_gaussian_blur.cpp + test_rgb2yuv.cpp + test_sobel.cpp ) target_link_libraries( diff --git a/conformity/opencv/test_rgb2yuv.cpp b/conformity/opencv/test_rgb2yuv.cpp new file mode 100644 index 000000000..e6633bf3c --- /dev/null +++ b/conformity/opencv/test_rgb2yuv.cpp @@ -0,0 +1,56 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "test_rgb2yuv.h" + +#include + +template +cv::Mat exec_rgb2yuv(cv::Mat& input) { + cv::Mat result; + if constexpr (SwitchBlue) { + cv::cvtColor(input, result, cv::COLOR_BGR2YUV); + } else { + cv::cvtColor(input, result, cv::COLOR_RGB2YUV); + } + return result; +} + +#if MANAGER +template +bool test_rgb2yuv(int index, RecreatedMessageQueue& request_queue, + RecreatedMessageQueue& reply_queue) { + cv::RNG rng(0); + + for (size_t x = 5; x <= 16; ++x) { + for (size_t y = 5; y <= 16; ++y) { + cv::Mat input(x, y, CV_8UC(Channels)); + rng.fill(input, cv::RNG::UNIFORM, 0, 255); + + cv::Mat actual = exec_rgb2yuv(input); + cv::Mat expected = get_expected_from_subordinate(index, request_queue, + reply_queue, input); + + if (are_matrices_different(0, actual, expected)) { + fail_print_matrices(x, y, input, actual, expected); + return true; + } + } + } + + return false; +} +#endif + +std::vector& rgb2yuv_tests_get() { + // clang-format off + static std::vector tests = { + TEST("RGB2YUV", (test_rgb2yuv), exec_rgb2yuv), + TEST("RGBA2YUV", (test_rgb2yuv), exec_rgb2yuv), + TEST("BGR2YUV", (test_rgb2yuv), exec_rgb2yuv), + TEST("BGRA2YUV", (test_rgb2yuv), exec_rgb2yuv), + }; + // clang-format on + return tests; +} diff --git a/conformity/opencv/test_rgb2yuv.h b/conformity/opencv/test_rgb2yuv.h new file mode 100644 index 000000000..03d5687be --- /dev/null +++ b/conformity/opencv/test_rgb2yuv.h @@ -0,0 +1,14 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_OPENCV_CONFORMITY_TEST_RGB2YUV_H_ +#define KLEIDICV_OPENCV_CONFORMITY_TEST_RGB2YUV_H_ + +#include + +#include "tests.h" + +std::vector& rgb2yuv_tests_get(); + +#endif // KLEIDICV_OPENCV_CONFORMITY_TEST_RGB2YUV_H_ diff --git a/conformity/opencv/tests.cpp b/conformity/opencv/tests.cpp index e5fd49e6f..2c02e39fb 100644 --- a/conformity/opencv/tests.cpp +++ b/conformity/opencv/tests.cpp @@ -11,6 +11,7 @@ #include "opencv2/core.hpp" #include "opencv2/imgproc.hpp" #include "test_gaussian_blur.h" +#include "test_rgb2yuv.h" #include "test_sobel.h" static std::vector merge_tests( @@ -24,8 +25,9 @@ static std::vector merge_tests( } std::vector all_tests = merge_tests({ - sobel_tests_get, gaussian_blur_tests_get, + rgb2yuv_tests_get, + sobel_tests_get, }); #if MANAGER diff --git a/kleidicv/include/kleidicv/conversions/rgb_to_yuv.h b/kleidicv/include/kleidicv/conversions/rgb_to_yuv.h new file mode 100644 index 000000000..c45f7adc8 --- /dev/null +++ b/kleidicv/include/kleidicv/conversions/rgb_to_yuv.h @@ -0,0 +1,63 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_CONVERSIONS_RGB_TO_YUV_H +#define KLEIDICV_CONVERSIONS_RGB_TO_YUV_H + +#include "kleidicv/kleidicv.h" + +namespace kleidicv { + +/* +Analog YUV to RGB conversion according to ITU-R BT.601-7: + + Y = 0.299 * R + 0.587 * G + 0.114 * B; + U = (B - Y) / 1.772 = (B - Y) * 0,5643340858 + V = (R - Y) / 1.402 = (R - Y) * 0,7132667618 + +With 14-bit scaling and rounding, the integer constants are: + + Y = 4899 * R + 9617 * G + 1868 * B + U = (B - Y) * 8061 + V = (R - Y) * 14369 + +The final results are calculated using rounding shift right and saturating +to 8-bit unsigned values: + + X = saturating_cast((X' + (1 << 13)) >> 14) + +Sources: + [1] https://www.itu.int/rec/R-REC-BT.601 +*/ + +// Weights according to the calculation at the top of this file. +static constexpr size_t kWeightScale = 14; +static constexpr int16_t kRYWeight = 4899; +static constexpr int16_t kGYWeight = 9617; +static constexpr int16_t kBYWeight = 1868; +static constexpr int16_t kRVWeight = 14369; +static constexpr int16_t kBUWeight = 8061; + +namespace neon { + +kleidicv_error_t bgr_to_yuv_u8(const uint8_t *src, size_t src_stride, + uint8_t *dst, size_t dst_stride, size_t width, + size_t height); + +kleidicv_error_t rgb_to_yuv_u8(const uint8_t *src, size_t src_stride, + uint8_t *dst, size_t dst_stride, size_t width, + size_t height); + +kleidicv_error_t bgra_to_yuv_u8(const uint8_t *src, size_t src_stride, + uint8_t *dst, size_t dst_stride, size_t width, + size_t height); + +kleidicv_error_t rgba_to_yuv_u8(const uint8_t *src, size_t src_stride, + uint8_t *dst, size_t dst_stride, size_t width, + size_t height); +} // namespace neon + +} // namespace kleidicv + +#endif // KLEIDICV_CONVERSIONS_RGB_TO_YUV_H diff --git a/kleidicv/include/kleidicv/conversions/yuv_to_rgb.h b/kleidicv/include/kleidicv/conversions/yuv_to_rgb.h index 3ec746e94..cfe77e796 100644 --- a/kleidicv/include/kleidicv/conversions/yuv_to_rgb.h +++ b/kleidicv/include/kleidicv/conversions/yuv_to_rgb.h @@ -18,7 +18,7 @@ namespace kleidicv { After re-normalization of the analog signal: Yan = Ya - Uan = Ua / 1.722 + Uan = Ua / 1.772 Van = Va / 1.402 [ Yan ] = [ 0.299000 0.587000 0.114000 ] [ Ra ] @@ -51,14 +51,14 @@ The values used in this implementation are the following: [ G ] = [ 1.164000 -0.391000 -0.813000 ] [ U' ] [ B ] = [ 1.164000 2.018000 0.000000 ] [ V' ] -With 20 bit scaling and rounding, the integer constants are: +With 20-bit scaling and rounding, the integer constants are: [ R ] = [ 1,220,542 0 1,673,527 ] [ Y' ] [ G ] = [ 1,220,542 - 409,993 -852,492 ] [ U' ] + (1 << 19) >> 20 [ B ] = [ 1,220,542 2,116,026 0 ] [ V' ] -The final results are calculated using roundign shift right and saturating -to 8 bit unsigned values: +The final results are calculated using rounding shift right and saturating +to 8-bit unsigned values: X = saturating_cast((X' + (1 << 19)) >> 20) diff --git a/kleidicv/include/kleidicv/kleidicv.h b/kleidicv/include/kleidicv/kleidicv.h index f4052e6a8..fb69d3233 100644 --- a/kleidicv/include/kleidicv/kleidicv.h +++ b/kleidicv/include/kleidicv/kleidicv.h @@ -510,7 +510,8 @@ KLEIDICV_API_DECLARATION(kleidicv_rgba_to_rgb_u8, const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, size_t width, size_t height); -/// Converts an NV12 or NV21 YUV image to RGB. All channels are 8-bit wide. +/// Converts an NV12 or NV21 (Semi-Planar) YUV image to RGB. All channels are +/// 8-bit wide. /// /// Destination data is filled liked this: /// | R,G,B | R,G,B | R,G,B | ... @@ -547,7 +548,8 @@ KLEIDICV_API_DECLARATION(kleidicv_yuv_sp_to_rgb_u8, const uint8_t *src_y, size_t src_uv_stride, uint8_t *dst, size_t dst_stride, size_t width, size_t height, bool is_nv21); -/// Converts an NV12 or NV21 YUV image to BGR. All channels are 8-bit wide. +/// Converts an NV12 or NV21 (Semi-Planar) YUV image to BGR. All channels are +/// 8-bit wide. /// /// Destination data is filled liked this: /// | B,G,R | B,G,R | B,G,R | ... @@ -584,8 +586,8 @@ KLEIDICV_API_DECLARATION(kleidicv_yuv_sp_to_bgr_u8, const uint8_t *src_y, size_t src_uv_stride, uint8_t *dst, size_t dst_stride, size_t width, size_t height, bool is_nv21); -/// Converts an NV12 or NV21 YUV image to RGBA. All channels are 8-bit wide. -/// Alpha channel is set to 0xFF. +/// Converts an NV12 or NV21 (Semi-Planar) YUV image to RGBA. All channels are +/// 8-bit wide. Alpha channel is set to 0xFF. /// /// Destination data is filled liked this: /// | R,G,B,A | R,G,B,A | R,G,B,A | ... @@ -620,8 +622,8 @@ KLEIDICV_API_DECLARATION(kleidicv_yuv_sp_to_rgba_u8, const uint8_t *src_y, size_t src_uv_stride, uint8_t *dst, size_t dst_stride, size_t width, size_t height, bool is_nv21); -/// Converts an NV12 or NV21 YUV image to BGRA. All channels are 8-bit wide. -/// Alpha channel is set to 0xFF. +/// Converts an NV12 or NV21 (Semi-Planar) YUV image to BGRA. All channels are +/// 8-bit wide. Alpha channel is set to 0xFF. /// /// Destination data is filled liked this: /// | B,G,R,A | B,G,R,A | B,G,R,A | ... @@ -656,6 +658,50 @@ KLEIDICV_API_DECLARATION(kleidicv_yuv_sp_to_bgra_u8, const uint8_t *src_y, size_t src_uv_stride, uint8_t *dst, size_t dst_stride, size_t width, size_t height, bool is_nv21); +/// Converts an RGB image to YUV, pixel by pixel. All channels are 8-bit wide. +/// +/// Source data can have 3 channels or 4 if there is an alpha channel: +/// - R,G,B +/// - R,G,B,Alpha +/// - B,G,R +/// - B,G,R,Alpha +/// +/// Destination data is filled liked this: +/// | Y,U,V | Y,U,V | Y,U,V | ... +/// One pixel is represented by 3 bytes. There is no padding between the pixels. +/// Alpha channel is not used in the conversion. +/// +/// Width and height are the same for the source and for the destination. Number +/// of pixels is limited to @ref KLEIDICV_MAX_IMAGE_PIXELS. +/// +/// @param src Pointer to the source data. Must be non-null. +/// @param src_stride Distance in bytes from the start of one row to the +/// start of the next row for the source data. +/// Must not be less than width * (number of channels) * +/// sizeof(uint8). +/// @param dst Pointer to the destination data. Must be non-null. +/// @param dst_stride Distance in bytes from the start of one row to the +/// start of the next row for the destination data. +/// Must not be less than width * 3 * sizeof(uint8). +/// @param width Number of pixels in a row. +/// @param height Number of rows in the data. +/// +KLEIDICV_API_DECLARATION(kleidicv_bgr_to_yuv_u8, const uint8_t *src, + size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height); +/// @copydoc kleidicv_bgr_to_yuv_u8 +KLEIDICV_API_DECLARATION(kleidicv_rgb_to_yuv_u8, const uint8_t *src, + size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height); +/// @copydoc kleidicv_bgr_to_yuv_u8 +KLEIDICV_API_DECLARATION(kleidicv_bgra_to_yuv_u8, const uint8_t *src, + size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height); +/// @copydoc kleidicv_bgr_to_yuv_u8 +KLEIDICV_API_DECLARATION(kleidicv_rgba_to_yuv_u8, const uint8_t *src, + size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height); + /// Performs a comparison of each element's value in `src` with respect to a /// caller defined threshold. The strictly larger elements are set to /// `value` and the rest to 0. diff --git a/kleidicv/src/conversions/rgb_to_yuv_api.cpp b/kleidicv/src/conversions/rgb_to_yuv_api.cpp new file mode 100644 index 000000000..baa4a3f39 --- /dev/null +++ b/kleidicv/src/conversions/rgb_to_yuv_api.cpp @@ -0,0 +1,16 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "kleidicv/conversions/rgb_to_yuv.h" +#include "kleidicv/dispatch.h" +#include "kleidicv/kleidicv.h" + +#define KLEIDICV_DEFINE_C_API(name, partialname) \ + KLEIDICV_MULTIVERSION_C_API(name, &kleidicv::neon::partialname, nullptr, \ + nullptr) + +KLEIDICV_DEFINE_C_API(kleidicv_rgb_to_yuv_u8, rgb_to_yuv_u8); +KLEIDICV_DEFINE_C_API(kleidicv_bgr_to_yuv_u8, bgr_to_yuv_u8); +KLEIDICV_DEFINE_C_API(kleidicv_rgba_to_yuv_u8, rgba_to_yuv_u8); +KLEIDICV_DEFINE_C_API(kleidicv_bgra_to_yuv_u8, bgra_to_yuv_u8); diff --git a/kleidicv/src/conversions/rgb_to_yuv_neon.cpp b/kleidicv/src/conversions/rgb_to_yuv_neon.cpp new file mode 100644 index 000000000..0b0dd6430 --- /dev/null +++ b/kleidicv/src/conversions/rgb_to_yuv_neon.cpp @@ -0,0 +1,215 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "kleidicv/conversions/rgb_to_yuv.h" +#include "kleidicv/kleidicv.h" +#include "kleidicv/neon.h" + +namespace kleidicv::neon { + +template +class RGBToYUVAll final : public UnrollOnce, public TryToAvoidTailLoop { + public: + using VecTraits = neon::VecTraits; + using ScalarType = VecTraits::ScalarType; + using VectorType = VecTraits::VectorType; + using RawSourceVectorType = + typename std::conditional::type; + + explicit RGBToYUVAll() = default; + + // Returns the number of channels in the output image. + static constexpr size_t input_channels() { + return ALPHA ? /* RGBA */ 4 : /* RGB */ 3; + } + + void vector_path(const ScalarType *src, ScalarType *dst) { + RawSourceVectorType vsrc; + int16x8_t r_l, r_h, g_l, g_h, b_l, b_h; + if constexpr (ALPHA) { + vsrc = vld1q_u8_x4(src); + uint16x8_t rb_l = vuzp1q_u8(vsrc.val[0], vsrc.val[1]); + uint16x8_t rb_h = vuzp1q_u8(vsrc.val[2], vsrc.val[3]); + if constexpr (BGR) { + b_l = vreinterpretq_s16_u8(vtrn1q_u8(rb_l, vdupq_n_u8(0))); + b_h = vreinterpretq_s16_u8(vtrn1q_u8(rb_h, vdupq_n_u8(0))); + r_l = vreinterpretq_s16_u8(vtrn2q_u8(rb_l, vdupq_n_u8(0))); + r_h = vreinterpretq_s16_u8(vtrn2q_u8(rb_h, vdupq_n_u8(0))); + } else { + r_l = vreinterpretq_s16_u8(vtrn1q_u8(rb_l, vdupq_n_u8(0))); + r_h = vreinterpretq_s16_u8(vtrn1q_u8(rb_h, vdupq_n_u8(0))); + b_l = vreinterpretq_s16_u8(vtrn2q_u8(rb_l, vdupq_n_u8(0))); + b_h = vreinterpretq_s16_u8(vtrn2q_u8(rb_h, vdupq_n_u8(0))); + } + uint16x8_t ga_l = vuzp2q_u8(vsrc.val[0], vsrc.val[1]); + g_l = vreinterpretq_s16_u8(vtrn1q_u8(ga_l, vdupq_n_u8(0))); + uint16x8_t ga_h = vuzp2q_u8(vsrc.val[2], vsrc.val[3]); + g_h = vreinterpretq_s16_u8(vtrn1q_u8(ga_h, vdupq_n_u8(0))); + } else { + // Load deinterleaved + vsrc = vld3q_u8(src); + r_l = vreinterpretq_s16_u8(vzip1q_u8(vsrc.val[r_index_], vdupq_n_u8(0))); + r_h = vreinterpretq_s16_u8(vzip2q_u8(vsrc.val[r_index_], vdupq_n_u8(0))); + g_l = vreinterpretq_s16_u8(vzip1q_u8(vsrc.val[g_index_], vdupq_n_u8(0))); + g_h = vreinterpretq_s16_u8(vzip2q_u8(vsrc.val[g_index_], vdupq_n_u8(0))); + b_l = vreinterpretq_s16_u8(vzip1q_u8(vsrc.val[b_index_], vdupq_n_u8(0))); + b_h = vreinterpretq_s16_u8(vzip2q_u8(vsrc.val[b_index_], vdupq_n_u8(0))); + } + // Compute Y value in 32-bit precision + int16x8_t y_l, y_h; + { + int32x4_t y_ll = vmull_n_s16(vget_low_s16(r_l), kRYWeight); + int32x4_t y_hl = vmull_n_s16(vget_low_s16(r_h), kRYWeight); + int32x4_t y_lh = vmull_high_n_s16(r_l, kRYWeight); + int32x4_t y_hh = vmull_high_n_s16(r_h, kRYWeight); + + y_ll = vmlal_n_s16(y_ll, vget_low_s16(g_l), kGYWeight); + y_hl = vmlal_n_s16(y_hl, vget_low_s16(g_h), kGYWeight); + y_lh = vmlal_high_n_s16(y_lh, g_l, kGYWeight); + y_hh = vmlal_high_n_s16(y_hh, g_h, kGYWeight); + + y_ll = vmlal_n_s16(y_ll, vget_low_s16(b_l), kBYWeight); + y_hl = vmlal_n_s16(y_hl, vget_low_s16(b_h), kBYWeight); + y_lh = vmlal_high_n_s16(y_lh, b_l, kBYWeight); + y_hh = vmlal_high_n_s16(y_hh, b_h, kBYWeight); + + y_l = combine_scaled_s16(y_ll, y_lh); + y_h = combine_scaled_s16(y_hl, y_hh); + } + + // Using the 16-bit Y value, calculate U + int16x8_t u_l, u_h; + { + int16x8_t uy_l = vqsubq(b_l, y_l); + int16x8_t uy_h = vqsubq(b_h, y_h); + + int32x4_t u_ll = vdupq_n_s32(half_); + int32x4_t u_lh = u_ll; + int32x4_t u_hl = u_ll; + int32x4_t u_hh = u_ll; + + u_ll = vmlal_n_s16(u_ll, vget_low_s16(uy_l), kBUWeight); + u_hl = vmlal_n_s16(u_hl, vget_low_s16(uy_h), kBUWeight); + u_lh = vmlal_high_n_s16(u_lh, uy_l, kBUWeight); + u_hh = vmlal_high_n_s16(u_hh, uy_h, kBUWeight); + + u_l = combine_scaled_s16(u_ll, u_lh); + u_h = combine_scaled_s16(u_hl, u_hh); + } + + // Using the 16-bit Y value, calculate V + int16x8_t v_l, v_h; + { + int16x8_t vy_l = vqsubq(r_l, y_l); + int16x8_t vy_h = vqsubq(r_h, y_h); + + int32x4_t v_ll = vdupq_n_s32(half_); + int32x4_t v_lh = v_ll; + int32x4_t v_hl = v_ll; + int32x4_t v_hh = v_ll; + + v_ll = vmlal_n_s16(v_ll, vget_low_s16(vy_l), kRVWeight); + v_hl = vmlal_n_s16(v_hl, vget_low_s16(vy_h), kRVWeight); + v_lh = vmlal_high_n_s16(v_lh, vy_l, kRVWeight); + v_hh = vmlal_high_n_s16(v_hh, vy_h, kRVWeight); + + v_l = combine_scaled_s16(v_ll, v_lh); + v_h = combine_scaled_s16(v_hl, v_hh); + } + + // Narrow the results to 8 bits + uint8x16x3_t yuv; + yuv.val[0] = vcombine_u8(vqmovun_s16(y_l), vqmovun_s16(y_h)); + yuv.val[1] = vcombine_u8(vqmovun_s16(u_l), vqmovun_s16(u_h)); + yuv.val[2] = vcombine_u8(vqmovun_s16(v_l), vqmovun_s16(v_h)); + + // Store interleaved YUV pixels to memory. + vst3q_u8(dst, yuv); + } + + void scalar_path(const ScalarType *src, ScalarType *dst) { + int32_t y = src[r_index_] * kRYWeight + src[g_index_] * kGYWeight + + src[b_index_] * kBYWeight; + y = rounding_shift_right(y, kWeightScale); + int32_t u = (src[b_index_] - y) * kBUWeight + half_; + u = rounding_shift_right(u, kWeightScale); + int32_t v = (src[r_index_] - y) * kRVWeight + half_; + v = rounding_shift_right(v, kWeightScale); + dst[0] = saturating_cast(y); + dst[1] = saturating_cast(u); + dst[2] = saturating_cast(v); + } + + private: + static constexpr size_t r_index_ = BGR ? 2 : 0; + static constexpr size_t g_index_ = 1; + static constexpr size_t b_index_ = BGR ? 0 : 2; + static constexpr size_t step_ = ALPHA ? 4 : 3; + static constexpr uint32_t half_ = + (std::numeric_limits::max() / 2 + 1U) << kWeightScale; + + static int16x8_t combine_scaled_s16(int32x4_t a, int32x4_t b) { + return vrshrn_high_n_s32(vrshrn_n_s32(a, kWeightScale), b, kWeightScale); + } +}; // end of class RGBToYUVAll + +template +kleidicv_error_t rgb2yuv_operation(OperationType &operation, + const ScalarType *src, size_t src_stride, + ScalarType *dst, size_t dst_stride, + size_t width, size_t height) { + CHECK_POINTER_AND_STRIDE(src, src_stride); + CHECK_POINTER_AND_STRIDE(dst, dst_stride); + CHECK_IMAGE_SIZE(width, height); + + Rectangle rect{width, height}; + Rows src_rows{src, src_stride, operation.input_channels()}; + Rows dst_rows{dst, dst_stride, 3}; + + apply_operation_by_rows(operation, rect, src_rows, dst_rows); + return KLEIDICV_OK; +} + +using RGBToYUV = RGBToYUVAll; +using RGBAToYUV = RGBToYUVAll; +using BGRToYUV = RGBToYUVAll; +using BGRAToYUV = RGBToYUVAll; + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t rgb_to_yuv_u8(const uint8_t *src, size_t src_stride, + uint8_t *dst, size_t dst_stride, size_t width, + size_t height) { + RGBToYUV operation; + return rgb2yuv_operation(operation, src, src_stride, dst, dst_stride, width, + height); +} + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t rgba_to_yuv_u8(const uint8_t *src, size_t src_stride, + uint8_t *dst, size_t dst_stride, size_t width, + size_t height) { + RGBAToYUV operation; + return rgb2yuv_operation(operation, src, src_stride, dst, dst_stride, width, + height); +} + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t bgr_to_yuv_u8(const uint8_t *src, size_t src_stride, + uint8_t *dst, size_t dst_stride, size_t width, + size_t height) { + BGRToYUV operation; + return rgb2yuv_operation(operation, src, src_stride, dst, dst_stride, width, + height); +} + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t bgra_to_yuv_u8(const uint8_t *src, size_t src_stride, + uint8_t *dst, size_t dst_stride, size_t width, + size_t height) { + BGRAToYUV operation; + return rgb2yuv_operation(operation, src, src_stride, dst, dst_stride, width, + height); +} + +} // namespace kleidicv::neon diff --git a/test/api/test_rgb_to_yuv.cpp b/test/api/test_rgb_to_yuv.cpp new file mode 100644 index 000000000..0a848ae36 --- /dev/null +++ b/test/api/test_rgb_to_yuv.cpp @@ -0,0 +1,149 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include "framework/array.h" +#include "framework/utils.h" +#include "kleidicv/kleidicv.h" +#include "kleidicv/utils.h" +#include "test_config.h" + +class RGB2YUVTest final { + public: + RGB2YUVTest(size_t channel_number, bool switch_blue) + : channel_number_(channel_number), switch_blue_(switch_blue) {} + + template + void execute_scalar_test(F impl) { + size_t scalar_path_width = 5; + execute_test(impl, scalar_path_width, 0); + // Padding version + execute_test(impl, scalar_path_width, test::Options::vector_length()); + } + + template + void execute_vector_test(F impl) { + size_t vector_path_width = (2 * test::Options::vector_lanes()) - 3; + execute_test(impl, vector_path_width, 0); + // Padding version + execute_test(impl, vector_path_width, test::Options::vector_length()); + } + + private: + template + void execute_test(F impl, size_t logical_width, size_t padding) { + test::Array2D src{logical_width * channel_number_, 5, padding}; + src.fill(0); + src.set(0, 0, {0, 22, 4, 0, 27, 9, 255, 125, 255, 255, 60, 255}); + src.set(1, 0, {0, 154, 0, 0, 154, 0, 61, 255, 11, 47, 255, 0}); + src.set(2, 0, {0, 22, 4, 76, 143, 125, 203, 0, 255, 204, 0, 255}); + src.set(4, 0, {0, 145, 0, 0, 145, 0, 0, 255, 0, 0, 255, 0}); + + test::Array2D expected{logical_width * 3, src.height(), padding}; + expected.fill(0); + calculate_expected(src, expected); + + test::Array2D actual{logical_width * 3, src.height(), padding}; + actual.fill(42); + auto err = impl(src.data(), src.stride(), actual.data(), actual.stride(), + expected.width() / 3, expected.height()); + + ASSERT_EQ(KLEIDICV_OK, err); + EXPECT_EQ_ARRAY2D(expected, actual); + + test::test_null_args(impl, src.data(), src.stride(), actual.data(), + actual.stride(), expected.width() / channel_number_, + expected.height()); + + EXPECT_EQ(KLEIDICV_OK, impl(src.data(), src.stride(), actual.data(), + actual.stride(), 0, 1)); + EXPECT_EQ(KLEIDICV_OK, impl(src.data(), src.stride(), actual.data(), + actual.stride(), 1, 0)); + + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + impl(src.data(), src.stride(), actual.data(), actual.stride(), + KLEIDICV_MAX_IMAGE_PIXELS + 1, 1)); + EXPECT_EQ(KLEIDICV_ERROR_RANGE, + impl(src.data(), src.stride(), actual.data(), actual.stride(), + KLEIDICV_MAX_IMAGE_PIXELS, KLEIDICV_MAX_IMAGE_PIXELS)); + } + + void calculate_expected(test::Array2D &src_arr, + test::Array2D &exp_arr) const { + for (size_t vindex = 0; vindex < exp_arr.height(); vindex++) { + for (size_t hindex = 0; hindex < exp_arr.width() / 3; hindex++) { + // NOLINTBEGIN(clang-analyzer-core.uninitialized.Assign) + int32_t r = *src_arr.at( + vindex, hindex * channel_number_ + (switch_blue_ ? 2 : 0)); + int32_t g = *src_arr.at(vindex, hindex * channel_number_ + 1); + int32_t b = *src_arr.at( + vindex, hindex * channel_number_ + (switch_blue_ ? 0 : 2)); + // NOLINTEND(clang-analyzer-core.uninitialized.Assign) + + static const int32_t R2Y = 4899, G2Y = 9617, B2Y = 1868; + static const int32_t R2V = 14369, B2U = 8061; + + int32_t y = (r * R2Y + g * G2Y + b * B2Y + (1 << 13)) >> 14; + int32_t u = (((b - y) * B2U + (1 << 13)) >> 14) + 128; + int32_t v = (((r - y) * R2V + (1 << 13)) >> 14) + 128; + + uint8_t y_u8 = saturate_cast_s32_to_u8(y); + uint8_t u_u8 = saturate_cast_s32_to_u8(u); + uint8_t v_u8 = saturate_cast_s32_to_u8(v); + + exp_arr.set(vindex, hindex * 3, {y_u8, u_u8, v_u8}); + } + } + } + + static uint8_t saturate_cast_s32_to_u8(int32_t rhs) { + return static_cast( + std::min(std::max(0, rhs), + static_cast(std::numeric_limits::max()))); + } + + size_t channel_number_; + bool switch_blue_; +}; + +TEST(RGBToYUV, RGB_YUV_scalar) { + RGB2YUVTest rgb2yuv_test(3, false); + rgb2yuv_test.execute_scalar_test(kleidicv_rgb_to_yuv_u8); +} + +TEST(RGBToYUV, RGB_YUV_vector) { + RGB2YUVTest rgb2yuv_test(3, false); + rgb2yuv_test.execute_vector_test(kleidicv_rgb_to_yuv_u8); +} + +TEST(RGBToYUV, RGBA_YUV_scalar) { + RGB2YUVTest rgb2yuv_test(4, false); + rgb2yuv_test.execute_scalar_test(kleidicv_rgba_to_yuv_u8); +} + +TEST(RGBToYUV, RGBA_YUV_vector) { + RGB2YUVTest rgb2yuv_test(4, false); + rgb2yuv_test.execute_vector_test(kleidicv_rgba_to_yuv_u8); +} + +TEST(RGBToYUV, BGR_YUV_scalar) { + RGB2YUVTest rgb2yuv_test(3, true); + rgb2yuv_test.execute_scalar_test(kleidicv_bgr_to_yuv_u8); +} + +TEST(RGBToYUV, BGR_YUV_vector) { + RGB2YUVTest rgb2yuv_test(3, true); + rgb2yuv_test.execute_vector_test(kleidicv_bgr_to_yuv_u8); +} + +TEST(RGBToYUV, BGRA_YUV_scalar) { + RGB2YUVTest rgb2yuv_test(4, true); + rgb2yuv_test.execute_scalar_test(kleidicv_bgra_to_yuv_u8); +} + +TEST(RGBToYUV, BGRA_YUV_vector) { + RGB2YUVTest rgb2yuv_test(4, true); + rgb2yuv_test.execute_vector_test(kleidicv_bgra_to_yuv_u8); +} -- GitLab