diff --git a/CHANGELOG.md b/CHANGELOG.md index 67dd6e2249d6b926129c58a265661b6d4608c5b9..9b9d9bd91953dcd699abd89a8b3473ee1d5a23f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ This changelog aims to follow the guiding principles of - Median Blur for 3x3 kernels. - Median Blur for generic kernels (odd-sized only, max kernel size 255x255), Neon backend only. - Gaussian Blur for any odd kernel size (up to 255x255) with replicated borders +- Conversion from packed YUV 4:4:4 (interleaved and non-subsampled) to RGBA/BGRA. ### Changed - Performance of Gaussian Blur is greatly improved in return for some accuracy. diff --git a/adapters/opencv/kleidicv_hal.cpp b/adapters/opencv/kleidicv_hal.cpp index 953e924dd87e479df5e13c6c725dbe5b16a31f62..7fa9209f04bc6d085ac895ecffb3f650ec44adc7 100644 --- a/adapters/opencv/kleidicv_hal.cpp +++ b/adapters/opencv/kleidicv_hal.cpp @@ -246,22 +246,39 @@ int yuv_to_bgr(const uchar *src_data, size_t src_step, uchar *dst_data, bool swapBlue, bool isCbCr) { const bool is_bgr = !swapBlue; - if (depth != CV_8U || isCbCr || dcn != 3) { + if (depth != CV_8U || isCbCr) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } auto mt = get_multithreading(); + if (dcn == 3) { + if (is_bgr) { + return convert_error(kleidicv_thread_yuv_to_bgr_u8( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, + static_cast(width), static_cast(height), mt)); + } - if (is_bgr) { - return convert_error(kleidicv_thread_yuv_to_bgr_u8( + return convert_error(kleidicv_thread_yuv_to_rgb_u8( reinterpret_cast(src_data), src_step, reinterpret_cast(dst_data), dst_step, static_cast(width), static_cast(height), mt)); } - return convert_error(kleidicv_thread_yuv_to_rgb_u8( - reinterpret_cast(src_data), src_step, - reinterpret_cast(dst_data), dst_step, - static_cast(width), static_cast(height), mt)); + + if (dcn == 4) { + if (is_bgr) { + return convert_error(kleidicv_thread_yuv_to_bgra_u8( + reinterpret_cast(src_data), src_step, + 
reinterpret_cast(dst_data), dst_step, + static_cast(width), static_cast(height), mt)); + } + + return convert_error(kleidicv_thread_yuv_to_rgba_u8( + reinterpret_cast(src_data), src_step, + reinterpret_cast(dst_data), dst_step, + static_cast(width), static_cast(height), mt)); + } + return CV_HAL_ERROR_NOT_IMPLEMENTED; } int bgr_to_yuv(const uchar *src_data, size_t src_step, uchar *dst_data, diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 75d9cf5441decaaea6af89a89f8fd114bef6d913..59e6d3d35828df59413a8e7bfd93289fa5819cf5 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -364,6 +364,8 @@ BENCH_UNARY_OP_DIFFERENT_CHANNEL_NUMBER(rgba_to_rgb_u8, 4, 3, uint8_t); BENCH_UNARY_OP_DIFFERENT_CHANNEL_NUMBER(yuv_to_rgb_u8, 3, 3, uint8_t); BENCH_UNARY_OP_DIFFERENT_CHANNEL_NUMBER(yuv_to_bgr_u8, 3, 3, uint8_t); +BENCH_UNARY_OP_DIFFERENT_CHANNEL_NUMBER(yuv_to_rgba_u8, 3, 4, uint8_t); +BENCH_UNARY_OP_DIFFERENT_CHANNEL_NUMBER(yuv_to_bgra_u8, 3, 4, uint8_t); static void min_max_loc_u8(benchmark::State& state) { bench_functor(state, []() { diff --git a/conformity/opencv/test_yuv2rgb.cpp b/conformity/opencv/test_yuv2rgb.cpp index 4a0573efc9a5a42f4cc76d0a6278ff88a13fe366..4f7494a549c62d6009eeb087c7a1880c76a93dd2 100644 --- a/conformity/opencv/test_yuv2rgb.cpp +++ b/conformity/opencv/test_yuv2rgb.cpp @@ -6,19 +6,19 @@ #include "tests.h" -template +template cv::Mat exec_yuv2rgb(cv::Mat& input) { cv::Mat result; if constexpr (SwitchBlue) { - cv::cvtColor(input, result, cv::COLOR_YUV2RGB); + cv::cvtColor(input, result, cv::COLOR_YUV2RGB, Channels); } else { - cv::cvtColor(input, result, cv::COLOR_YUV2BGR); + cv::cvtColor(input, result, cv::COLOR_YUV2BGR, Channels); } return result; } #if MANAGER -template +template bool test_yuv2rgb(int index, RecreatedMessageQueue& request_queue, RecreatedMessageQueue& reply_queue) { cv::RNG rng(0); @@ -27,8 +27,7 @@ bool test_yuv2rgb(int index, RecreatedMessageQueue& request_queue, for (size_t y = 5; y <= 16; ++y) { 
cv::Mat input(x, y, CV_8UC3); rng.fill(input, cv::RNG::UNIFORM, 0, 255); - - cv::Mat actual = exec_yuv2rgb(input); + cv::Mat actual = exec_yuv2rgb(input); cv::Mat expected = get_expected_from_subordinate(index, request_queue, reply_queue, input); @@ -46,9 +45,11 @@ bool test_yuv2rgb(int index, RecreatedMessageQueue& request_queue, std::vector& yuv2rgb_tests_get() { // clang-format off static std::vector tests = { - TEST("YUV2RGB", (test_yuv2rgb), exec_yuv2rgb), - TEST("YUV2BGR", (test_yuv2rgb), exec_yuv2rgb) - }; + TEST("YUV2RGB", (test_yuv2rgb), (exec_yuv2rgb)), + TEST("YUV2BGR", (test_yuv2rgb), (exec_yuv2rgb)), + TEST("YUV2RGBA", (test_yuv2rgb), (exec_yuv2rgb)), + TEST("YUV2BGRA", (test_yuv2rgb), (exec_yuv2rgb)) + }; // clang-format on return tests; } diff --git a/doc/functionality.md b/doc/functionality.md index 3c8b9a86c6172748dc1ec9fc8a77d68b3ef96164..43ae983c8825dc80c334ebe3b90bf82ab10454fe 100644 --- a/doc/functionality.md +++ b/doc/functionality.md @@ -43,6 +43,8 @@ See `doc/opencv.md` for details of the functionality available in OpenCV. | YUV420-RGBA | x | | YUV-BGR | x | | YUV-RGB | x | +| YUV-BGRA | x | +| YUV-RGBA | x | | RGB-YUV | x | | RGBA-YUV | x | | BGR-YUV | x | diff --git a/doc/opencv.md b/doc/opencv.md index a581a0cb9a7e26bfe5a5eca39fe94dc82b7b1066..6d0021177b8b54f0354392f43355e5b6768e1a05 100644 --- a/doc/opencv.md +++ b/doc/opencv.md @@ -78,11 +78,11 @@ Notes on parameters: * `dst.channels()` - supports 3 for RGB and 4 for RGBA. 
#### [`COLOR_YUV2RGB`](https://docs.opencv.org/4.10.0/d8/d01/group__imgproc__color__conversions.html#gga4e0972be5de079fed4e3a10e24ef5ef0ab09d8186a9e5aaac83acd157a1be43b0),[`COLOR_YUV2BGR`](https://docs.opencv.org/4.10.0/d8/d01/group__imgproc__color__conversions.html#gga4e0972be5de079fed4e3a10e24ef5ef0ab053f0cf23ae1b0bfee1964fd9a182c9) -YUV to RGB image conversion, 3 channels to 3 channels, no subsampling.\ +YUV to RGB image conversion, 3 channels to 3 or 4 channels, no subsampling.\ All supported permutations are listed in the table below. -| | RGB | BGR | -|---|-----|-----| -|YUV| x | x | +| | RGB | BGR | RGBA | BGRA | +|---|-----|-----|------|------| +|YUV| x | x | x | x | #### [`COLOR_RGB2YUV`](https://docs.opencv.org/4.10.0/d8/d01/group__imgproc__color__conversions.html#gga4e0972be5de079fed4e3a10e24ef5ef0adc0f8a1354c98d1701caad4b384e0d18),[`COLOR_BGR2YUV`](https://docs.opencv.org/4.10.0/d8/d01/group__imgproc__color__conversions.html#gga4e0972be5de079fed4e3a10e24ef5ef0a611d58d4a431fdbc294b4c79701f3d1a) RGB/RGBA to YUV image conversion, 3 or 4 channels to 3 channels, no subsampling.\ diff --git a/kleidicv/include/kleidicv/conversions/yuv_to_rgb.h b/kleidicv/include/kleidicv/conversions/yuv_to_rgb.h index 7f3ffeb8d0727653e48a2969c0c57e9dd4c9b45a..70e6c8fae2e41992fa662d400d2e23e64b93f1f1 100644 --- a/kleidicv/include/kleidicv/conversions/yuv_to_rgb.h +++ b/kleidicv/include/kleidicv/conversions/yuv_to_rgb.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 @@ -66,10 +66,18 @@ kleidicv_error_t yuv_to_bgr_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, size_t width, size_t height); +kleidicv_error_t yuv_to_bgra_u8(const uint8_t *src, size_t src_stride, + uint8_t *dst, size_t dst_stride, size_t width, + size_t height); + kleidicv_error_t yuv_to_rgb_u8(const uint8_t *src, size_t src_stride, 
uint8_t *dst, size_t dst_stride, size_t width, size_t height); +kleidicv_error_t yuv_to_rgba_u8(const uint8_t *src, size_t src_stride, + uint8_t *dst, size_t dst_stride, size_t width, + size_t height); + } // namespace neon namespace sve2 { @@ -77,10 +85,18 @@ kleidicv_error_t yuv_to_bgr_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, size_t width, size_t height); +kleidicv_error_t yuv_to_bgra_u8(const uint8_t *src, size_t src_stride, + uint8_t *dst, size_t dst_stride, size_t width, + size_t height); + kleidicv_error_t yuv_to_rgb_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, size_t width, size_t height); +kleidicv_error_t yuv_to_rgba_u8(const uint8_t *src, size_t src_stride, + uint8_t *dst, size_t dst_stride, size_t width, + size_t height); + } // namespace sve2 namespace sme2 { @@ -88,10 +104,18 @@ kleidicv_error_t yuv_to_bgr_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, size_t width, size_t height); +kleidicv_error_t yuv_to_bgra_u8(const uint8_t *src, size_t src_stride, + uint8_t *dst, size_t dst_stride, size_t width, + size_t height); + kleidicv_error_t yuv_to_rgb_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, size_t width, size_t height); +kleidicv_error_t yuv_to_rgba_u8(const uint8_t *src, size_t src_stride, + uint8_t *dst, size_t dst_stride, size_t width, + size_t height); + } // namespace sme2 } // namespace kleidicv diff --git a/kleidicv/include/kleidicv/kleidicv.h b/kleidicv/include/kleidicv/kleidicv.h index 5e4b1edaa33dad8166202fe423e30116e835cf34..09761af88fadb00061006d20f51ce2766d7e699d 100644 --- a/kleidicv/include/kleidicv/kleidicv.h +++ b/kleidicv/include/kleidicv/kleidicv.h @@ -721,18 +721,21 @@ KLEIDICV_API_DECLARATION(kleidicv_yuv_sp_to_bgra_u8, const uint8_t *src_y, size_t src_uv_stride, uint8_t *dst, size_t dst_stride, size_t width, size_t height, bool is_nv21); -/// Converts a YUV image to RGB, pixel by pixel. 
All channels are 8-bit wide. +/// Converts a YUV image to RGB or RGBA, pixel by pixel. All channels are 8-bit +/// wide. /// /// Source data has 3 channels like this: /// `| Y,U,V | Y,U,V | Y,U,V | ...` /// One pixel is represented by 3 bytes. There is no padding between the pixels. /// -/// Destination data has 3 channels: +/// Destination data has 3 or 4 channels: /// - R,G,B /// - B,G,R +/// - R,G,B,Alpha +/// - B,G,R,Alpha /// -/// Width and height are the same for the source and for the destination. Number -/// of pixels is limited to @ref KLEIDICV_MAX_IMAGE_PIXELS. +/// Width and height are the same for the source and for the destination. +/// Number of pixels is limited to @ref KLEIDICV_MAX_IMAGE_PIXELS. /// /// @param src Pointer to the source data. Must be non-null. /// @param src_stride Distance in bytes from the start of one row to the @@ -740,10 +743,10 @@ KLEIDICV_API_DECLARATION(kleidicv_yuv_sp_to_bgra_u8, const uint8_t *src_y, /// Must not be less than `3 * width`, except for single-row /// images. /// @param dst Pointer to the destination data. Must be non-null. -/// @param dst_stride Distance in bytes from the start of one row to the -/// start of the next row for the destination data. -/// Must not be less than `3 * width`, except for single-row -/// images. +/// @param dst_stride Byte offset between the start of one destination row and +/// the next. +/// Must be at least `(destination channel count) * width`, +/// unless the image has only one row. /// @param width Number of pixels in a row. /// @param height Number of rows in the data. 
/// @@ -754,6 +757,14 @@ KLEIDICV_API_DECLARATION(kleidicv_yuv_to_bgr_u8, const uint8_t *src, KLEIDICV_API_DECLARATION(kleidicv_yuv_to_rgb_u8, const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, size_t width, size_t height); +/// @copydoc kleidicv_yuv_to_bgr_u8 +KLEIDICV_API_DECLARATION(kleidicv_yuv_to_rgba_u8, const uint8_t *src, + size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height); +/// @copydoc kleidicv_yuv_to_bgr_u8 +KLEIDICV_API_DECLARATION(kleidicv_yuv_to_bgra_u8, const uint8_t *src, + size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height); /// Converts an RGB image to YUV, pixel by pixel. All channels are 8-bit wide. /// diff --git a/kleidicv/src/conversions/rgb_to_yuv_neon.cpp b/kleidicv/src/conversions/rgb_to_yuv_neon.cpp index 55a8b8f0d28620f48cbf7022e263180dbbf2fccd..ee01c935c00a2de837ca8025ded318576dd4acc1 100644 --- a/kleidicv/src/conversions/rgb_to_yuv_neon.cpp +++ b/kleidicv/src/conversions/rgb_to_yuv_neon.cpp @@ -19,7 +19,7 @@ class RGBToYUVAll final : public UnrollOnce, public TryToAvoidTailLoop { explicit RGBToYUVAll() = default; - // Returns the number of channels in the output image. + // Returns the number of channels in the input image. static constexpr size_t input_channels() { return ALPHA ? 
/* RGBA */ 4 : /* RGB */ 3; } diff --git a/kleidicv/src/conversions/yuv_to_rgb_api.cpp b/kleidicv/src/conversions/yuv_to_rgb_api.cpp index 7cc6430cb62b2248844ea427678d25d15f6bd1bd..7cb0c33e229f9c2afd63f020f3610363b5ff0a52 100644 --- a/kleidicv/src/conversions/yuv_to_rgb_api.cpp +++ b/kleidicv/src/conversions/yuv_to_rgb_api.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 @@ -14,3 +14,5 @@ KLEIDICV_DEFINE_C_API(kleidicv_yuv_to_rgb_u8, yuv_to_rgb_u8); KLEIDICV_DEFINE_C_API(kleidicv_yuv_to_bgr_u8, yuv_to_bgr_u8); +KLEIDICV_DEFINE_C_API(kleidicv_yuv_to_bgra_u8, yuv_to_bgra_u8); +KLEIDICV_DEFINE_C_API(kleidicv_yuv_to_rgba_u8, yuv_to_rgba_u8); diff --git a/kleidicv/src/conversions/yuv_to_rgb_neon.cpp b/kleidicv/src/conversions/yuv_to_rgb_neon.cpp index 44dcab9d7f0d63255c4b40002a9198af4a0810c8..485f83c58bfd0a0ed95b97073f8d5bf9768ce89d 100644 --- a/kleidicv/src/conversions/yuv_to_rgb_neon.cpp +++ b/kleidicv/src/conversions/yuv_to_rgb_neon.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 @@ -11,19 +11,26 @@ namespace kleidicv::neon { -template +template class YUVToRGBAll final : public UnrollOnce, public TryToAvoidTailLoop { public: using VecTraits = neon::VecTraits; using ScalarType = VecTraits::ScalarType; using VectorType = VecTraits::VectorType; using Vector3Type = VecTraits::Vector3Type; + using RawDestinationVectorType = + typename std::conditional::type; explicit YUVToRGBAll() : b_delta4_(vdupq_n_u32(kBDelta4)), g_delta4_(vdupq_n_u32(kGDelta4)), r_delta4_(vdupq_n_u32(kRDelta4)) {} + // Returns the number of channels in the output image. + static constexpr size_t output_channels() { + return ALPHA ? 
/* RGBA */ 4 : /* RGB */ 3; + } + void vector_path(const ScalarType *src, ScalarType *dst) { // Load deinterleaved Vector3Type vsrc = vld3q_u8(src); @@ -100,13 +107,18 @@ class YUVToRGBAll final : public UnrollOnce, public TryToAvoidTailLoop { r = vcombine_u8(vqmovun_s16(r_l), vqmovun_s16(r_h)); } - uint8x16x3_t rgb; + RawDestinationVectorType rgb; rgb.val[r_index_] = r; rgb.val[g_index_] = g; rgb.val[b_index_] = b; - - // Store interleaved RGB pixels to memory. - vst3q_u8(dst, rgb); + if constexpr (ALPHA) { + rgb.val[alpha_index_] = vdupq_n_u8(alpha_value); + // Store interleaved RGBA pixels to memory. + vst4q_u8(dst, rgb); + } else { + // Store interleaved RGB pixels to memory. + vst3q_u8(dst, rgb); + } } void scalar_path(const ScalarType *src, ScalarType *dst) { @@ -121,12 +133,17 @@ class YUVToRGBAll final : public UnrollOnce, public TryToAvoidTailLoop { dst[r_index_] = saturating_cast(r); dst[g_index_] = saturating_cast(g); dst[b_index_] = saturating_cast(b); + if constexpr (ALPHA) { + dst[alpha_index_] = alpha_value; + } } private: static constexpr size_t r_index_ = BGR ? 2 : 0; static constexpr size_t g_index_ = 1; static constexpr size_t b_index_ = BGR ? 
0 : 2; + static constexpr size_t alpha_index_ = 3; + static constexpr uint8_t alpha_value = std::numeric_limits::max(); int32x4_t b_delta4_, g_delta4_, r_delta4_; }; // end of class YUVToRGBAll @@ -141,14 +158,16 @@ kleidicv_error_t yuv2rgb_operation(OperationType &operation, Rectangle rect{width, height}; Rows src_rows{src, src_stride, 3}; - Rows dst_rows{dst, dst_stride, 3}; + Rows dst_rows{dst, dst_stride, operation.output_channels()}; apply_operation_by_rows(operation, rect, src_rows, dst_rows); return KLEIDICV_OK; } -using YUVToRGB = YUVToRGBAll; -using YUVToBGR = YUVToRGBAll; +using YUVToRGB = YUVToRGBAll; +using YUVToRGBA = YUVToRGBAll; +using YUVToBGR = YUVToRGBAll; +using YUVToBGRA = YUVToRGBAll; KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t yuv_to_rgb_u8(const uint8_t *src, size_t src_stride, @@ -159,6 +178,15 @@ kleidicv_error_t yuv_to_rgb_u8(const uint8_t *src, size_t src_stride, height); } +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t yuv_to_rgba_u8(const uint8_t *src, size_t src_stride, + uint8_t *dst, size_t dst_stride, size_t width, + size_t height) { + YUVToRGBA operation; + return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride, width, + height); +} + KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t yuv_to_bgr_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, size_t width, @@ -168,4 +196,13 @@ kleidicv_error_t yuv_to_bgr_u8(const uint8_t *src, size_t src_stride, height); } +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t yuv_to_bgra_u8(const uint8_t *src, size_t src_stride, + uint8_t *dst, size_t dst_stride, size_t width, + size_t height) { + YUVToBGRA operation; + return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride, width, + height); +} + } // namespace kleidicv::neon diff --git a/kleidicv/src/conversions/yuv_to_rgb_sc.h b/kleidicv/src/conversions/yuv_to_rgb_sc.h index a2ceaf9cf52861f79feb83b6bf5b9b6f3cfd6842..aee4bbdabde4f5dfd727d81c50047543b9da3cbb 100644 --- a/kleidicv/src/conversions/yuv_to_rgb_sc.h +++ 
b/kleidicv/src/conversions/yuv_to_rgb_sc.h @@ -1,17 +1,19 @@ -// SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 #ifndef KLEIDICV_YUV_TO_RGB_SC_H #define KLEIDICV_YUV_TO_RGB_SC_H +#include + #include "kleidicv/conversions/yuv_to_rgb.h" #include "kleidicv/kleidicv.h" #include "kleidicv/sve2.h" namespace KLEIDICV_TARGET_NAMESPACE { -template +template class YUVToRGB : public UnrollOnce { public: using ContextType = Context; @@ -19,6 +21,13 @@ class YUVToRGB : public UnrollOnce { using VecTraits = KLEIDICV_TARGET_NAMESPACE::VecTraits; using VectorType = VecTraits::VectorType; using Vector3Type = VecTraits::Vector3Type; + using RawDestinationVectorType = + typename std::conditional::type; + + // Returns the number of channels in the output image. + static constexpr size_t output_channels() KLEIDICV_STREAMING_COMPATIBLE { + return ALPHA ? /* RGBA */ 4 : /* RGB */ 3; + } void vector_path(ContextType ctx, const ScalarType *src, ScalarType *dst) KLEIDICV_STREAMING_COMPATIBLE { @@ -105,17 +114,29 @@ class YUVToRGB : public UnrollOnce { r = svqxtunt(svqxtunb(r_0), r_1); } - // Narrow the results to 8 bits - svuint8x3_t rgb; - if constexpr (BGR) { - rgb = svcreate3(b, g, r); + if constexpr (ALPHA) { + RawDestinationVectorType rgb; + if constexpr (BGR) { + rgb = svcreate4(b, g, r, svdup_u8(alpha_value)); + } else { + rgb = svcreate4(r, g, b, svdup_u8(alpha_value)); + } + + // Store interleaved RGBA pixels to memory. + svst4_u8(pg, dst, rgb); } else { - rgb = svcreate3(r, g, b); + RawDestinationVectorType rgb; + if constexpr (BGR) { + rgb = svcreate3(b, g, r); + } else { + rgb = svcreate3(r, g, b); + } + + // Store interleaved RGB pixels to memory. + svst3_u8(pg, dst, rgb); } - - // Store interleaved RGB pixels to memory. 
- svst3_u8(pg, dst, rgb); } + static constexpr uint8_t alpha_value = std::numeric_limits::max(); }; // end of class YUVToRGB template @@ -129,7 +150,7 @@ kleidicv_error_t yuv2rgb_operation( Rectangle rect{width, height}; Rows src_rows{src, src_stride, 3}; - Rows dst_rows{dst, dst_stride, 3}; + Rows dst_rows{dst, dst_stride, operation.output_channels()}; apply_operation_by_rows(operation, rect, src_rows, dst_rows); return KLEIDICV_OK; @@ -139,7 +160,16 @@ KLEIDICV_TARGET_FN_ATTRS static kleidicv_error_t yuv_to_rgb_u8_sc( const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, size_t width, size_t height) KLEIDICV_STREAMING_COMPATIBLE { - YUVToRGB operation; + YUVToRGB operation; + return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride, width, + height); +} + +KLEIDICV_TARGET_FN_ATTRS +static kleidicv_error_t yuv_to_rgba_u8_sc( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height) KLEIDICV_STREAMING_COMPATIBLE { + YUVToRGB operation; return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride, width, height); } @@ -148,7 +178,16 @@ KLEIDICV_TARGET_FN_ATTRS static kleidicv_error_t yuv_to_bgr_u8_sc( const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, size_t width, size_t height) KLEIDICV_STREAMING_COMPATIBLE { - YUVToRGB operation; + YUVToRGB operation; + return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride, width, + height); +} + +KLEIDICV_TARGET_FN_ATTRS +static kleidicv_error_t yuv_to_bgra_u8_sc( + const uint8_t *src, size_t src_stride, uint8_t *dst, size_t dst_stride, + size_t width, size_t height) KLEIDICV_STREAMING_COMPATIBLE { + YUVToRGB operation; return yuv2rgb_operation(operation, src, src_stride, dst, dst_stride, width, height); } diff --git a/kleidicv/src/conversions/yuv_to_rgb_sme2.cpp b/kleidicv/src/conversions/yuv_to_rgb_sme2.cpp index 7ba7b13ab5bbf6a9f7129f03a61482095be5c9ce..82b52ca8b906d2da6695d6b275651b5f0ef327f2 100644 --- 
a/kleidicv/src/conversions/yuv_to_rgb_sme2.cpp +++ b/kleidicv/src/conversions/yuv_to_rgb_sme2.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 @@ -17,4 +17,16 @@ yuv_to_bgr_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, return yuv_to_bgr_u8_sc(src, src_stride, dst, dst_stride, width, height); } +KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t +yuv_to_rgba_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, size_t height) { + return yuv_to_rgba_u8_sc(src, src_stride, dst, dst_stride, width, height); +} + +KLEIDICV_LOCALLY_STREAMING KLEIDICV_TARGET_FN_ATTRS kleidicv_error_t +yuv_to_bgra_u8(const uint8_t *src, size_t src_stride, uint8_t *dst, + size_t dst_stride, size_t width, size_t height) { + return yuv_to_bgra_u8_sc(src, src_stride, dst, dst_stride, width, height); +} + } // namespace kleidicv::sme2 diff --git a/kleidicv/src/conversions/yuv_to_rgb_sve2.cpp b/kleidicv/src/conversions/yuv_to_rgb_sve2.cpp index a77874f18f876bd80f58326b41a35a83162848a1..80e9b68fc18505e593472ee6ce6b39bedd4ec478 100644 --- a/kleidicv/src/conversions/yuv_to_rgb_sve2.cpp +++ b/kleidicv/src/conversions/yuv_to_rgb_sve2.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: 2024 - 2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 @@ -19,4 +19,18 @@ kleidicv_error_t yuv_to_bgr_u8(const uint8_t *src, size_t src_stride, return yuv_to_bgr_u8_sc(src, src_stride, dst, dst_stride, width, height); } +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t yuv_to_rgba_u8(const uint8_t *src, size_t src_stride, + uint8_t *dst, size_t dst_stride, size_t width, + size_t height) { + return yuv_to_rgba_u8_sc(src, src_stride, dst, dst_stride, width, height); +} + +KLEIDICV_TARGET_FN_ATTRS +kleidicv_error_t 
yuv_to_bgra_u8(const uint8_t *src, size_t src_stride, + uint8_t *dst, size_t dst_stride, size_t width, + size_t height) { + return yuv_to_bgra_u8_sc(src, src_stride, dst, dst_stride, width, height); +} + } // namespace kleidicv::sve2 diff --git a/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h b/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h index 5e14ab8f7aff169eb53e48c601495ce160bddb7b..2a6bcebe8dc3ea6a79e35af93baba72347b2f111 100644 --- a/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h +++ b/kleidicv_thread/include/kleidicv_thread/kleidicv_thread.h @@ -63,7 +63,9 @@ KLEIDICV_THREAD_UNARY_OP(kleidicv_thread_rgb_to_rgba_u8, uint8_t, uint8_t); KLEIDICV_THREAD_UNARY_OP(kleidicv_thread_rgba_to_bgr_u8, uint8_t, uint8_t); KLEIDICV_THREAD_UNARY_OP(kleidicv_thread_rgba_to_rgb_u8, uint8_t, uint8_t); KLEIDICV_THREAD_UNARY_OP(kleidicv_thread_yuv_to_bgr_u8, uint8_t, uint8_t); +KLEIDICV_THREAD_UNARY_OP(kleidicv_thread_yuv_to_bgra_u8, uint8_t, uint8_t); KLEIDICV_THREAD_UNARY_OP(kleidicv_thread_yuv_to_rgb_u8, uint8_t, uint8_t); +KLEIDICV_THREAD_UNARY_OP(kleidicv_thread_yuv_to_rgba_u8, uint8_t, uint8_t); KLEIDICV_THREAD_UNARY_OP(kleidicv_thread_bgr_to_yuv_u8, uint8_t, uint8_t); KLEIDICV_THREAD_UNARY_OP(kleidicv_thread_rgb_to_yuv_u8, uint8_t, uint8_t); KLEIDICV_THREAD_UNARY_OP(kleidicv_thread_bgra_to_yuv_u8, uint8_t, uint8_t); diff --git a/kleidicv_thread/src/kleidicv_thread.cpp b/kleidicv_thread/src/kleidicv_thread.cpp index ca4be36a2e37a46c97a2f4f232513f2892c07009..aef62654e8285d0f03f6b7a339b8b011fe82e7dd 100644 --- a/kleidicv_thread/src/kleidicv_thread.cpp +++ b/kleidicv_thread/src/kleidicv_thread.cpp @@ -125,7 +125,9 @@ KLEIDICV_THREAD_UNARY_OP_IMPL(rgb_to_rgba_u8, uint8_t, uint8_t); KLEIDICV_THREAD_UNARY_OP_IMPL(rgba_to_bgr_u8, uint8_t, uint8_t); KLEIDICV_THREAD_UNARY_OP_IMPL(rgba_to_rgb_u8, uint8_t, uint8_t); KLEIDICV_THREAD_UNARY_OP_IMPL(yuv_to_bgr_u8, uint8_t, uint8_t); +KLEIDICV_THREAD_UNARY_OP_IMPL(yuv_to_bgra_u8, uint8_t, uint8_t); 
KLEIDICV_THREAD_UNARY_OP_IMPL(yuv_to_rgb_u8, uint8_t, uint8_t); +KLEIDICV_THREAD_UNARY_OP_IMPL(yuv_to_rgba_u8, uint8_t, uint8_t); KLEIDICV_THREAD_UNARY_OP_IMPL(bgr_to_yuv_u8, uint8_t, uint8_t); KLEIDICV_THREAD_UNARY_OP_IMPL(rgb_to_yuv_u8, uint8_t, uint8_t); KLEIDICV_THREAD_UNARY_OP_IMPL(bgra_to_yuv_u8, uint8_t, uint8_t); diff --git a/scripts/benchmark/benchmarks.txt b/scripts/benchmark/benchmarks.txt index cd94e9b605ae97a6d4bc1d3bfedc853015d20734..7427c3e189ff84e3575a9dd62a6a9c4db427c050 100755 --- a/scripts/benchmark/benchmarks.txt +++ b/scripts/benchmark/benchmarks.txt @@ -24,6 +24,8 @@ BGRA2YUV: opencv_perf_imgproc '*cvtColor8u/*' '($PIXEL_FORMAT, CX_BGRA2YUV YUV2RGB: opencv_perf_imgproc '*cvtColor8u/*' '($PIXEL_FORMAT, COLOR_YUV2RGB)' YUV2BGR: opencv_perf_imgproc '*cvtColor8u/*' '($PIXEL_FORMAT, COLOR_YUV2BGR)' +YUV2RGBA: opencv_perf_imgproc '*cvtColor8u/*' '($PIXEL_FORMAT, COLOR_YUV2RGBA)' +YUV2BGRA: opencv_perf_imgproc '*cvtColor8u/*' '($PIXEL_FORMAT, COLOR_YUV2BGRA)' BinaryThreshold: opencv_perf_imgproc '*ThreshFixture_Threshold.Threshold/*' '($PIXEL_FORMAT, 8UC1, THRESH_BINARY)' diff --git a/test/api/test_thread.cpp b/test/api/test_thread.cpp index 9a3263dc5256ff461ef8a8722ea413e8761694b5..d4df636fc728d5950c61b636c6e8604e239893f7 100644 --- a/test/api/test_thread.cpp +++ b/test/api/test_thread.cpp @@ -323,6 +323,8 @@ TEST_UNARY_OP(rgba_to_bgr_u8, uint8_t, uint8_t, 4, 3); TEST_UNARY_OP(rgba_to_rgb_u8, uint8_t, uint8_t, 4, 3); TEST_UNARY_OP(yuv_to_bgr_u8, uint8_t, uint8_t, 3, 3); TEST_UNARY_OP(yuv_to_rgb_u8, uint8_t, uint8_t, 3, 3); +TEST_UNARY_OP(yuv_to_bgra_u8, uint8_t, uint8_t, 3, 4); +TEST_UNARY_OP(yuv_to_rgba_u8, uint8_t, uint8_t, 3, 4); TEST_UNARY_OP(bgr_to_yuv_u8, uint8_t, uint8_t, 3, 3); TEST_UNARY_OP(rgb_to_yuv_u8, uint8_t, uint8_t, 3, 3); TEST_UNARY_OP(bgra_to_yuv_u8, uint8_t, uint8_t, 4, 3); diff --git a/test/api/test_yuv_to_rgb.cpp b/test/api/test_yuv_to_rgb.cpp index 
e3766287234d689cf1520cd1c8bc0b1c10a68a95..ab79dd5e1f20cb9aea94d781dc46f36bbea91430 100644 --- a/test/api/test_yuv_to_rgb.cpp +++ b/test/api/test_yuv_to_rgb.cpp @@ -12,7 +12,8 @@ class YuvToRgbTest final { public: - explicit YuvToRgbTest(bool switch_blue) : switch_blue_(switch_blue) {} + explicit YuvToRgbTest(size_t channels, bool switch_blue) + : kDstChannels{channels}, switch_blue_(switch_blue) {} template void execute_scalar_test(F impl) { @@ -31,7 +32,7 @@ class YuvToRgbTest final { } private: - static const size_t kDstChannels = 3; + size_t kDstChannels; bool switch_blue_; template @@ -99,29 +100,54 @@ class YuvToRgbTest final { uint8_t c0_u8 = saturate_cast_s32_to_u8(switch_blue_ ? b : r); uint8_t c1_u8 = saturate_cast_s32_to_u8(g); uint8_t c2_u8 = saturate_cast_s32_to_u8(switch_blue_ ? r : b); - - exp_arr.set(vindex, hindex * kDstChannels, {c0_u8, c1_u8, c2_u8}); + if (kDstChannels == 3) { + exp_arr.set(vindex, hindex * kDstChannels, {c0_u8, c1_u8, c2_u8}); + } else { + exp_arr.set( + vindex, hindex * kDstChannels, + {c0_u8, c1_u8, c2_u8, std::numeric_limits::max()}); + } } } } }; TEST(YuvToRgb, YuvRgbScalar) { - YuvToRgbTest yuv2rgb_test(false); + YuvToRgbTest yuv2rgb_test(3, false); yuv2rgb_test.execute_scalar_test(kleidicv_yuv_to_rgb_u8); } TEST(YuvToRgb, YuvRgbVector) { - YuvToRgbTest yuv2rgb_test(false); + YuvToRgbTest yuv2rgb_test(3, false); yuv2rgb_test.execute_vector_test(kleidicv_yuv_to_rgb_u8); } TEST(YuvToRgb, YuvBgrScalar) { - YuvToRgbTest yuv2rgb_test(true); + YuvToRgbTest yuv2rgb_test(3, true); yuv2rgb_test.execute_scalar_test(kleidicv_yuv_to_bgr_u8); } TEST(YuvToRgb, YuvBgrVector) { - YuvToRgbTest yuv2rgb_test(true); + YuvToRgbTest yuv2rgb_test(3, true); yuv2rgb_test.execute_vector_test(kleidicv_yuv_to_bgr_u8); } + +TEST(YuvToRgb, YuvRgbaScalar) { + YuvToRgbTest yuv2rgb_test(4, false); + yuv2rgb_test.execute_scalar_test(kleidicv_yuv_to_rgba_u8); +} + +TEST(YuvToRgb, YuvRgbaVector) { + YuvToRgbTest yuv2rgb_test(4, false); + 
yuv2rgb_test.execute_vector_test(kleidicv_yuv_to_rgba_u8); +} + +TEST(YuvToRgb, YuvBgraScalar) { + YuvToRgbTest yuv2rgb_test(4, true); + yuv2rgb_test.execute_scalar_test(kleidicv_yuv_to_bgra_u8); +} + +TEST(YuvToRgb, YuvBgraVector) { + YuvToRgbTest yuv2rgb_test(4, true); + yuv2rgb_test.execute_vector_test(kleidicv_yuv_to_bgra_u8); +}