From 803b5c4df4889d4d48535fa531f216a2e55695b0 Mon Sep 17 00:00:00 2001 From: Michael Platings Date: Fri, 19 Jan 2024 17:52:58 +0000 Subject: [PATCH] Replace svset calls with svcreate * It is potentially more efficient * It avoids passing an uninitialised variable as an argument. --- intrinsiccv/src/conversions/gray_to_rgb_sc.h | 27 ++------ intrinsiccv/src/conversions/rgb_to_rgb_sc.h | 65 ++++++-------------- intrinsiccv/src/conversions/yuv_to_rgb_sc.h | 33 ++-------- intrinsiccv/src/filters/gaussian_blur_sc.h | 4 +- intrinsiccv/src/filters/sobel_sc.h | 10 ++- 5 files changed, 35 insertions(+), 104 deletions(-) diff --git a/intrinsiccv/src/conversions/gray_to_rgb_sc.h b/intrinsiccv/src/conversions/gray_to_rgb_sc.h index c4c3fa446..75a34fbd5 100644 --- a/intrinsiccv/src/conversions/gray_to_rgb_sc.h +++ b/intrinsiccv/src/conversions/gray_to_rgb_sc.h @@ -26,12 +26,7 @@ class GrayToRGB final : void vector_path(ContextType ctx, VectorType src_vect, ScalarType *dst) INTRINSICCV_STREAMING_COMPATIBLE { auto pg = ctx.predicate(); - svuint8x3_t dst_vect; - - dst_vect = svset3(dst_vect, 0, src_vect); - dst_vect = svset3(dst_vect, 1, src_vect); - dst_vect = svset3(dst_vect, 2, src_vect); - + svuint8x3_t dst_vect = svcreate3(src_vect, src_vect, src_vect); svst3(pg, dst, dst_vect); } #else // INTRINSICCV_PREFER_INTERLEAVING_LOAD_STORE @@ -98,9 +93,7 @@ class GrayToRGB final : pg_all, svlsr_x(pg_all, svmulh_x(pg_all, indices_2, const_171), 1), (svcntb() * 2) / 3); - indices_ = svset3(indices_, 0, indices_0); - indices_ = svset3(indices_, 1, indices_1); - indices_ = svset3(indices_, 2, indices_2); + indices_ = svcreate3(indices_0, indices_1, indices_2); } svuint8x3_t &indices_; @@ -123,12 +116,7 @@ class GrayToRGBA final : ScalarType *dst) INTRINSICCV_STREAMING_COMPATIBLE { auto pg = ctx.predicate(); svuint8_t alpha = svdup_u8(0xff); - svuint8x4_t dst_vect; - - dst_vect = svset4(dst_vect, 0, src_vect); - dst_vect = svset4(dst_vect, 1, src_vect); - dst_vect = svset4(dst_vect, 2, 
src_vect); - dst_vect = svset4(dst_vect, 3, alpha); + svuint8x4_t dst_vect = svcreate4(src_vect, src_vect, src_vect, alpha); svst4(pg, dst, dst_vect); } @@ -159,9 +147,7 @@ class GrayToRGBA final : void common_vector_path(svbool_t pg_0, svbool_t pg_1, svbool_t pg_2, svbool_t pg_3, VectorType src_vect, ScalarType *dst) INTRINSICCV_STREAMING_COMPATIBLE { - svuint8x2_t src_and_alpha; - src_and_alpha = svset2(src_and_alpha, 0, src_vect); - src_and_alpha = svset2(src_and_alpha, 1, VecTraits::svdup(-1)); + svuint8x2_t src_and_alpha = svcreate2(src_vect, VecTraits::svdup(-1)); // Convert from gray to RGBA using table-lookups. VectorType dst_vec_0 = svtbl2(src_and_alpha, svget4(indices_, 0)); @@ -203,10 +189,7 @@ class GrayToRGBA final : VectorType indices_3 = svreinterpret_u8_u32(svindex_u32(start_index, 0x10101)); - indices_ = svset4(indices_, 0, indices_0); - indices_ = svset4(indices_, 1, indices_1); - indices_ = svset4(indices_, 2, indices_2); - indices_ = svset4(indices_, 3, indices_3); + indices_ = svcreate4(indices_0, indices_1, indices_2, indices_3); } svuint8x4_t &indices_; diff --git a/intrinsiccv/src/conversions/rgb_to_rgb_sc.h b/intrinsiccv/src/conversions/rgb_to_rgb_sc.h index cf14bbf89..6e476f300 100644 --- a/intrinsiccv/src/conversions/rgb_to_rgb_sc.h +++ b/intrinsiccv/src/conversions/rgb_to_rgb_sc.h @@ -27,11 +27,8 @@ class RGBToBGR final : ScalarType *dst) INTRINSICCV_STREAMING_COMPATIBLE { auto pg = ctx.predicate(); svuint8x3_t src_vect = svld3(pg, src); - svuint8x3_t dst_vect; - - dst_vect = svset3(dst_vect, 0, svget3(src_vect, 2)); - dst_vect = svset3(dst_vect, 1, svget3(src_vect, 1)); - dst_vect = svset3(dst_vect, 2, svget3(src_vect, 0)); + svuint8x3_t dst_vect = svcreate3(svget3(src_vect, 2), svget3(src_vect, 1), + svget3(src_vect, 0)); svst3(pg, dst, dst_vect); } @@ -66,19 +63,13 @@ class RGBToBGR final : VectorType src_1 = svld1_vnum(pg_1, &src[0], 1); VectorType src_2 = svld1_vnum(pg_2, &src[0], 2); - svuint8x2_t src_vect_0_1; - src_vect_0_1 = 
svset2(src_vect_0_1, 0, src_0); - src_vect_0_1 = svset2(src_vect_0_1, 1, src_1); - - svuint8x2_t src_vect_1_2; - src_vect_1_2 = svset2(src_vect_1_2, 0, src_1); - src_vect_1_2 = svset2(src_vect_1_2, 1, src_2); + svuint8x2_t src_vect_0_1 = svcreate2(src_0, src_1); + svuint8x2_t src_vect_1_2 = svcreate2(src_1, src_2); svuint8_t dst_vec_0 = svtbl2(src_vect_0_1, svget4(indices_, 0)); svuint8_t dst_vec_2 = svtbl2(src_vect_1_2, svget4(indices_, 3)); svuint8_t dst_vec_1 = svtbl2(src_vect_0_1, svget4(indices_, 1)); - src_vect_1_2 = svset2(src_vect_1_2, 0, dst_vec_1); - src_vect_1_2 = svset2(src_vect_1_2, 1, src_2); + src_vect_1_2 = svcreate2(dst_vec_1, src_2); dst_vec_1 = svtbl2(src_vect_1_2, svget4(indices_, 2)); svst1(pg_0, &dst[0], dst_vec_0); @@ -88,10 +79,10 @@ class RGBToBGR final : void initialize_indices() INTRINSICCV_STREAMING_COMPATIBLE { svbool_t pg = VecTraits::svptrue(); - indices_ = svset4(indices_, 0, svld1(pg, &kTableIndices[0])); - indices_ = svset4(indices_, 1, svld1_vnum(pg, &kTableIndices[0], 1)); - indices_ = svset4(indices_, 2, svld1_vnum(pg, &kTableIndices[0], 2)); - indices_ = svset4(indices_, 3, svld1_vnum(pg, &kTableIndices[0], 3)); + indices_ = svcreate4(svld1(pg, &kTableIndices[0]), + svld1_vnum(pg, &kTableIndices[0], 1), + svld1_vnum(pg, &kTableIndices[0], 2), + svld1_vnum(pg, &kTableIndices[0], 3)); } static constexpr uint8_t kTableIndices[64] = { @@ -115,12 +106,8 @@ class RGBAToBGRA final : public UnrollTwice { ScalarType *dst) INTRINSICCV_STREAMING_COMPATIBLE { auto pg = ctx.predicate(); svuint8x4_t src_vect = svld4(pg, src); - svuint8x4_t dst_vect; - - dst_vect = svset4(dst_vect, 0, svget4(src_vect, 2)); - dst_vect = svset4(dst_vect, 1, svget4(src_vect, 1)); - dst_vect = svset4(dst_vect, 2, svget4(src_vect, 0)); - dst_vect = svset4(dst_vect, 3, svget4(src_vect, 3)); + svuint8x4_t dst_vect = svcreate4(svget4(src_vect, 2), svget4(src_vect, 1), + svget4(src_vect, 0), svget4(src_vect, 3)); svst4(pg, dst, dst_vect); } @@ -136,12 +123,8 @@ class 
RGBToBGRA final : public UnrollTwice { ScalarType *dst) INTRINSICCV_STREAMING_COMPATIBLE { auto pg = ctx.predicate(); svuint8x3_t src_vect = svld3(pg, src); - svuint8x4_t dst_vect; - - dst_vect = svset4(dst_vect, 0, svget3(src_vect, 2)); - dst_vect = svset4(dst_vect, 1, svget3(src_vect, 1)); - dst_vect = svset4(dst_vect, 2, svget3(src_vect, 0)); - dst_vect = svset4(dst_vect, 3, svdup_u8(0xff)); + svuint8x4_t dst_vect = svcreate4(svget3(src_vect, 2), svget3(src_vect, 1), + svget3(src_vect, 0), svdup_u8(0xff)); svst4(pg, dst, dst_vect); } @@ -157,12 +140,8 @@ class RGBToRGBA final : public UnrollTwice { ScalarType *dst) INTRINSICCV_STREAMING_COMPATIBLE { auto pg = ctx.predicate(); svuint8x3_t src_vect = svld3(pg, src); - svuint8x4_t dst_vect; - - dst_vect = svset4(dst_vect, 0, svget3(src_vect, 0)); - dst_vect = svset4(dst_vect, 1, svget3(src_vect, 1)); - dst_vect = svset4(dst_vect, 2, svget3(src_vect, 2)); - dst_vect = svset4(dst_vect, 3, svdup_u8(0xff)); + svuint8x4_t dst_vect = svcreate4(svget3(src_vect, 0), svget3(src_vect, 1), + svget3(src_vect, 2), svdup_u8(0xff)); svst4(pg, dst, dst_vect); } @@ -178,11 +157,8 @@ class RGBAToBGR final : public UnrollTwice { ScalarType *dst) INTRINSICCV_STREAMING_COMPATIBLE { auto pg = ctx.predicate(); svuint8x4_t src_vect = svld4(pg, src); - svuint8x3_t dst_vect; - - dst_vect = svset3(dst_vect, 0, svget4(src_vect, 2)); - dst_vect = svset3(dst_vect, 1, svget4(src_vect, 1)); - dst_vect = svset3(dst_vect, 2, svget4(src_vect, 0)); + svuint8x3_t dst_vect = svcreate3(svget4(src_vect, 2), svget4(src_vect, 1), + svget4(src_vect, 0)); svst3(pg, dst, dst_vect); } @@ -198,11 +174,8 @@ class RGBAToRGB final : public UnrollTwice { ScalarType *dst) INTRINSICCV_STREAMING_COMPATIBLE { auto pg = ctx.predicate(); svuint8x4_t src_vect = svld4(pg, src); - svuint8x3_t dst_vect; - - dst_vect = svset3(dst_vect, 0, svget4(src_vect, 0)); - dst_vect = svset3(dst_vect, 1, svget4(src_vect, 1)); - dst_vect = svset3(dst_vect, 2, svget4(src_vect, 2)); + 
svuint8x3_t dst_vect = svcreate3(svget4(src_vect, 0), svget4(src_vect, 1), + svget4(src_vect, 2)); svst3(pg, dst, dst_vect); } diff --git a/intrinsiccv/src/conversions/yuv_to_rgb_sc.h b/intrinsiccv/src/conversions/yuv_to_rgb_sc.h index 271d5bd23..36e61b20e 100644 --- a/intrinsiccv/src/conversions/yuv_to_rgb_sc.h +++ b/intrinsiccv/src/conversions/yuv_to_rgb_sc.h @@ -160,38 +160,17 @@ class YUVSpToRGBxOrBGRx final { b1_t = svsra(svdup_n_s16(0), b1_t, kWeightScale - 16); svuint8_t b1 = svqxtunt(svqxtunb(b1_b), b1_t); - // Cosntants to select where to store R and B channels. - static constexpr uint64_t kRIndex = BGR ? 2 : 0; - static constexpr uint64_t kBIndex = BGR ? 0 : 2; - if constexpr (ALPHA) { - svuint8x4_t rgba0, rgba1; - // Red channel - rgba0 = svset4(rgba0, kRIndex, r0); - rgba1 = svset4(rgba1, kRIndex, r1); - // Green channel - rgba0 = svset4(rgba0, 1, g0); - rgba1 = svset4(rgba1, 1, g1); - // Blue channel - rgba0 = svset4(rgba0, kBIndex, b0); - rgba1 = svset4(rgba1, kBIndex, b1); - // Alpha channel - rgba0 = svset4(rgba0, 3, svdup_n_u8(0xFF)); - rgba1 = svset4(rgba1, 3, svdup_n_u8(0xFF)); + svuint8x4_t rgba0 = + svcreate4(BGR ? b0 : r0, g0, BGR ? r0 : b0, svdup_n_u8(0xFF)); + svuint8x4_t rgba1 = + svcreate4(BGR ? b1 : r1, g1, BGR ? r1 : b1, svdup_n_u8(0xFF)); // Store RGBA pixels to memory. svst4_u8(pg, rgbx_row_0, rgba0); svst4_u8(pg, rgbx_row_1, rgba1); } else { - svuint8x3_t rgb0, rgb1; - // Red channel - rgb0 = svset3(rgb0, kRIndex, r0); - rgb1 = svset3(rgb1, kRIndex, r1); - // Green channel - rgb0 = svset3(rgb0, 1, g0); - rgb1 = svset3(rgb1, 1, g1); - // Blue channel - rgb0 = svset3(rgb0, kBIndex, b0); - rgb1 = svset3(rgb1, kBIndex, b1); + svuint8x3_t rgb0 = svcreate3(BGR ? b0 : r0, g0, BGR ? r0 : b0); + svuint8x3_t rgb1 = svcreate3(BGR ? b1 : r1, g1, BGR ? r1 : b1); // Store RGB pixels to memory. 
svst3(pg, rgbx_row_0, rgb0); svst3(pg, rgbx_row_1, rgb1); diff --git a/intrinsiccv/src/filters/gaussian_blur_sc.h b/intrinsiccv/src/filters/gaussian_blur_sc.h index f2e82404f..dbd83f763 100644 --- a/intrinsiccv/src/filters/gaussian_blur_sc.h +++ b/intrinsiccv/src/filters/gaussian_blur_sc.h @@ -50,9 +50,7 @@ class DiscreteGaussianBlur { acc_u16_b = svmad_u16_x(pg, acc_1_3_b, const_4_u16, acc_u16_b); acc_u16_t = svmad_u16_x(pg, acc_1_3_t, const_4_u16, acc_u16_t); - svuint16x2_t interleaved; - interleaved = svset2(interleaved, 0, acc_u16_b); - interleaved = svset2(interleaved, 1, acc_u16_t); + svuint16x2_t interleaved = svcreate2(acc_u16_b, acc_u16_t); svst2(pg, &dst[0], interleaved); } diff --git a/intrinsiccv/src/filters/sobel_sc.h b/intrinsiccv/src/filters/sobel_sc.h index 176ced4d8..a4f0d96ec 100644 --- a/intrinsiccv/src/filters/sobel_sc.h +++ b/intrinsiccv/src/filters/sobel_sc.h @@ -40,9 +40,8 @@ class HorizontalSobel3x3 { acc_u16_b = svmlalb(acc_u16_b, src_1, svdup_n_u8(2)); acc_u16_t = svmlalt(acc_u16_t, src_1, svdup_n_u8(2)); - svint16x2_t interleaved; - interleaved = svset2(interleaved, 0, svreinterpret_s16(acc_u16_b)); - interleaved = svset2(interleaved, 1, svreinterpret_s16(acc_u16_t)); + svint16x2_t interleaved = + svcreate2(svreinterpret_s16(acc_u16_b), svreinterpret_s16(acc_u16_t)); svst2(pg, &dst[0], interleaved); } @@ -91,9 +90,8 @@ class VerticalSobel3x3 { svuint16_t acc_u16_b = svsublb(src_2, src_0); svuint16_t acc_u16_t = svsublt(src_2, src_0); - svint16x2_t interleaved; - interleaved = svset2(interleaved, 0, svreinterpret_s16(acc_u16_b)); - interleaved = svset2(interleaved, 1, svreinterpret_s16(acc_u16_t)); + svint16x2_t interleaved = + svcreate2(svreinterpret_s16(acc_u16_b), svreinterpret_s16(acc_u16_t)); svst2(pg, &dst[0], interleaved); } -- GitLab