From e42afdbd9f7fd4a96298113b97d26bd2e8278a00 Mon Sep 17 00:00:00 2001 From: Michael Platings Date: Tue, 19 Mar 2024 15:28:33 +0000 Subject: [PATCH] Add function to resize image with linear interpolation Initially only 2*2 resize is implemented. --- benchmark/benchmark.cpp | 22 ++ intrinsiccv/include/intrinsiccv/intrinsiccv.h | 30 ++ .../intrinsiccv/resize/resize_linear.h | 35 +++ intrinsiccv/src/resize/resize_linear_api.cpp | 14 + intrinsiccv/src/resize/resize_linear_neon.cpp | 183 +++++++++++ intrinsiccv/src/resize/resize_linear_sc.h | 188 ++++++++++++ intrinsiccv/src/resize/resize_linear_sme2.cpp | 18 ++ intrinsiccv/src/resize/resize_linear_sve2.cpp | 18 ++ test/api/CMakeLists.txt | 1 + test/api/test_resize_linear.cpp | 287 ++++++++++++++++++ 10 files changed, 796 insertions(+) create mode 100644 intrinsiccv/include/intrinsiccv/resize/resize_linear.h create mode 100644 intrinsiccv/src/resize/resize_linear_api.cpp create mode 100644 intrinsiccv/src/resize/resize_linear_neon.cpp create mode 100644 intrinsiccv/src/resize/resize_linear_sc.h create mode 100644 intrinsiccv/src/resize/resize_linear_sme2.cpp create mode 100644 intrinsiccv/src/resize/resize_linear_sve2.cpp create mode 100644 test/api/test_resize_linear.cpp diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index a706e1ce7..2c484b167 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -59,3 +59,25 @@ static void min_max_loc_u8(benchmark::State& state) { } } BENCHMARK(min_max_loc_u8); + +static void resize_linear_u8(benchmark::State& state) { + // Setup + size_t src_width = image_width / 2; + size_t src_height = image_height / 2; + size_t dst_width = src_width * 2; + size_t dst_height = src_height * 2; + std::vector src, dst; + src.resize(src_width * src_height); + dst.resize(dst_width * dst_height); + std::mt19937 generator; + std::generate(src.begin(), src.end(), generator); + + for (auto _ : state) { + // This code gets benchmarked + auto unused = intrinsiccv_resize_linear_u8( + src.data(), src_width, src_width, src_height, dst.data(), dst_width, + dst_width, dst_height); + (void)unused; + } +} +BENCHMARK(resize_linear_u8); diff --git a/intrinsiccv/include/intrinsiccv/intrinsiccv.h b/intrinsiccv/include/intrinsiccv/intrinsiccv.h index fdf0f2af1..c58397274 100644 --- a/intrinsiccv/include/intrinsiccv/intrinsiccv.h +++ b/intrinsiccv/include/intrinsiccv/intrinsiccv.h @@ -851,6 +851,36 @@ INTRINSICCV_API_DECLARATION(intrinsiccv_resize_to_quarter_u8, size_t dst_stride, size_t dst_width, size_t dst_height); +/// Resize image using linear interpolation. +/// +/// At present only 2*2 upsizing is supported. +/// For other ratios INTRINSICCV_ERROR_NOT_IMPLEMENTED +/// will be returned. +/// The total number of pixels in the destination is limited to +/// @ref INTRINSICCV_MAX_IMAGE_PIXELS. +/// +/// @param src Pointer to the source data. Must be non-null. +/// @param src_stride Distance in bytes from the start of one row to the +/// start of the next row for the source data. +/// Must be a multiple of sizeof(type). +/// Must not be less than width * sizeof(type). +/// @param src_width Number of elements in the source row. +/// @param src_height Number of rows in the source data. +/// @param dst Pointer to the destination data. Must be non-null. +/// @param dst_stride Distance in bytes from the start of one row to the +/// start of the next row for the destination data. +/// Must be a multiple of sizeof(type). +/// Must not be less than width * sizeof(type). +/// @param dst_width Number of elements in the destination row. +/// Must be src_width * 2. +/// @param dst_height Number of rows in the destination data. +/// Must be src_height * 2. +/// +INTRINSICCV_API_DECLARATION(intrinsiccv_resize_linear_u8, const uint8_t *src, + size_t src_stride, size_t src_width, + size_t src_height, uint8_t *dst, size_t dst_stride, + size_t dst_width, size_t dst_height); + /// Calculates vertical derivative approximation with Sobel filter. /// /// The used convolution kernel is: diff --git a/intrinsiccv/include/intrinsiccv/resize/resize_linear.h b/intrinsiccv/include/intrinsiccv/resize/resize_linear.h new file mode 100644 index 000000000..d5dcff866 --- /dev/null +++ b/intrinsiccv/include/intrinsiccv/resize/resize_linear.h @@ -0,0 +1,35 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef INTRINSICCV_RESIZE_RESIZE_LINEAR_H +#define INTRINSICCV_RESIZE_RESIZE_LINEAR_H + +#include "intrinsiccv/intrinsiccv.h" + +namespace intrinsiccv { + +namespace neon { +intrinsiccv_error_t resize_linear_u8(const uint8_t *src, size_t src_stride, + size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, + size_t dst_width, size_t dst_height); +} // namespace neon + +namespace sve2 { +intrinsiccv_error_t resize_linear_u8(const uint8_t *src, size_t src_stride, + size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, + size_t dst_width, size_t dst_height); +} // namespace sve2 + +namespace sme2 { +intrinsiccv_error_t resize_linear_u8(const uint8_t *src, size_t src_stride, + size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, + size_t dst_width, size_t dst_height); +} // namespace sme2 + +} // namespace intrinsiccv + +#endif // INTRINSICCV_RESIZE_RESIZE_H diff --git a/intrinsiccv/src/resize/resize_linear_api.cpp b/intrinsiccv/src/resize/resize_linear_api.cpp new file mode 100644 index 000000000..010b1f8b7 --- /dev/null +++ b/intrinsiccv/src/resize/resize_linear_api.cpp @@ -0,0 +1,14 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "intrinsiccv/dispatch.h" +#include "intrinsiccv/intrinsiccv.h" +#include "intrinsiccv/resize/resize_linear.h" + +namespace intrinsiccv { +INTRINSICCV_MULTIVERSION_C_API( + intrinsiccv_resize_linear_u8, intrinsiccv::neon::resize_linear_u8, + INTRINSICCV_SVE2_IMPL_IF(intrinsiccv::sve2::resize_linear_u8), + intrinsiccv::sme2::resize_linear_u8); +} // namespace intrinsiccv diff --git a/intrinsiccv/src/resize/resize_linear_neon.cpp b/intrinsiccv/src/resize/resize_linear_neon.cpp new file mode 100644 index 000000000..18733e4f1 --- /dev/null +++ b/intrinsiccv/src/resize/resize_linear_neon.cpp @@ -0,0 +1,183 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "intrinsiccv/intrinsiccv.h" +#include "intrinsiccv/neon.h" +#include "intrinsiccv/resize/resize_linear.h" + +namespace intrinsiccv::neon { + +INTRINSICCV_TARGET_FN_ATTRS static intrinsiccv_error_t resize_2x2_u8( + const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride) { + if (src_width == 0 || src_height == 0) { + return INTRINSICCV_OK; + } + + size_t dst_width = src_width * 2; + + auto lerp1d_scalar = [](uint8_t near, uint8_t far) { + return (near * 3 + far + 2) >> 2; + }; + + auto lerp1d_vector = [](uint8x8_t near, uint8x8_t far) { + uint8x8_t three = vdup_n_u8(3); + uint8x8_t two = vdup_n_u8(2); + + // near * 3 + uint16x8_t near3 = vmull_u8(near, three); + + // far + 2 + uint16x8_t far_2 = vaddl_u8(far, two); + + // near * 3 + far * 2 + uint16x8_t near3_far_2 = vaddq_u16(near3, far_2); + + // (near * 3 + far * 2) / 4 + uint8x8_t near3_far_2_div4 = vshrn_n_u16(near3_far_2, 2); + + return near3_far_2_div4; + }; + + auto lerp2d_scalar = [](uint8_t near, uint8_t mid_a, uint8_t mid_b, + uint8_t far) { + return (near * 9 + (mid_a + mid_b) * 3 + far + 8) >> 4; + }; + + auto lerp2d_vector = [](uint8x8_t near, uint8x8_t mid_a, uint8x8_t mid_b, + uint8x8_t far) { + uint8x8_t nine = vdup_n_u8(9); + uint16x8_t three = vdupq_n_u16(3); + uint8x8_t eight = vdup_n_u8(8); + + // mid_a + mid_b + uint16x8_t mid = vaddl_u8(mid_a, mid_b); + + // near * 9 + uint16x8_t near9 = vmull_u8(near, nine); + + // near * 9 + (mid_a + mid_b) * 3 + uint16x8_t near9_mid3 = vmlaq_u16(near9, mid, three); + + // far + 8 + uint16x8_t far_8 = vaddl_u8(far, eight); + + // near * 9 + (mid_a + mid_b) * 3 + far + 8 + uint16x8_t near9_mid3_far_8 = vaddq_u16(near9_mid3, far_8); + + // (near * 9 + (mid_a + mid_b) * 3 + far + 8) / 16 + uint8x8_t near9_mid3_far_8_div16 = vshrn_n_u16(near9_mid3_far_8, 4); + return near9_mid3_far_8_div16; + }; + + // Handle top or bottom edge + auto process_edge_row = [src_width, dst_width, lerp1d_scalar, lerp1d_vector]( + const uint8_t *src_row, uint8_t *dst_row) { + // Left element + dst_row[0] = src_row[0]; + + // Right element + dst_row[dst_width - 1] = src_row[src_width - 1]; + + // Middle elements + size_t src_x = 0; + for (; src_x + sizeof(uint8x8_t) < src_width; src_x += sizeof(uint8x8_t)) { + size_t dst_x = src_x * 2 + 1; + uint8x8_t src_left = vld1_u8(src_row + src_x); + uint8x8_t src_right = vld1_u8(src_row + src_x + 1); + + uint8x8_t dst_left = lerp1d_vector(src_left, src_right); + uint8x8_t dst_right = lerp1d_vector(src_right, src_left); + + vst2_u8(dst_row + dst_x, (uint8x8x2_t{dst_left, dst_right})); + } + for (; src_x + 1 < src_width; ++src_x) { + size_t dst_x = src_x * 2 + 1; + const uint8_t src_left = src_row[src_x], src_right = src_row[src_x + 1]; + dst_row[dst_x] = lerp1d_scalar(src_left, src_right); + dst_row[dst_x + 1] = lerp1d_scalar(src_right, src_left); + } + }; + + auto process_row = [src_width, dst_width, lerp1d_scalar, lerp2d_scalar, + lerp2d_vector](const uint8_t *src_row0, + const uint8_t *src_row1, uint8_t *dst_row0, + uint8_t *dst_row1) { + // Left element + dst_row0[0] = lerp1d_scalar(src_row0[0], src_row1[0]); + dst_row1[0] = lerp1d_scalar(src_row1[0], src_row0[0]); + + // Right element + dst_row0[dst_width - 1] = + lerp1d_scalar(src_row0[src_width - 1], src_row1[src_width - 1]); + dst_row1[dst_width - 1] = + lerp1d_scalar(src_row1[src_width - 1], src_row0[src_width - 1]); + + // Middle elements + size_t src_x = 0; + for (; src_x + sizeof(uint8x8_t) < src_width; src_x += sizeof(uint8x8_t)) { + size_t dst_x = src_x * 2 + 1; + + uint8x8_t src_tl = vld1_u8(src_row0 + src_x); + uint8x8_t src_tr = vld1_u8(src_row0 + src_x + 1); + uint8x8_t src_bl = vld1_u8(src_row1 + src_x); + uint8x8_t src_br = vld1_u8(src_row1 + src_x + 1); + + uint8x8_t dst_tl = lerp2d_vector(src_tl, src_tr, src_bl, src_br); + uint8x8_t dst_tr = lerp2d_vector(src_tr, src_tl, src_br, src_bl); + uint8x8_t dst_bl = lerp2d_vector(src_bl, src_tl, src_br, src_tr); + uint8x8_t dst_br = lerp2d_vector(src_br, src_tr, src_bl, src_tl); + + vst2_u8(dst_row0 + dst_x, (uint8x8x2_t{dst_tl, dst_tr})); + vst2_u8(dst_row1 + dst_x, (uint8x8x2_t{dst_bl, dst_br})); + } + for (; src_x + 1 < src_width; ++src_x) { + size_t dst_x = src_x * 2 + 1; + const uint8_t src_tl = src_row0[src_x], src_tr = src_row0[src_x + 1], + src_bl = src_row1[src_x], src_br = src_row1[src_x + 1]; + dst_row0[dst_x] = lerp2d_scalar(src_tl, src_tr, src_bl, src_br); + dst_row0[dst_x + 1] = lerp2d_scalar(src_tr, src_tl, src_br, src_bl); + dst_row1[dst_x] = lerp2d_scalar(src_bl, src_tl, src_br, src_tr); + dst_row1[dst_x + 1] = lerp2d_scalar(src_br, src_tr, src_bl, src_tl); + } + }; + + // Top row + process_edge_row(src, dst); + + // Middle rows + for (size_t src_y = 0; src_y + 1 < src_height; ++src_y) { + size_t dst_y = src_y * 2 + 1; + const uint8_t *src_row0 = src + src_stride * src_y; + const uint8_t *src_row1 = src_row0 + src_stride; + uint8_t *dst_row0 = dst + dst_stride * dst_y; + uint8_t *dst_row1 = dst_row0 + dst_stride; + + process_row(src_row0, src_row1, dst_row0, dst_row1); + } + + // Bottom row + process_edge_row(src + src_stride * (src_height - 1), + dst + dst_stride * (src_height * 2 - 1)); + + return INTRINSICCV_OK; +} + +INTRINSICCV_TARGET_FN_ATTRS +intrinsiccv_error_t resize_linear_u8(const uint8_t *src, size_t src_stride, + size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, + size_t dst_width, size_t dst_height) { + CHECK_POINTER_AND_STRIDE(src, src_stride); + CHECK_POINTER_AND_STRIDE(dst, dst_stride); + CHECK_IMAGE_SIZE(dst_width, dst_height); + + if (src_width * 2 == dst_width && src_height * 2 == dst_height) { + return resize_2x2_u8(src, src_stride, src_width, src_height, dst, + dst_stride); + } + return INTRINSICCV_ERROR_NOT_IMPLEMENTED; +} + +} // namespace intrinsiccv::neon diff --git a/intrinsiccv/src/resize/resize_linear_sc.h b/intrinsiccv/src/resize/resize_linear_sc.h new file mode 100644 index 000000000..148eee981 --- /dev/null +++ b/intrinsiccv/src/resize/resize_linear_sc.h @@ -0,0 +1,188 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef INTRINSICCV_RESIZE_LINEAR_SC_H +#define INTRINSICCV_RESIZE_LINEAR_SC_H + +#include "intrinsiccv/intrinsiccv.h" +#include "intrinsiccv/sve2.h" + +namespace intrinsiccv::sve2 { + +INTRINSICCV_TARGET_FN_ATTRS static intrinsiccv_error_t resize_2x2_u8_sc( + const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride) INTRINSICCV_STREAMING_COMPATIBLE { + if (src_width == 0 || src_height == 0) { + return INTRINSICCV_OK; + } + + size_t dst_width = src_width * 2; + size_t dst_height = src_height * 2; + + auto lerp1d_scalar = [](uint8_t near, uint8_t far) + INTRINSICCV_STREAMING_COMPATIBLE { + return (near * 3 + far + 2) >> 2; + }; + + auto lerp1d_vector = [](svuint8_t near, + svuint8_t far) INTRINSICCV_STREAMING_COMPATIBLE { + // near * 3 + svuint16_t near3b = svmullb(near, uint8_t{3}); + svuint16_t near3t = svmullt(near, uint8_t{3}); + + // near * 3 + far + svuint16_t near3_far_b = svaddwb(near3b, far); + svuint16_t near3_far_t = svaddwt(near3t, far); + + // near * 3 + far + 2 + svuint16_t near3_far_2b = svaddwb(near3_far_b, uint8_t{2}); + svuint16_t near3_far_2t = svaddwt(near3_far_t, uint8_t{2}); + + // (near * 3 + far + 2) / 4 + svuint8_t near3_far_2_div4 = svshrnb_n_u16(near3_far_2b, 2); + near3_far_2_div4 = svshrnt_n_u16(near3_far_2_div4, near3_far_2t, 2); + return near3_far_2_div4; + }; + + auto lerp2d_vector = [](svbool_t pg, svuint8_t near, svuint8_t mid_a, + svuint8_t mid_b, + svuint8_t far) INTRINSICCV_STREAMING_COMPATIBLE { + // near * 9 + svuint16_t near9b = svmullb(near, uint8_t{9}); + svuint16_t near9t = svmullt(near, uint8_t{9}); + + // mid_a + mid_b + svuint16_t midb = svaddlb(mid_a, mid_b); + svuint16_t midt = svaddlt(mid_a, mid_b); + + // near * 9 + (mid_a + mid_b) * 3 + svuint16_t near9_mid3b = svmla_x(pg, near9b, midb, uint16_t{3}); + svuint16_t near9_mid3t = svmla_x(pg, near9t, midt, uint16_t{3}); + + // near * 9 + (mid_a + mid_b) * 3 + far + svuint16_t near9_mid3_far_b = svaddwb(near9_mid3b, far); + svuint16_t near9_mid3_far_t = svaddwt(near9_mid3t, far); + + // near * 9 + (mid_a + mid_b) * 3 + far + 8 + svuint16_t near9_mid3_far_8b = svaddwb(near9_mid3_far_b, uint8_t{8}); + svuint16_t near9_mid3_far_8t = svaddwt(near9_mid3_far_t, uint8_t{8}); + + // (near * 9 + (mid_a + mid_b) * 3 + far + 8) / 16 + svuint8_t near9_mid3_far_8_div16 = svshrnb_n_u16(near9_mid3_far_8b, 4); + near9_mid3_far_8_div16 = + svshrnt_n_u16(near9_mid3_far_8_div16, near9_mid3_far_8t, 4); + return near9_mid3_far_8_div16; + }; + + // Work-around for clang-format oddness. +#define ISC INTRINSICCV_STREAMING_COMPATIBLE + + // Handle top or bottom edge + auto process_edge_row = [src_width, lerp1d_vector](const uint8_t *src_row, + uint8_t *dst_row) ISC { + for (size_t src_x = 0; src_x + 1 < src_width; src_x += svcntb()) { + size_t dst_x = src_x * 2 + 1; + + svbool_t pg = svwhilelt_b8(src_x + 1, src_width); + + svuint8_t src_left = svld1_u8(pg, src_row + src_x); + svuint8_t src_right = svld1_u8(pg, src_row + src_x + 1); + + svuint8_t dst_left = lerp1d_vector(src_left, src_right); + svuint8_t dst_right = lerp1d_vector(src_right, src_left); + + svst2_u8(pg, dst_row + dst_x, svcreate2(dst_left, dst_right)); + } + }; + + auto process_row = [src_width, lerp2d_vector]( + const uint8_t *src_row0, const uint8_t *src_row1, + uint8_t *dst_row0, + uint8_t *dst_row1) INTRINSICCV_STREAMING_COMPATIBLE { + // Middle elements + for (size_t src_x = 0; src_x + 1 < src_width; src_x += svcntb()) { + size_t dst_x = src_x * 2 + 1; + + svbool_t pg = svwhilelt_b8(src_x + 1, src_width); + + svuint8_t src_tl = svld1_u8(pg, src_row0 + src_x); + svuint8_t src_tr = svld1_u8(pg, src_row0 + src_x + 1); + svuint8_t src_bl = svld1_u8(pg, src_row1 + src_x); + svuint8_t src_br = svld1_u8(pg, src_row1 + src_x + 1); + + svuint8_t dst_tl = lerp2d_vector(pg, src_tl, src_tr, src_bl, src_br); + svuint8_t dst_tr = lerp2d_vector(pg, src_tr, src_tl, src_br, src_bl); + svuint8_t dst_bl = lerp2d_vector(pg, src_bl, src_tl, src_br, src_tr); + svuint8_t dst_br = lerp2d_vector(pg, src_br, src_tr, src_bl, src_tl); + + svst2_u8(pg, dst_row0 + dst_x, svcreate2(dst_tl, dst_tr)); + svst2_u8(pg, dst_row1 + dst_x, svcreate2(dst_bl, dst_br)); + } + }; + + // Corners + dst[0] = src[0]; + dst[dst_width - 1] = src[src_width - 1]; + dst[dst_stride * (dst_height - 1)] = src[src_stride * (src_height - 1)]; + dst[dst_stride * (dst_height - 1) + dst_width - 1] = + src[src_stride * (src_height - 1) + src_width - 1]; + + // Left & right edge + for (size_t src_y = 0; src_y + 1 < src_height; ++src_y) { + size_t dst_y = src_y * 2 + 1; + const uint8_t *src_row0 = src + src_stride * src_y; + const uint8_t *src_row1 = src_row0 + src_stride; + uint8_t *dst_row0 = dst + dst_stride * dst_y; + uint8_t *dst_row1 = dst_row0 + dst_stride; + + // Left edge + dst_row0[0] = lerp1d_scalar(src_row0[0], src_row1[0]); + dst_row1[0] = lerp1d_scalar(src_row1[0], src_row0[0]); + + // Right edge + dst_row0[dst_width - 1] = + lerp1d_scalar(src_row0[src_width - 1], src_row1[src_width - 1]); + dst_row1[dst_width - 1] = + lerp1d_scalar(src_row1[src_width - 1], src_row0[src_width - 1]); + } + + // Top row + process_edge_row(src, dst); + + // Middle rows + for (size_t src_y = 0; src_y + 1 < src_height; ++src_y) { + size_t dst_y = src_y * 2 + 1; + const uint8_t *src_row0 = src + src_stride * src_y; + const uint8_t *src_row1 = src_row0 + src_stride; + uint8_t *dst_row0 = dst + dst_stride * dst_y; + uint8_t *dst_row1 = dst_row0 + dst_stride; + + process_row(src_row0, src_row1, dst_row0, dst_row1); + } + + // Bottom row + process_edge_row(src + src_stride * (src_height - 1), + dst + dst_stride * (src_height * 2 - 1)); + + return INTRINSICCV_OK; +} + +INTRINSICCV_TARGET_FN_ATTRS static intrinsiccv_error_t resize_linear_u8_sc( + const uint8_t *src, size_t src_stride, size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, size_t dst_width, + size_t dst_height) INTRINSICCV_STREAMING_COMPATIBLE { + CHECK_POINTER_AND_STRIDE(src, src_stride); + CHECK_POINTER_AND_STRIDE(dst, dst_stride); + CHECK_IMAGE_SIZE(dst_width, dst_height); + + if (src_width * 2 == dst_width && src_height * 2 == dst_height) { + return resize_2x2_u8_sc(src, src_stride, src_width, src_height, dst, + dst_stride); + } + return INTRINSICCV_ERROR_NOT_IMPLEMENTED; +} + +} // namespace intrinsiccv::sve2 + +#endif // INTRINSICCV_RESIZE_SC_H diff --git a/intrinsiccv/src/resize/resize_linear_sme2.cpp b/intrinsiccv/src/resize/resize_linear_sme2.cpp new file mode 100644 index 000000000..125daaa13 --- /dev/null +++ b/intrinsiccv/src/resize/resize_linear_sme2.cpp @@ -0,0 +1,18 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "intrinsiccv/resize/resize_linear.h" +#include "resize_linear_sc.h" + +namespace intrinsiccv::sme2 { +INTRINSICCV_LOCALLY_STREAMING INTRINSICCV_TARGET_FN_ATTRS intrinsiccv_error_t +resize_linear_u8(const uint8_t *src, size_t src_stride, size_t src_width, + size_t src_height, uint8_t *dst, size_t dst_stride, + size_t dst_width, size_t dst_height) { + return intrinsiccv::sve2::resize_linear_u8_sc(src, src_stride, src_width, + src_height, dst, dst_stride, + dst_width, dst_height); +} + +} // namespace intrinsiccv::sme2 diff --git a/intrinsiccv/src/resize/resize_linear_sve2.cpp b/intrinsiccv/src/resize/resize_linear_sve2.cpp new file mode 100644 index 000000000..06fe619ff --- /dev/null +++ b/intrinsiccv/src/resize/resize_linear_sve2.cpp @@ -0,0 +1,18 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include "intrinsiccv/resize/resize_linear.h" +#include "resize_linear_sc.h" + +namespace intrinsiccv::sve2 { +INTRINSICCV_TARGET_FN_ATTRS +intrinsiccv_error_t resize_linear_u8(const uint8_t *src, size_t src_stride, + size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, + size_t dst_width, size_t dst_height) { + return resize_linear_u8_sc(src, src_stride, src_width, src_height, dst, + dst_stride, dst_width, dst_height); +} + +} // namespace intrinsiccv::sve2 diff --git a/test/api/CMakeLists.txt b/test/api/CMakeLists.txt index ab591e547..22ea34bbc 100644 --- a/test/api/CMakeLists.txt +++ b/test/api/CMakeLists.txt @@ -39,4 +39,5 @@ target_link_libraries( intrinsiccv-api-test intrinsiccv gtest_main + gmock ) diff --git a/test/api/test_resize_linear.cpp b/test/api/test_resize_linear.cpp new file mode 100644 index 000000000..9446577d7 --- /dev/null +++ b/test/api/test_resize_linear.cpp @@ -0,0 +1,287 @@ +// SPDX-FileCopyrightText: 2024 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include + +#include "framework/array.h" +#include "framework/utils.h" +#include "intrinsiccv/intrinsiccv.h" +#include "test_config.h" + +TEST(ResizeLinear, NotImplemented) { + const uint8_t src[1] = {}; + uint8_t dst[4]; + + EXPECT_EQ(INTRINSICCV_ERROR_NOT_IMPLEMENTED, + intrinsiccv_resize_linear_u8(src, 1, 1, 1, dst, 2, 2, 1)); + EXPECT_EQ(INTRINSICCV_ERROR_NOT_IMPLEMENTED, + intrinsiccv_resize_linear_u8(src, 1, 1, 1, dst, 1, 1, 2)); +} + +TEST(ResizeLinear, NullPointer) { + const uint8_t src[1] = {}; + uint8_t dst[4]; + test::test_null_args(intrinsiccv_resize_linear_u8, src, 1, 1, 1, dst, 2, 2, + 2); +} + +TEST(ResizeLinear, InvalidImageSize) { + const uint8_t src[1] = {}; + uint8_t dst[4]; + + EXPECT_EQ(INTRINSICCV_ERROR_RANGE, + intrinsiccv_resize_linear_u8(src, 1, 1, 1, dst, + INTRINSICCV_MAX_IMAGE_PIXELS + 1, + INTRINSICCV_MAX_IMAGE_PIXELS + 1, 1)); + + EXPECT_EQ(INTRINSICCV_ERROR_RANGE, + intrinsiccv_resize_linear_u8( + src, 1, 1, 1, dst, INTRINSICCV_MAX_IMAGE_PIXELS, + INTRINSICCV_MAX_IMAGE_PIXELS, INTRINSICCV_MAX_IMAGE_PIXELS)); +} + +TEST(ResizeLinear, ZeroImageSize) { + const uint8_t src[1] = {}; + uint8_t dst[1]; + EXPECT_EQ(INTRINSICCV_OK, + intrinsiccv_resize_linear_u8(src, 0, 0, 0, dst, 0, 0, 0)); + EXPECT_EQ(INTRINSICCV_OK, + intrinsiccv_resize_linear_u8(src, 1, 1, 0, dst, 2, 2, 0)); + EXPECT_EQ(INTRINSICCV_OK, + intrinsiccv_resize_linear_u8(src, 0, 0, 1, dst, 0, 0, 2)); +} + +static void resize_linear_unaccelerated(const uint8_t *src, size_t src_stride, + size_t src_width, size_t src_height, + uint8_t *dst, size_t dst_stride, + size_t dst_width, size_t dst_height) { + auto lerp1d = [](uint8_t near, uint8_t far) { + return (near * 3 + far + 2) >> 2; + }; + + auto lerp2d = [](uint8_t near, uint8_t mid_a, uint8_t mid_b, uint8_t far) { + return (near * 9 + (mid_a + mid_b) * 3 + far + 8) >> 4; + }; + + auto process_row = [src_width, dst_width, lerp1d, lerp2d]( + const uint8_t *src_row0, const uint8_t *src_row1, + uint8_t *dst_row0, uint8_t *dst_row1) { + // Left element + dst_row0[0] = lerp1d(src_row0[0], src_row1[0]); + dst_row1[0] = lerp1d(src_row1[0], src_row0[0]); + + // Right element + dst_row0[dst_width - 1] = + lerp1d(src_row0[src_width - 1], src_row1[src_width - 1]); + dst_row1[dst_width - 1] = + lerp1d(src_row1[src_width - 1], src_row0[src_width - 1]); + + for (size_t src_x = 0; src_x + 1 < src_width; ++src_x) { + size_t dst_x = src_x * 2 + 1; + const uint8_t src_tl = src_row0[src_x], src_tr = src_row0[src_x + 1], + src_bl = src_row1[src_x], src_br = src_row1[src_x + 1]; + dst_row0[dst_x] = lerp2d(src_tl, src_tr, src_bl, src_br); + dst_row0[dst_x + 1] = lerp2d(src_tr, src_tl, src_br, src_bl); + dst_row1[dst_x] = lerp2d(src_bl, src_tl, src_br, src_tr); + dst_row1[dst_x + 1] = lerp2d(src_br, src_tr, src_bl, src_tl); + } + }; + + // Top row + process_row(src, src, dst, dst); + + // Middle rows + for (size_t src_y = 0; src_y + 1 < src_height; ++src_y) { + size_t dst_y = src_y * 2 + 1; + const uint8_t *src_row0 = src + src_stride * src_y; + const uint8_t *src_row1 = src_row0 + src_stride; + uint8_t *dst_row0 = dst + dst_stride * dst_y; + uint8_t *dst_row1 = dst_row0 + dst_stride; + + process_row(src_row0, src_row1, dst_row0, dst_row1); + } + + // Bottom row + const uint8_t *last_src_row = src + src_stride * (src_height - 1); + uint8_t *last_dst_row = dst + dst_stride * (dst_height - 1); + process_row(last_src_row, last_src_row, last_dst_row, last_dst_row); +} + +TEST(ResizeLinear, LargeDimensions) { + size_t src_width = 2049; + size_t src_height = 5; + size_t src_stride = src_width + 6; + size_t dst_width = src_width * 2; + size_t dst_height = src_height * 2; + size_t dst_stride = dst_width + 3; + + std::vector src, dst, expected_data; + src.resize(src_stride * src_height); + dst.resize(dst_stride * dst_height); + expected_data.resize(dst_stride * dst_height); + std::mt19937 generator{test::Options::seed()}; + std::generate(src.begin(), src.end(), generator); + resize_linear_unaccelerated(src.data(), src_stride, src_width, src_height, + expected_data.data(), dst_stride, dst_width, + dst_height); + ASSERT_EQ(INTRINSICCV_OK, intrinsiccv_resize_linear_u8( + src.data(), src_stride, src_width, src_height, + dst.data(), dst_stride, dst_width, dst_height)); + + for (size_t y = 0; y < dst_height; ++y) { + // Compare as int to avoid test framework displaying values as chars. + std::vector actual{ + dst.begin() + static_cast(y * dst_stride), + dst.begin() + static_cast(y * dst_stride + dst_width)}, + expected{expected_data.begin() + static_cast(y * dst_stride), + expected_data.begin() + + static_cast(y * dst_stride + dst_width)}; + EXPECT_THAT(actual, ::testing::ElementsAreArray(expected)) << "Row #" << y; + } +} + +// Parameterised tests +struct ResizeTestParams { + std::vector> src; + std::vector> expected; + + friend void PrintTo(const ResizeTestParams &v, std::ostream *os) { + *os << "([\n"; + for (size_t y = 0; y < v.src.size(); ++y) { + const auto &row = v.src[y]; + *os << " ["; + for (size_t x = 0; x < row.size(); ++x) { + *os << std::setw(3) << int{row[x]}; + if (x + 1 != row.size()) { + *os << ", "; + } + } + *os << "]"; + if (y + 1 != v.src.size()) { + *os << ",\n"; + } + } + *os << "], " << v.expected.size() << ")"; + } +}; + +void do_linear_resize_test(const ResizeTestParams ¶m, size_t src_padding, + size_t dst_padding) { + size_t src_width = param.src[0].size(); + size_t src_height = param.src.size(); + size_t src_stride = src_width + src_padding; + size_t dst_width = param.expected[0].size(); + size_t dst_height = param.expected.size(); + size_t dst_stride = dst_width + dst_padding; + + auto flatten = [](const std::vector> &vec2d, + size_t padding) { + std::vector result; + for (const auto &row : vec2d) { + result.insert(result.end(), row.begin(), row.end()); + result.resize(result.size() + padding); + } + return result; + }; + std::vector src = flatten(param.src, src_padding), dst; + dst.resize(dst_stride * dst_height); + + ASSERT_EQ(INTRINSICCV_OK, intrinsiccv_resize_linear_u8( + src.data(), src_stride, src_width, src_height, + dst.data(), dst_stride, dst_width, dst_height)); + for (size_t y = 0; y < dst_height; ++y) { + // Compare as int to avoid test framework displaying values as chars. + std::vector actual{ + dst.begin() + static_cast(y * dst_stride), + dst.begin() + static_cast(y * dst_stride + dst_width)}, + expected{param.expected[y].begin(), param.expected[y].end()}; + EXPECT_THAT(actual, ::testing::ElementsAreArray(expected)) << "Row #" << y; + } +} + +class ResizeLinear : public testing::TestWithParam {}; + +TEST_P(ResizeLinear, ResizeNoPadding) { + do_linear_resize_test(GetParam(), 0, 0); +} + +TEST_P(ResizeLinear, ResizeWithPadding) { + do_linear_resize_test(GetParam(), 1, 2); +} + +TEST_P(ResizeLinear, ResizePadDst) { do_linear_resize_test(GetParam(), 0, 3); } + +TEST_P(ResizeLinear, ResizePadSrc) { do_linear_resize_test(GetParam(), 4, 0); } + +using P = ResizeTestParams; + +INSTANTIATE_TEST_SUITE_P( + ResizeLinear, ResizeLinear, + testing::Values( + // 1*1 -> 2*2 + P{{{123}}, {{123, 123}, {123, 123}}}, + // 2*1 -> 4*2 + P{{{0, 255}}, {{0, 64, 191, 255}, {0, 64, 191, 255}}}, + // 2*1 -> 4*2. Check rounding behaviour. + P{{{1, 63}}, {{1, 17, 48, 63}, {1, 17, 48, 63}}}, + // 2*2 -> 4*4 + P{{{0, 255}, {100, 8}}, + {{0, 64, 191, 255}, + {25, 67, 151, 193}, + {75, 74, 71, 70}, + {100, 77, 31, 8}}}, + // 3*3 -> 6*6 + P{{{1, 63, 164}, {28, 251, 35}, {218, 64, 99}}, + {{1, 17, 48, 88, 139, 164}, + {8, 33, 84, 115, 126, 132}, + {21, 67, 158, 170, 101, 67}, + {76, 108, 172, 166, 89, 51}, + {171, 156, 126, 104, 90, 83}, + {218, 180, 103, 73, 90, 99}}}, + // 4*4 -> 8*8 + P{{{10, 30, 5, 70}, + {255, 11, 11, 12}, + {127, 127, 128, 0}, + {200, 100, 150, 50}}, + {{10, 15, 25, 24, 11, 21, 54, 70}, + {71, 60, 37, 21, 11, 19, 43, 56}, + {194, 149, 60, 14, 11, 14, 22, 27}, + {223, 177, 86, 40, 40, 32, 17, 9}, + {159, 144, 113, 98, 99, 75, 27, 3}, + {145, 139, 127, 124, 130, 103, 43, 13}, + {182, 163, 126, 116, 135, 118, 64, 38}, + {200, 175, 125, 113, 138, 125, 75, 50}}}, + // 35*2 -> 70*4 + P{{{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 82, + 155, 104, 108, 227, 46, 162, 21, 220, 235, 183, 113, 225, + 146, 196, 144, 104, 148, 19, 126, 172, 9, 12, 61}, + {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 193, + 44, 105, 191, 106, 73, 148, 13, 161, 118, 21, 3, 34, + 40, 150, 120, 68, 75, 14, 31, 124, 221, 214, 146}}, + {{0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, + 6, 6, 7, 7, 8, 8, 9, 9, 10, 28, 64, 100, + 137, 142, 117, 105, 107, 138, 197, 182, 91, 75, 133, 127, + 56, 71, 170, 224, 231, 222, 196, 166, 131, 141, 197, 205, + 166, 159, 184, 183, 157, 134, 114, 115, 137, 116, 51, 46, + 99, 138, 161, 131, 50, 10, 11, 24, 49, 61}, + {1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, + 7, 7, 8, 8, 9, 9, 10, 10, 11, 36, 85, 114, + 123, 122, 110, 110, 123, 146, 180, 161, 89, 79, 132, 124, + 54, 66, 159, 205, 206, 190, 158, 128, 100, 108, 154, 163, + 134, 136, 168, 173, 150, 127, 106, 104, 121, 102, 46, 39, + 81, 117, 146, 136, 87, 62, 62, 67, 77, 82}, + {3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, + 10, 10, 11, 11, 12, 12, 13, 51, 127, 142, 95, 80, 97, 121, + 154, 162, 145, 119, 84, 88, 130, 117, 49, 55, 136, 169, 154, 126, + 83, 54, 38, 43, 69, 78, 70, 90, 138, 153, 135, 114, 89, 81, + 89, 74, 35, 25, 45, 75, 116, 144, 160, 167, 165, 154, 134, 125}, + {4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, + 10, 10, 11, 11, 12, 12, 13, 13, 14, 59, 148, 156, + 81, 59, 90, 127, 170, 170, 127, 98, 81, 92, 129, 114, + 47, 50, 124, 150, 129, 94, 45, 17, 8, 11, 26, 36, + 39, 68, 123, 143, 128, 107, 81, 70, 73, 60, 29, 18, + 27, 54, 101, 148, 197, 219, 216, 197, 163, 146}}})); -- GitLab