From b503d14a840fec13f6dadbb4e5641b3163bb04eb Mon Sep 17 00:00:00 2001 From: Denes Tarjan Date: Tue, 5 Aug 2025 12:58:15 +0000 Subject: [PATCH] WIP new border handling for SME --- .vscode/launch.json | 45 ++ .vscode/tasks.json | 62 +++ .../include/kleidicv/workspace/separable.h | 64 ++- kleidicv/src/filters/border_generic_sc.h | 398 ++++++++++++++++++ kleidicv/src/filters/gaussian_blur_fixed_sc.h | 91 +++- .../src/filters/separable_filter_2d_api.cpp | 2 +- kleidicv/src/filters/separable_filter_2d_sc.h | 2 + kleidicv/src/filters/sobel_sc.h | 7 +- test/CMakeLists.txt | 6 +- test/api/test_gaussian_blur.cpp | 28 +- test/library/CMakeLists.txt | 69 +++ test/library/test_border_generic_sve2.cpp | 162 +++++++ 12 files changed, 896 insertions(+), 40 deletions(-) create mode 100644 kleidicv/src/filters/border_generic_sc.h create mode 100644 test/library/CMakeLists.txt create mode 100644 test/library/test_border_generic_sve2.cpp diff --git a/.vscode/launch.json b/.vscode/launch.json index ab26a3c21..873d3b319 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -16,6 +16,51 @@ "internalConsoleOptions": "openOnSessionStart", "preLaunchTask": "Build KleidiCV for debug", }, + { + "name": "SVE2 library tests, 128 bits", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/build/kleidicv-debug/test/library/kleidicv-library-test", + "MIMode": "gdb", + "miDebuggerPath": "/usr/bin/gdb-multiarch", + "miDebuggerServerAddress": "localhost:2345", + "targetArchitecture": "arm64", + "debugServerPath" : "${workspaceFolder}/.devcontainer/start_qemu.sh", + "debugServerArgs": "-g 2345 -cpu max,sve128=on,sme=off ${workspaceFolder}/build/kleidicv-debug/test/library/kleidicv-library-test --vector-length=16 --gtest_filter=*", + "cwd": "${workspaceFolder}", + "internalConsoleOptions": "openOnSessionStart", + "preLaunchTask": "Build KleidiCV for debug", + }, + { + "name": "SVE2 library tests, 512 bits", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/build/kleidicv-debug/test/library/kleidicv-library-test", + "MIMode": "gdb", + "miDebuggerPath": "/usr/bin/gdb-multiarch", + "miDebuggerServerAddress": "localhost:2345", + "targetArchitecture": "arm64", + "debugServerPath" : "${workspaceFolder}/.devcontainer/start_qemu.sh", + "debugServerArgs": "-g 2345 -cpu max,sve512=on,sve-default-vector-length=64,sme=off ${workspaceFolder}/build/kleidicv-debug/test/library/kleidicv-library-test --vector-length=64 --gtest_filter=*", + "cwd": "${workspaceFolder}", + "internalConsoleOptions": "openOnSessionStart", + "preLaunchTask": "Build KleidiCV for debug", + }, + { + "name": "SVE2 library tests, 2048 bits", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/build/kleidicv-debug/test/library/kleidicv-library-test", + "MIMode": "gdb", + "miDebuggerPath": "/usr/bin/gdb-multiarch", + "miDebuggerServerAddress": "localhost:2345", + "targetArchitecture": "arm64", + "debugServerPath" : "${workspaceFolder}/.devcontainer/start_qemu.sh", + "debugServerArgs": "-g 2345 -cpu max,sve2048=on,sve-default-vector-length=256,sme=off ${workspaceFolder}/build/kleidicv-debug/test/library/kleidicv-library-test --vector-length=256 --gtest_filter=*", + "cwd": "${workspaceFolder}", + "internalConsoleOptions": "openOnSessionStart", + "preLaunchTask": "Build KleidiCV for debug", + }, { "name": "NEON API tests", "type": "cppdbg", diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 6ddbdd491..924f50d6e 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -111,6 +111,68 @@ "kind": "test" } }, + { + "label": "SVE2 library tests, 128 bits", + "type": "process", + "command": "qemu-aarch64", + "args": [ + "-cpu", + "max,sve128=on,sme=off", + "${workspaceFolder}/build/kleidicv/test/library/kleidicv-library-test", + "--vector-length=16" + ], + "dependsOn": [ + "Build KleidiCV" + ], + "group": { + "kind": "test" + } + }, + { + "label": "SVE2 library tests, 512 bits", + "type": "process", + "command": "qemu-aarch64", + "args": [ + "-cpu", + "max,sve512=on,sve-default-vector-length=64,sme=off", + "${workspaceFolder}/build/kleidicv/test/library/kleidicv-library-test", + "--vector-length=64" + ], + "dependsOn": [ + "Build KleidiCV" + ], + "group": { + "kind": "test" + } + }, + { + "label": "SVE2 library tests, 2048 bits", + "type": "process", + "command": "qemu-aarch64", + "args": [ + "-cpu", + "max,sve2048=on,sve-default-vector-length=256,sme=off", + "${workspaceFolder}/build/kleidicv/test/library/kleidicv-library-test", + "--vector-length=256" + ], + "dependsOn": [ + "Build KleidiCV" + ], + "group": { + "kind": "test" + } + }, + { + "label": "All library tests", + "dependsOn": [ + "SVE2 library tests, 128 bits", + "SVE2 library tests, 512 bits", + "SVE2 library tests, 2048 bits", + ], + "group": { + "kind": "test" + } + }, { "label": "NEON API tests", "type": "process", diff --git a/kleidicv/include/kleidicv/workspace/separable.h b/kleidicv/include/kleidicv/workspace/separable.h index a9cb2f7aa..f2987de9e 100644 --- a/kleidicv/include/kleidicv/workspace/separable.h +++ b/kleidicv/include/kleidicv/workspace/separable.h @@ -80,8 +80,10 @@ class SeparableFilterWorkspace { SeparableFilterWorkspace() = delete; // Creates a workspace on the heap. + // static Pointer create(Rectangle rect, size_t channels, - size_t intermediate_size) KLEIDICV_STREAMING { + size_t intermediate_size, + size_t kernel_width = 0) KLEIDICV_STREAMING { size_t buffer_rows_number_of_elements = rect.width() * channels; // Adding more elements because of SVE, where interleaving stores are // governed by one predicate. For example, if a predicate requires 7 uint8_t @@ -90,6 +92,10 @@ class SeparableFilterWorkspace { // storing 8 elements. Choosing '3' to account for svst4(). buffer_rows_number_of_elements += 3; + // Add the border elements, at front and end, if needed + size_t margin = kernel_width == 0 ? 0 : (kernel_width - 1) / 2; + buffer_rows_number_of_elements += channels * margin * 2; + size_t buffer_rows_stride = buffer_rows_number_of_elements * intermediate_size; size_t buffer_rows_size = buffer_rows_stride; @@ -148,6 +154,42 @@ class SeparableFilterWorkspace { filter.process_vertical(rect.width(), src_rows.at(vertical_index), buffer_rows, offsets); // Process in the horizontal direction last. + process_horizontal_with_borderhandling(rect.width(), buffer_rows, + dst_rows.at(vertical_index), + filter, horizontal_border); + } + } + + // Processes rows vertically first along the full width + template + void process_using_bordermaker( + Rectangle rect, size_t y_begin, size_t y_end, + Rows src_rows, + Rows dst_rows, size_t channels, + typename FilterType::BorderType border_type, FilterType filter, + BorderMakerType border) KLEIDICV_STREAMING { + // Border helper which calculates border offsets. + typename FilterType::BorderInfoType vertical_border{rect.height(), + border_type}; + typename FilterType::BorderInfoType horizontal_border{rect.width(), + border_type}; + + // Buffer rows which hold intermediate widened data. + auto buffer_rows = Rows{reinterpret_cast( + &data_[buffer_rows_offset_]) + + filter.margin * channels, + buffer_rows_stride_, channels}; + + // Vertical processing loop. + for (size_t vertical_index = y_begin; vertical_index < y_end; + ++vertical_index) { + // Recalculate vertical border offsets. + auto offsets = vertical_border.offsets_with_border(vertical_index); + // Process in the vertical direction first. + filter.process_vertical(rect.width(), src_rows.at(vertical_index), + buffer_rows, offsets); + border.decorate(buffer_rows, filter.margin, rect.width()); + // Process in the horizontal direction last. process_horizontal(rect.width(), buffer_rows, dst_rows.at(vertical_index), filter, horizontal_border); } @@ -198,11 +240,10 @@ class SeparableFilterWorkspace { protected: template - void process_horizontal(size_t width, - Rows buffer_rows, - Rows dst_rows, - FilterType filter, - typename FilterType::BorderInfoType horizontal_border) + void process_horizontal_with_borderhandling( + size_t width, Rows buffer_rows, + Rows dst_rows, FilterType filter, + typename FilterType::BorderInfoType horizontal_border) KLEIDICV_STREAMING { // Margin associated with the filter. constexpr size_t margin = filter.margin; @@ -238,6 +279,17 @@ class SeparableFilterWorkspace { } } + template + void process_horizontal(size_t width, + Rows buffer_rows, + Rows dst_rows, + FilterType filter, + typename FilterType::BorderInfoType horizontal_border) + KLEIDICV_STREAMING { + auto offsets = horizontal_border.offsets_without_border(); + filter.process_horizontal(width, buffer_rows, dst_rows, offsets); + } + // Offset in bytes to the buffer rows from &data_[0]. size_t buffer_rows_offset_; // Stride of the buffer rows. diff --git a/kleidicv/src/filters/border_generic_sc.h b/kleidicv/src/filters/border_generic_sc.h new file mode 100644 index 000000000..8e41cbc73 --- /dev/null +++ b/kleidicv/src/filters/border_generic_sc.h @@ -0,0 +1,398 @@ +// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef KLEIDICV_WORKSPACE_BORDER_GENERIC_SC_H +#define KLEIDICV_WORKSPACE_BORDER_GENERIC_SC_H + +#include +#include +#include + +#include "kleidicv/sve2.h" +#include "kleidicv/types.h" +#include "kleidicv/workspace/border_types.h" + +namespace KLEIDICV_TARGET_NAMESPACE { + +// Border offsets for generic filters. +template +class GenericBorder final { + public: + explicit GenericBorder(size_t width, size_t channels, svuint16_t& left1, + svuint16_t& left2, svuint16_t& right1, + svuint16_t& right2) KLEIDICV_STREAMING + : width_(static_cast(width)), + channels_{static_cast(channels)}, + total_width_(width_* channels_), + border_indices_left_(left1), + border_indices_left_ext_(left2), + border_indices_right_(right1), + border_indices_right_ext_(right2) { + // The result will take some elements from the image (data), and the + // remaining parts from the border. + // An index vector is prepared here to help the process, e.g. for + // replicated borders and 3 channels, the constructed index vector will + // look like this: + // [1, 2, 0, 1, 2, 3, 4, 5] + // (0,1,2 is repeated until index 0 is reached, when the image data + // begins.) Right side is similar, but it is the [5,6,7] that repeats + // after. + uint16_t left[128 + 4], right[128 + 4]; + // This is to ensure the last element be (channels_ - 1) + uint16_t bias = channels_ - 1 - ((svcnth() - 1) % channels_); + for (size_t i = 0; i < svcnth() + 4; ++i) { + left[i] = (i + bias) % channels_; + right[i] = (i % channels_) + svcnth() - channels_; + } + // Analyser thinks left[0] is garbage, but it is not. + // NOLINTBEGIN(clang-analyzer-core.UndefinedBinaryOperatorResult) + border_indices_left_ = svld1_u16(svptrue_b16(), left); + border_indices_left_ext_ = + svld1_u16(svptrue_b16(), left + channels_ - left[0]); + border_indices_right_ = svld1_u16(svptrue_b16(), right); + border_indices_right_ext_ = svld1_u16(svptrue_b16(), right + left[0]); + // NOLINTEND(clang-analyzer-core.UndefinedBinaryOperatorResult) + } + + // Raw column can be bigger than width-1 or less than 0 + ptrdiff_t get_column(ptrdiff_t raw_column) const { + // TODO more border types, this is only the Replicated + return std::max(std::min(raw_column, width_ - 1), + ptrdiff_t{0}); + } + + // Assuming that start_offset is <= 0 + svuint16_t load_left(Rows src_rows, + ptrdiff_t start_offset) const KLEIDICV_STREAMING { + if constexpr (BorderType == FixedBorderType::REPLICATE) { + svuint16_t data = svld1ub_u16(svptrue_b16(), &src_rows[0]); + svuint16_t indices{}; + svuint16_t increasing = svindex_u16(0, 1); + if (-start_offset < static_cast(svcnth())) { + // '-start_offset' elements from the border, the others from the data + svbool_t pg = + svcmpge_n_u16(svptrue_b16(), increasing, + static_cast(svcnth() + start_offset)); + indices = svsplice_u16(pg, border_indices_left_, increasing); + } else { + // 'shift' elements need to be shifted out + ptrdiff_t shift = channels_ - (-start_offset - svcnth()) % channels_; + svbool_t pg = svcmpge_n_u16(svptrue_b16(), increasing, shift); + indices = + svsplice_u16(pg, border_indices_left_, border_indices_left_ext_); + } + return svtbl_u16(data, indices); + } + } + + // Assuming that start_offset is >= width - svcnth() + svuint16_t load_right(Rows src_rows, + ptrdiff_t start_offset) const KLEIDICV_STREAMING { + if constexpr (BorderType == FixedBorderType::REPLICATE) { + svuint16_t data = + svld1ub_u16(svptrue_b16(), &src_rows[total_width_ - svcnth()]); + svuint16_t indices{}; + svuint16_t increasing = svindex_u16(0, 1); + if (start_offset <= width_ * channels_) { + svbool_t pg = svcmpge_n_u16( + svptrue_b16(), increasing, + static_cast( + start_offset - + (total_width_ - static_cast(svcnth())))); + indices = svsplice_u16(pg, increasing, border_indices_right_); + } else { + ptrdiff_t shift = + svcnth() - + (channels_ - (start_offset - width_ * channels_) % channels_); + svbool_t pg = svcmpge_n_u16(svptrue_b16(), increasing, shift); + indices = + svsplice_u16(pg, border_indices_right_ext_, border_indices_right_); + } + + return svtbl_u16(data, indices); + } + } + + private: + ptrdiff_t width_, channels_, total_width_; + svuint16_t &border_indices_left_, &border_indices_left_ext_, + &border_indices_right_, &border_indices_right_ext_; +}; // end of class GenericBorder + +template +class BorderMakerArbitrary { + using VecTraits = typename ::KLEIDICV_TARGET_NAMESPACE::VecTraits; + using VectorType = typename VecTraits::VectorType; + + public: + // OK this is specialized for uint8_t :o + BorderMakerArbitrary(ptrdiff_t channels, svuint8_t& sv0, svuint8_t& sv1, + svuint8_t& sv2) KLEIDICV_STREAMING : channels_(channels), + indices0_(sv0), + indices1_(sv1), + indices2_(sv2) { + if (channels_ == 3) { + size_t kVL = VecTraits::num_lanes(); + indices0_ = svindex_u8(0, 1); + indices1_ = svindex_u8(kVL % 3, 1); + indices2_ = svindex_u8((kVL + kVL) % 3, 1); + // Decrease by 3 while they are >= 3 --> so we get the modulo + size_t steps = (kVL - 1) / 3; + for (size_t i = 0; i < steps; ++i) { + indices0_ = svsub_n_u8_m(svcmpge_n_u8(svptrue_b8(), indices0_, 3), + indices0_, 3); + indices1_ = svsub_n_u8_m(svcmpge_n_u8(svptrue_b8(), indices1_, 3), + indices1_, 3); + indices2_ = svsub_n_u8_m(svcmpge_n_u8(svptrue_b8(), indices2_, 3), + indices2_, 3); + } + } else { + indices0_ = svindex_u8(0, 1); + // It does the same as the modulo for 1,2 and 4 + indices0_ = svand_n_u8_x(svptrue_b8(), indices0_, channels_ - 1); + } + } + // Replicate only + void decorate(Rows rows, ptrdiff_t margin, + ptrdiff_t width) KLEIDICV_STREAMING { + const size_t kVL = VecTraits::num_lanes(); + svbool_t pg_ch = VecTraits::svwhilelt(0UL, rows.channels()); + + // right border + svuint8_t data = svld1_u8(pg_ch, &rows[(width - 1) * rows.channels()]); + if (rows.channels() == 3) { + svuint8_t data0 = svtbl_u8(data, indices0_); + svuint8_t data1 = svtbl_u8(data, indices1_); + svuint8_t data2 = svtbl_u8(data, indices2_); + ptrdiff_t width_plus = (width + margin) * 3; + for (ptrdiff_t x = width * 3; x < width_plus;) { + svbool_t pg = VecTraits::svwhilelt(x, width_plus); + svst1(pg, &rows[x], data0); + x += kVL; + pg = VecTraits::svwhilelt(x, width_plus); + svst1(pg, &rows[x], data1); + x += kVL; + pg = VecTraits::svwhilelt(x, width_plus); + svst1(pg, &rows[x], data2); + x += kVL; + } + } else { + data = svtbl_u8(data, indices0_); + ptrdiff_t width_plus = (width + margin) * rows.channels(); + for (ptrdiff_t x = width * rows.channels(); x < width_plus;) { + svbool_t pg = VecTraits::svwhilelt(x, width_plus); + svst1(pg, &rows[x], data); + x += kVL; + } + } + + // left border + data = svld1_u8(pg_ch, &rows[0]); + if (rows.channels() == 3) { + svuint8_t data0 = svtbl_u8(data, indices0_); + svuint8_t data1 = svtbl_u8(data, indices1_); + svuint8_t data2 = svtbl_u8(data, indices2_); + ptrdiff_t mwidth = margin * 3; + for (ptrdiff_t x = 0; x < mwidth;) { + svbool_t pg = VecTraits::svwhilelt(x, mwidth); + svst1(pg, &rows[x - mwidth], data0); + x += kVL; + pg = VecTraits::svwhilelt(x, mwidth); + svst1(pg, &rows[x - mwidth], data1); + x += kVL; + pg = VecTraits::svwhilelt(x, mwidth); + svst1(pg, &rows[x - mwidth], data2); + x += kVL; + } + } else { + data = svtbl_u8(data, indices0_); + ptrdiff_t mwidth = margin * rows.channels(); + for (ptrdiff_t x = 0; x < mwidth;) { + svbool_t pg = VecTraits::svwhilelt(x, mwidth); + svst1(pg, &rows[x - mwidth], data); + x += kVL; + } + } + } + + private: + ptrdiff_t channels_; + svuint8_t &indices0_, &indices1_, &indices2_; +}; + +template +class BorderMakerFixed3ch { + using VecTraits = typename ::KLEIDICV_TARGET_NAMESPACE::VecTraits; + using VectorType = typename VecTraits::VectorType; + + public: + // OK this is specialized for uint8_t :o + BorderMakerFixed3ch(svuint8_t& sv0, svuint8_t& sv1, svuint8_t& sv2, + svbool_t& pg_last, size_t n_elements) KLEIDICV_STREAMING + : indices0_(sv0), + indices1_(sv1), + indices2_(sv2), + pg_last_(pg_last) { + size_t kVL = VecTraits::num_lanes() / sizeof(ScalarType); + indices0_ = svindex_u8(0, 1); + indices1_ = svindex_u8(kVL % 3, 1); + indices2_ = svindex_u8((kVL + kVL) % 3, 1); + pg_last_ = VecTraits::svwhilelt(0UL, ((n_elements - 1) % kVL) + 1); + // Decrease by 3 while they are >= 3 --> so we get the modulo + size_t steps = (kVL - 1) / 3; + for (size_t i = 0; i < steps; ++i) { + indices0_ = + svsub_n_u8_m(svcmpge_n_u8(svptrue_b8(), indices0_, 3), indices0_, 3); + indices1_ = + svsub_n_u8_m(svcmpge_n_u8(svptrue_b8(), indices1_, 3), indices1_, 3); + indices2_ = + svsub_n_u8_m(svcmpge_n_u8(svptrue_b8(), indices2_, 3), indices2_, 3); + } + } + + void decorate_one_side(Rows rows, svuint8_t data, + ptrdiff_t offset, ptrdiff_t kVL, + svbool_t pgtrue) KLEIDICV_STREAMING { + svuint8_t data0 = svtbl_u8(data, indices0_); + svuint8_t data1 = svtbl_u8(data, indices1_); + svuint8_t data2 = svtbl_u8(data, indices2_); + ptrdiff_t x = offset; + if constexpr (NVectors == 1) { + svst1(pg_last_, &rows[x], data0); + } else { + svst1(pgtrue, &rows[x], data0); + x += kVL; + if constexpr (NVectors == 2) { + svst1(pg_last_, &rows[x], data1); + } else { + svst1(pgtrue, &rows[x], data1); + x += kVL; + if constexpr (NVectors == 3) { + svst1(pg_last_, &rows[x], data2); + } else { + svst1(pgtrue, &rows[x], data2); + x += kVL; + if constexpr (NVectors == 4) { + svst1(pg_last_, &rows[x], data0); + } else { + svst1(pgtrue, &rows[x], data0); + x += kVL; + if constexpr (NVectors == 5) { + svst1(pg_last_, &rows[x], data1); + } else { + static_assert(false, "NVectors cannot be more than 5!"); + } + } + } + } + } + } + + // Replicate only + void decorate(Rows rows, ptrdiff_t margin, + ptrdiff_t width) KLEIDICV_STREAMING { + const ptrdiff_t kVL = static_cast(VecTraits::num_lanes()); + svbool_t pg_ch = svptrue_pat_b8(SV_VL3); + svbool_t pgtrue = svptrue_b8(); + + // right border + svuint8_t data = svld1_u8(pg_ch, &rows[(width - 1) * 3]); + decorate_one_side(rows, data, width * 3, kVL, pgtrue); + + // left border + data = svld1_u8(pg_ch, &rows[0]); + decorate_one_side(rows, data, -margin * 3, kVL, pgtrue); + } + + private: + svuint8_t &indices0_, &indices1_, &indices2_; + svbool_t& pg_last_; +}; + +template +class BorderMakerFixed124ch { + using VecTraits = typename ::KLEIDICV_TARGET_NAMESPACE::VecTraits; + using VectorType = typename VecTraits::VectorType; + + public: + // OK this is specialized for uint8_t :o + BorderMakerFixed124ch(ptrdiff_t channels, ptrdiff_t width, ptrdiff_t margin, + svuint8_t& sv, svbool_t& pg_last, + svbool_t& pg_ch) KLEIDICV_STREAMING + : indices_(sv), + pg_last_(pg_last), + pg_ch_(pg_ch), + left_margin_start_{-margin * channels}, + right_margin_start_{width * channels}, + last_column_{(width - 1) * channels} { + const ptrdiff_t kNElements = margin * channels; + indices_ = svindex_u8(0, 1); + // It does the same as the modulo for 1,2 and 4 + indices_ = svand_n_u8_x(svptrue_b8(), indices_, channels - 1); + pg_ch_ = VecTraits::svwhilelt(0L, channels); + ptrdiff_t kVL = VecTraits::num_lanes() / sizeof(ScalarType); + pg_last_ = VecTraits::svwhilelt( + 0L, ((static_cast(kNElements) - 1) % kVL) + 1); + } + + void decorate_one_side(Rows rows, svuint8_t data, + ptrdiff_t offset, ptrdiff_t kVL, + svbool_t pgtrue) KLEIDICV_STREAMING { + svuint8_t filled_data = svtbl_u8(data, indices_); + ptrdiff_t x = offset; + if constexpr (NVectors == 1) { + svst1(pg_last_, &rows[x], filled_data); + } else { + svst1(pgtrue, &rows[x], filled_data); + x += kVL; + if constexpr (NVectors == 2) { + svst1(pg_last_, &rows[x], filled_data); + } else { + svst1(pgtrue, &rows[x], filled_data); + x += kVL; + if constexpr (NVectors == 3) { + svst1(pg_last_, &rows[x], filled_data); + } else { + svst1(pgtrue, &rows[x], filled_data); + x += kVL; + if constexpr (NVectors == 4) { + svst1(pg_last_, &rows[x], filled_data); + } else { + svst1(pgtrue, &rows[x], filled_data); + x += kVL; + if constexpr (NVectors == 5) { + svst1(pg_last_, &rows[x], filled_data); + } else { + static_assert(false, "NVectors cannot be more than 5!"); + } + } + } + } + } + } + + // Replicate only + void decorate(Rows rows, ptrdiff_t = 0, + ptrdiff_t = 0) KLEIDICV_STREAMING { + const size_t kVL = VecTraits::num_lanes(); + svbool_t pgtrue = svptrue_b8(); + + // right border + svuint8_t data = svld1_u8(pg_ch_, &rows[last_column_]); + decorate_one_side(rows, data, right_margin_start_, kVL, pgtrue); + + // left border + data = svld1_u8(pg_ch_, &rows[0]); + decorate_one_side(rows, data, left_margin_start_, kVL, pgtrue); + } + + private: + svuint8_t& indices_; + svbool_t &pg_last_, &pg_ch_; + ptrdiff_t left_margin_start_, right_margin_start_, last_column_; +}; + +} // namespace KLEIDICV_TARGET_NAMESPACE + +#endif // KLEIDICV_WORKSPACE_BORDER_GENERIC_NEON_H diff --git a/kleidicv/src/filters/gaussian_blur_fixed_sc.h b/kleidicv/src/filters/gaussian_blur_fixed_sc.h index 2971058d7..41d20f5b9 100644 --- a/kleidicv/src/filters/gaussian_blur_fixed_sc.h +++ b/kleidicv/src/filters/gaussian_blur_fixed_sc.h @@ -8,6 +8,8 @@ #include #include +#include "border_generic_sc.h" +#include "kleidicv/ctypes.h" #include "kleidicv/filters/gaussian_blur.h" #include "kleidicv/filters/separable_filter_15x15_sc.h" #include "kleidicv/filters/separable_filter_21x21_sc.h" @@ -15,6 +17,7 @@ #include "kleidicv/filters/separable_filter_5x5_sc.h" #include "kleidicv/filters/separable_filter_7x7_sc.h" #include "kleidicv/filters/sigma.h" +#include "kleidicv/sve2.h" #include "kleidicv/workspace/separable.h" #if KLEIDICV_TARGET_SME || KLEIDICV_TARGET_SME2 @@ -274,18 +277,7 @@ class GaussianBlur { void vertical_scalar_path(const SourceType src[KernelSize], BufferType *dst) const KLEIDICV_STREAMING { - uint32_t acc = static_cast(src[kHalfKernelSize - 1]) * - half_kernel_[kHalfKernelSize - 1]; - - // Optimization to avoid unnecessary branching in vector code. - KLEIDICV_FORCE_LOOP_UNROLL - for (size_t i = 0; i < kHalfKernelSize - 1; i++) { - acc += (static_cast(src[i]) + - static_cast(src[KernelSize - i - 1])) * - half_kernel_[i]; - } - - dst[0] = static_cast(rounding_shift_right(acc, 8)); + common_scalar_path(src, dst); } void horizontal_vector_path( @@ -296,7 +288,7 @@ class GaussianBlur { void horizontal_scalar_path(const BufferType src[KernelSize], DestinationType *dst) const KLEIDICV_STREAMING { - vertical_scalar_path(src, dst); + common_scalar_path(src, dst); } private: @@ -329,6 +321,22 @@ class GaussianBlur { svst1(pg, &dst[0], result); } + void common_scalar_path(const SourceType src[KernelSize], + BufferType *dst) const KLEIDICV_STREAMING { + uint32_t acc = static_cast(src[kHalfKernelSize - 1]) * + half_kernel_[kHalfKernelSize - 1]; + + // Avoid unnecessary branching in vector code. + KLEIDICV_FORCE_LOOP_UNROLL + for (size_t i = 0; i < kHalfKernelSize - 1; i++) { + acc += (static_cast(src[i]) + + static_cast(src[KernelSize - i - 1])) * + half_kernel_[i]; + } + + dst[0] = static_cast(rounding_shift_right(acc, 8)); + } + const uint16_t *half_kernel_; }; // end of class GaussianBlur @@ -345,6 +353,7 @@ static kleidicv_error_t gaussian_blur_fixed_kernel_size( if constexpr (IsBinomial) { GaussianBlurFilter blur; + // TODO uint16_t BorderMaker SeparableFilter filter{blur}; workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels, border_type, filter); @@ -352,6 +361,7 @@ static kleidicv_error_t gaussian_blur_fixed_kernel_size( return KLEIDICV_OK; } else { constexpr size_t kHalfKernelSize = get_half_kernel_size(KernelSize); + constexpr size_t kMargin = kHalfKernelSize - 1; uint16_t half_kernel[128]; generate_gaussian_half_kernel(half_kernel, kHalfKernelSize, sigma); // If sigma is so small that the middle point gets all the weights, it's @@ -359,8 +369,59 @@ static kleidicv_error_t gaussian_blur_fixed_kernel_size( if (half_kernel[kHalfKernelSize - 1] < 256) { GaussianBlurFilter blur(half_kernel); SeparableFilter filter{blur}; - workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels, - border_type, filter); + // Maximum is (3 or 4)*10 = 30 or 40 elements -> 2 or 3 vectors + // const size_t nElements = kMargin * channels; + // const size_t nVectors = + // (nElements + VecTraits::num_lanes() - 1) / + // VecTraits::num_lanes(); + svuint8_t sv0, sv1, sv2; + KLEIDICV_TARGET_NAMESPACE::BorderMakerArbitrary border_maker( + static_cast(channels), sv0, sv1, sv2); + workspace->process_using_bordermaker(rect, y_begin, y_end, src_rows, + dst_rows, channels, border_type, + filter, border_maker); + /* + if (channels == 3) { + svuint8_t sv0, sv1, sv2; + svbool_t pg; + if (nVectors == 1) { + KLEIDICV_TARGET_NAMESPACE::BorderMakerFixed3ch + border_maker(sv0, sv1, sv2, pg, nElements); + workspace->process_using_bordermaker(rect, y_begin, y_end, + src_rows, dst_rows, channels, border_type, filter, border_maker); } + else if (nVectors == 2) { + KLEIDICV_TARGET_NAMESPACE::BorderMakerFixed3ch + border_maker(sv0, sv1, sv2, pg, nElements); + workspace->process_using_bordermaker(rect, y_begin, y_end, + src_rows, dst_rows, channels, border_type, filter, border_maker); + + } else { + return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + } else { + svuint8_t sv; + svbool_t pg0, pg1; + if (nVectors == 1) { + KLEIDICV_TARGET_NAMESPACE::BorderMakerFixed124ch + border_maker(static_cast(channels), + static_cast(rect.width()), kMargin, + sv, pg0, pg1); workspace->process_using_bordermaker(rect, y_begin, + y_end, src_rows, dst_rows, channels, border_type, filter, + border_maker); } else if (nVectors == 2) { + KLEIDICV_TARGET_NAMESPACE::BorderMakerFixed124ch + border_maker(static_cast(channels), + static_cast(rect.width()), kMargin, + sv, pg0, pg1); workspace->process_using_bordermaker(rect, y_begin, + y_end, src_rows, dst_rows, channels, border_type, filter, + border_maker); } else if (nVectors == 3) { + KLEIDICV_TARGET_NAMESPACE::BorderMakerFixed124ch + border_maker(static_cast(channels), + static_cast(rect.width()), kMargin, + sv, pg0, pg1); workspace->process_using_bordermaker(rect, y_begin, + y_end, src_rows, dst_rows, channels, border_type, filter, + border_maker); } else { return KLEIDICV_ERROR_NOT_IMPLEMENTED; + } + }*/ } else { for (size_t row = y_begin; row < y_end; ++row) { #if KLEIDICV_TARGET_SME diff --git a/kleidicv/src/filters/separable_filter_2d_api.cpp b/kleidicv/src/filters/separable_filter_2d_api.cpp index 9bebc29d7..0ace6fed3 100644 --- a/kleidicv/src/filters/separable_filter_2d_api.cpp +++ b/kleidicv/src/filters/separable_filter_2d_api.cpp @@ -83,7 +83,7 @@ kleidicv_error_t kleidicv_filter_context_create( constexpr size_t intermediate_size = sizeof(uint32_t); auto workspace = SeparableFilterWorkspace::create( Rectangle{max_image_width, max_image_height}, max_channels, - intermediate_size); + intermediate_size, max_kernel_width); if (!workspace) { *context = nullptr; return KLEIDICV_ERROR_ALLOCATION; diff --git a/kleidicv/src/filters/separable_filter_2d_sc.h b/kleidicv/src/filters/separable_filter_2d_sc.h index 02f0c6fb6..d350f7c53 100644 --- a/kleidicv/src/filters/separable_filter_2d_sc.h +++ b/kleidicv/src/filters/separable_filter_2d_sc.h @@ -7,6 +7,7 @@ #include +#include "border_generic_sc.h" #include "kleidicv/filters/separable_filter_5x5_sc.h" #include "kleidicv/kleidicv.h" #include "kleidicv/sve2.h" @@ -499,6 +500,7 @@ kleidicv_error_t separable_filter_2d_stripe_sc( kernel_y_0, kernel_y_1, kernel_y_2, kernel_y_3, kernel_y_4}; SeparableFilter filter{filterClass}; + // TODO use BorderMaker??? Rows src_rows{src, src_stride, channels}; Rows dst_rows{dst, dst_stride, channels}; workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels, diff --git a/kleidicv/src/filters/sobel_sc.h b/kleidicv/src/filters/sobel_sc.h index e018f9a63..009495652 100644 --- a/kleidicv/src/filters/sobel_sc.h +++ b/kleidicv/src/filters/sobel_sc.h @@ -5,6 +5,7 @@ #ifndef KLEIDICV_SOBEL_SC_H #define KLEIDICV_SOBEL_SC_H +#include "border_generic_sc.h" #include "kleidicv/filters/separable_filter_3x3_sc.h" #include "kleidicv/filters/sobel.h" #include "kleidicv/kleidicv.h" @@ -137,11 +138,12 @@ static kleidicv_error_t sobel_3x3_horizontal_stripe_s16_u8_sc( Rows dst_rows{dst, dst_stride, channels}; auto workspace = - SeparableFilterWorkspace::create(rect, channels, sizeof(int16_t)); + SeparableFilterWorkspace::create(rect, channels, sizeof(int16_t), 3); if (!workspace) { return KLEIDICV_ERROR_ALLOCATION; } + // TODO use BorderMaker?? HorizontalSobel3x3 horizontal_sobel; SeparableFilter3x3> filter{horizontal_sobel}; workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels, @@ -167,11 +169,12 @@ static kleidicv_error_t sobel_3x3_vertical_stripe_s16_u8_sc( Rows dst_rows{dst, dst_stride, channels}; auto workspace = - SeparableFilterWorkspace::create(rect, channels, sizeof(int16_t)); + SeparableFilterWorkspace::create(rect, channels, sizeof(int16_t), 3); if (!workspace) { return KLEIDICV_ERROR_ALLOCATION; } + // TODO use BorderMaker (int16_t) VerticalSobel3x3 vertical_sobel; SeparableFilter3x3> filter{vertical_sobel}; workspace->process(rect, y_begin, y_end, src_rows, dst_rows, channels, diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ab408a553..26f71378e 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2023 - 2024 Arm Limited and/or its affiliates +# SPDX-FileCopyrightText: 2023 - 2025 Arm Limited and/or its affiliates # # SPDX-License-Identifier: Apache-2.0 @@ -56,18 +56,20 @@ FetchContent_Declare( FetchContent_MakeAvailable(googletest) add_subdirectory(api) +add_subdirectory(library) add_subdirectory(framework) # Target to build all tests. add_custom_target( kleidicv-test - DEPENDS kleidicv-framework-test kleidicv-api-test + DEPENDS kleidicv-framework-test kleidicv-library-test kleidicv-api-test ) # Target to build and run all tests. add_custom_target( check-kleidicv COMMAND kleidicv-framework-test + COMMAND kleidicv-library-test COMMAND kleidicv-api-test DEPENDS kleidicv-test USES_TERMINAL diff --git a/test/api/test_gaussian_blur.cpp b/test/api/test_gaussian_blur.cpp index 4bc90b2cb..fa5073387 100644 --- a/test/api/test_gaussian_blur.cpp +++ b/test/api/test_gaussian_blur.cpp @@ -275,23 +275,23 @@ size_t minimumValidWidth(size_t kernel_size, size_t vector_length) { TYPED_TEST(GaussianBlur, 3x3_CustomSigma) { using KernelTestParams = GaussianBlurKernelTestParams; GaussianBlurTest{KernelTestParams{}, minimal_array_layouts_for_fixed, - kAllBorders, kToleranceOne} + kReplicateBorder, kToleranceOne} .with_sigma(2.2) .test_with_generated_mask(); GaussianBlurTest{KernelTestParams{}, minimal_array_layouts_for_fixed, - kAllBorders, kToleranceOne} - .with_sigma(0.01) + kReplicateBorder, kToleranceOne} + .with_sigma(0.01) .test_with_generated_mask(); } TYPED_TEST(GaussianBlur, 5x5_CustomSigma) { using KernelTestParams = GaussianBlurKernelTestParams; GaussianBlurTest{KernelTestParams{}, minimal_array_layouts_for_fixed, - kAllBorders, kToleranceOne} + kReplicateBorder, kToleranceOne} .with_sigma(2.2) .test_with_generated_mask(); GaussianBlurTest{KernelTestParams{}, minimal_array_layouts_for_fixed, - kAllBorders, kToleranceOne} + kReplicateBorder, kToleranceOne} .with_sigma(0.01) .test_with_generated_mask(); } @@ -299,16 +299,16 @@ TYPED_TEST(GaussianBlur, 5x5_CustomSigma) { TYPED_TEST(GaussianBlur, 7x7_CustomSigma) { using KernelTestParams = GaussianBlurKernelTestParams; GaussianBlurTest{KernelTestParams{}, minimal_array_layouts_for_fixed, - kAllBorders, kToleranceOne} + kReplicateBorder, kToleranceOne} .with_sigma(2.2) .test_with_generated_mask(); GaussianBlurTest{KernelTestParams{}, minimal_array_layouts_for_fixed, - kAllBorders, kToleranceOne} + kReplicateBorder, kToleranceOne} .with_sigma(0.01) .test_with_generated_mask(); } -// 11x11 use the generic solution. +// 11x11 use the "ArbitrarySizeKernel" solution. TYPED_TEST(GaussianBlur, 11x11_CustomSigma) { using KernelTestParams = GaussianBlurKernelTestParams; // TODO kReplicateBorder is temporary until we implement all borders @@ -329,14 +329,14 @@ TYPED_TEST(GaussianBlur, 11x11_CustomSigma) { TYPED_TEST(GaussianBlur, 15x15_CustomSigma) { using KernelTestParams = GaussianBlurKernelTestParams; GaussianBlurTest{KernelTestParams{}, minimal_array_layouts_for_fixed, - kAllBorders, kToleranceOne} + kReplicateBorder, kToleranceOne} .test_with_generated_mask(); GaussianBlurTest{KernelTestParams{}, minimal_array_layouts_for_fixed, - kAllBorders, kToleranceOne} + kReplicateBorder, kToleranceOne} .with_sigma(2.2) .test_with_generated_mask(); GaussianBlurTest{KernelTestParams{}, minimal_array_layouts_for_fixed, - kAllBorders, kToleranceOne} + kReplicateBorder, kToleranceOne} .with_sigma(0.01) .test_with_generated_mask(); } @@ -346,14 +346,14 @@ TYPED_TEST(GaussianBlur, 21x21_CustomSigma) { using KernelTestParams = GaussianBlurKernelTestParams; // TODO kReplicateBorder is temporary until we implement all borders GaussianBlurTest{KernelTestParams{}, minimal_array_layouts_for_fixed, - kAllBorders, kToleranceOne} + kReplicateBorder, kToleranceOne} .test_with_generated_mask(); GaussianBlurTest{KernelTestParams{}, minimal_array_layouts_for_fixed, - kAllBorders, kToleranceOne} + kReplicateBorder, kToleranceOne} .with_sigma(2.2) .test_with_generated_mask(); GaussianBlurTest{KernelTestParams{}, minimal_array_layouts_for_fixed, - kAllBorders, kToleranceOne} + kReplicateBorder, kToleranceOne} .with_sigma(0.01) .test_with_generated_mask(); } diff --git a/test/library/CMakeLists.txt b/test/library/CMakeLists.txt new file mode 100644 index 000000000..6fbb64110 --- /dev/null +++ b/test/library/CMakeLists.txt @@ -0,0 +1,69 @@ +# SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +# +# SPDX-License-Identifier: Apache-2.0 + +# Choose the default value for the KLEIDICV_ENABLE_SVE2 option +# according to the compiler version. The list of compiler versions +# recognised as supporting SVE may be extended in future. +# check_cxx_compiler_flag is not used to test whether the compiler +# supports +sve2 since this may succeed for compilers that have only +# partial SVE support. +if ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND + CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12) OR + (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND + CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 10) +) + set(KLEIDICV_ENABLE_SVE2 ON) +else() + set(KLEIDICV_ENABLE_SVE2 OFF) +endif() + +message(STATUS "KLEIDICV_ENABLE_SVE2 ${KLEIDICV_ENABLE_SVE2}") + +file(GLOB kleidicv_library_test_sources CONFIGURE_DEPENDS "*.h" "*.cpp") + +if (KLEIDICV_ENABLE_SVE2) + list(APPEND KLEIDICV_TEST_CXX_FLAGS "-march=armv8-a+sve2") +else() + list(FILTER kleidicv_library_test_sources EXCLUDE REGEX "_sve2\\.cpp$") +endif() + +list(APPEND kleidicv_library_test_sources ${KLEIDICV_TEST_FRAMEWORK_SOURCES}) + +set_source_files_properties( + ${kleidicv_library_test_sources} + PROPERTIES COMPILE_OPTIONS "${KLEIDICV_TEST_CXX_FLAGS}" +) + +add_executable( + kleidicv-library-test + ${kleidicv_library_test_sources} +) + +set_target_properties( + kleidicv-library-test + PROPERTIES CXX_STANDARD 17 +) + +target_include_directories( + kleidicv-library-test + PRIVATE ${KLEIDICV_INCLUDE_DIR} + PRIVATE ${KLEIDICV_TEST_INCLUDE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/../../kleidicv_thread/include +) + +if (KLEIDICV_ALLOCATION_TESTS) + target_link_options( + kleidicv-library-test + PRIVATE -Wl,--wrap,malloc + ) +endif() + + +target_link_libraries( + kleidicv-library-test + kleidicv + kleidicv_thread + gtest + gmock +) diff --git a/test/library/test_border_generic_sve2.cpp b/test/library/test_border_generic_sve2.cpp new file mode 100644 index 000000000..30af3016c --- /dev/null +++ b/test/library/test_border_generic_sve2.cpp @@ -0,0 +1,162 @@ +// SPDX-FileCopyrightText: 2025 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include "../../kleidicv/include/kleidicv/types.h" +#include "../../kleidicv/src/filters/border_generic_sc.h" +#include "framework/array.h" +#include "framework/utils.h" + +template +void test_sve_border(size_t width, size_t margin, size_t channels, + std::initializer_list expected_values) { + size_t total_width = channels * (width + 2 * margin); + test::Array2D expected(total_width, 1); + expected.set(0, 0, expected_values); + + test::Array2D actual(total_width, 1); + for (size_t x = 0; x < width * channels; ++x) { + actual.set(0, margin * channels + x, + {*expected.at(0, margin * channels + x)}); + } + + kleidicv::Rows rows(actual.at(0, margin * channels), width, + channels); + const size_t nElements = margin * channels; + const size_t nVectors = + (nElements + test::Options::vector_lanes() - 1) / + test::Options::vector_lanes(); + + if (channels == 3) { + svuint8_t sv0, sv1, sv2; + svbool_t pg; + if (nVectors == 1) { + KLEIDICV_TARGET_NAMESPACE::BorderMakerFixed3ch border( + sv0, sv1, sv2, pg, nElements); + border.decorate(rows, margin, width); + } else if (nVectors == 2) { + KLEIDICV_TARGET_NAMESPACE::BorderMakerFixed3ch border( + sv0, sv1, sv2, pg, nElements); + border.decorate(rows, margin, width); + } else { + // TODO test error handling + } + } else { + svuint8_t sv; + svbool_t pg0, pg1; + if (nVectors == 1) { + KLEIDICV_TARGET_NAMESPACE::BorderMakerFixed124ch border( + static_cast(channels), static_cast(width), + static_cast(margin), sv, pg0, pg1); + border.decorate(rows); + } else if (nVectors == 2) { + KLEIDICV_TARGET_NAMESPACE::BorderMakerFixed124ch border( + static_cast(channels), static_cast(width), + static_cast(margin), sv, pg0, pg1); + border.decorate(rows); + } else if (nVectors == 3) { + KLEIDICV_TARGET_NAMESPACE::BorderMakerFixed124ch border( + static_cast(channels), static_cast(width), + static_cast(margin), sv, pg0, pg1); + border.decorate(rows); + } else { + // TODO test error handling + } + + EXPECT_EQ_ARRAY2D(expected, actual); + } +} + +TEST(BorderMaker, Replicate_1Ch_1Element) { + test_sve_border(6, 1, 1, {1, 1, 2, 3, 4, 5, 6, 6}); +} + +TEST(BorderMaker, Replicate_1Ch_2Elements) { + test_sve_border(6, 2, 1, {1, 1, 1, 2, 3, 4, 5, 6, 6, 6}); +} + +TEST(BorderMaker, Replicate_1Ch_3Elements) { + test_sve_border(6, 3, 1, {1, 1, 1, 1, 2, 3, 4, 5, 6, 6, 6, 6}); +} + +TEST(BorderMaker, Replicate_1Ch_9Elements) { + test_sve_border(6, 9, 1, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, + 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6}); +} + +TEST(BorderMaker, Replicate_2Ch_1Element) { + test_sve_border(3, 1, 2, {1, 2, 1, 2, 3, 4, 5, 6, 5, 6}); +} + +TEST(BorderMaker, Replicate_2Ch_2Elements) { + test_sve_border(3, 2, 2, {1, 2, 1, 2, 1, 2, 3, 4, 5, 6, 5, 6, 5, 6}); +} + +TEST(BorderMaker, Replicate_2Ch_3Elements) { + test_sve_border( + 3, 3, 2, {1, 2, 1, 2, 1, 2, 1, 2, 3, 4, 5, 6, 5, 6, 5, 6, 5, 6}); +} + +TEST(BorderMaker, Replicate_2Ch_5Elements) { + test_sve_border(3, 5, 2, {1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 3, + 4, 5, 6, 5, 6, 5, 6, 5, 6, 5, 6, 5, 6}); +} + +TEST(BorderMaker, Replicate_3Ch_1Element) { + test_sve_border(3, 1, 3, + {1, 2, 3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 7, 8, 9}); +} + +TEST(BorderMaker, Replicate_3Ch_2Elements) { + test_sve_border( + 3, 2, 3, {1, 2, 3, 1, 2, 3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 7, 8, 9, 7, 8, 9}); +} + +TEST(BorderMaker, Replicate_3Ch_3Elements) { + test_sve_border(3, 3, 3, {1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 7, 8, 9, 7, 8, 9, 7, 8, 9}); +} + +TEST(BorderMaker, Replicate_3Ch_5Elements) { + test_sve_border( + 3, 5, 3, {1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 7, 8, 9, 7, 8, 9, 7, 8, 9, 7, 8, 9, 7, 8, 9}); +} + +TEST(BorderMaker, Replicate_3Ch_17Elements) { + test_sve_border( + 3, 17, 3, + {1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, + 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, + 2, 3, 1, 2, 3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 7, 8, 9, 7, 8, 9, 7, 8, 9, + 7, 8, 9, 7, 8, 9, 7, 8, 9, 7, 8, 9, 7, 8, 9, 7, 8, 9, 7, 8, 9, 7, 8, + 9, 7, 8, 9, 7, 8, 9, 7, 8, 9, 7, 8, 9, 7, 8, 9, 7, 8, 9}); +} + +TEST(BorderMaker, Replicate_4Ch_1Element) { + test_sve_border(3, 1, 4, {1, 2, 3, 4, 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12, 9, 10, 11, 12}); +} + +TEST(BorderMaker, Replicate_4Ch_2Elements) { + test_sve_border(3, 2, 4, + {1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12, 9, 10, 11, 12, 9, 10, 11, 12}); +} + +TEST(BorderMaker, Replicate_4Ch_3Elements) { + test_sve_border( + 3, 3, 4, + {1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12, 9, 10, 11, 12, 9, 10, 11, 12, 9, 10, 11, 12}); +} + +TEST(BorderMaker, Replicate_4Ch_5Elements) { + test_sve_border( + 3, 5, 4, + {1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, + 3, 4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 9, 10, 11, 12, + 9, 10, 11, 12, 9, 10, 11, 12, 9, 10, 11, 12, 9, 10, 11, 12}); +} -- GitLab