diff --git a/CMakeLists.txt b/CMakeLists.txt
index d2f4b5feced58d4e0b74ae9232d9cb71aae588e9..ea7b9926b7f2dd4514a77d4868ff0bed5c16da05 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -330,6 +330,7 @@ if(KLEIDIAI_BUILD_TESTS)
     add_library(kleidiai_test_framework
         test/common/bfloat16.cpp
+        test/common/buffer.cpp
         test/common/compare.cpp
         test/common/cpu_info.cpp
         test/common/data_format.cpp
@@ -388,6 +389,7 @@ if(KLEIDIAI_BUILD_TESTS)
     else()
         add_executable(kleidiai_test
             test/tests/bfloat16_test.cpp
+            test/tests/buffer_test.cpp
             test/tests/float16_test.cpp
             test/tests/imatmul_test.cpp
             test/tests/matmul_clamp_f16_bf16p_bf16p_test.cpp
diff --git a/test/common/buffer.cpp b/test/common/buffer.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..65c9e261f272aa91e0a3e3a4122a91340d2eaad0
--- /dev/null
+++ b/test/common/buffer.cpp
@@ -0,0 +1,114 @@
+//
+// SPDX-FileCopyrightText: Copyright 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "buffer.hpp"
+
+#if defined(__linux__) || defined(__APPLE__)
+#include <sys/mman.h>
+#include <unistd.h>
+#endif  // defined(__linux__) || defined(__APPLE__)
+
+#include <algorithm>
+#include <cstdint>
+#include <cstdlib>
+#include <sstream>
+#include <string>
+
+#include "kai/kai_common.h"
+
+namespace kai::test {
+
+Buffer::Buffer(const size_t size) : m_user_buffer_size(size) {
+    KAI_ASSUME_MSG(size > 0, "Buffers must be of non-zero size");
+
+    const char* val = getenv("KAI_TEST_BUFFER_POLICY");
+    const std::string buffer_policy = (val != nullptr) ? std::string(val) : std::string("NONE");
+
+    std::ostringstream oss;
+
+    if (buffer_policy == "PROTECT_UNDERFLOW" || buffer_policy == "PROTECT_OVERFLOW") {
+#if defined(__linux__) || defined(__APPLE__)
+        m_protection_policy = (buffer_policy == "PROTECT_UNDERFLOW") ? BufferProtectionPolicy::ProtectUnderflow
+                                                                     : BufferProtectionPolicy::ProtectOverflow;
+#else   // defined(__linux__) || defined(__APPLE__)
+        oss << buffer_policy << " buffer protection policy is not supported on target platform";
+#endif  // defined(__linux__) || defined(__APPLE__)
+    } else if (buffer_policy == "NONE" || buffer_policy == "") {
+        m_protection_policy = BufferProtectionPolicy::None;
+    } else {
+        oss << "Unrecognized buffer protection policy provided by KAI_TEST_BUFFER_POLICY: ";
+        oss << buffer_policy;
+    }
+
+    if (!oss.str().empty()) {
+        KAI_ERROR(oss.str().c_str());
+    }
+
+    switch (m_protection_policy) {
+#if defined(__linux__) || defined(__APPLE__)
+        case BufferProtectionPolicy::ProtectUnderflow:
+        case BufferProtectionPolicy::ProtectOverflow:
+            allocate_with_guard_pages();
+            break;
+#endif  // defined(__linux__) || defined(__APPLE__)
+        default:
+            allocate();
+    }
+}
+
+void Buffer::allocate() {
+    m_buffer = handle(std::malloc(m_user_buffer_size), &std::free);
+    KAI_ASSUME_MSG(m_buffer.get() != nullptr, "Failure allocating memory");
+    KAI_ASSUME_MSG(m_user_buffer_offset == 0, "Buffer offset must be zero for naive allocation");
+}
+
+#if defined(__linux__) || defined(__APPLE__)
+void Buffer::allocate_with_guard_pages() {
+    const auto sc_pagesize_res = sysconf(_SC_PAGESIZE);
+    KAI_ASSUME_MSG(sc_pagesize_res != -1, "Error finding page size");
+
+    const auto page_size = static_cast<size_t>(sc_pagesize_res);
+
+    // Offset the user buffer by the size of the first guard page
+    m_user_buffer_offset = page_size;
+
+    // The user buffer is rounded to the size of the nearest whole page.
+    // This forms the valid region between the two guard pages
+    const size_t valid_region_size = kai_roundup(m_user_buffer_size, page_size);
+    const size_t protected_region_size = 2 * page_size;
+    const size_t total_memory_size = valid_region_size + protected_region_size;
+
+    if (m_protection_policy == BufferProtectionPolicy::ProtectOverflow) {
+        // To detect overflows we offset the user buffer so that edge of the buffer is aligned to the start of the
+        // higher guard page thus detecting whenever a buffer overflow occurs.
+        m_user_buffer_offset += valid_region_size - m_user_buffer_size;
+    }
+
+    auto mmap_deleter = [total_memory_size](void* ptr) {
+        if (munmap(ptr, total_memory_size) != 0) {
+            KAI_ERROR("Failure deleting memory mappings");
+        }
+    };
+
+    m_buffer =
+        handle(mmap(nullptr, total_memory_size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0), mmap_deleter);
+    if (m_buffer.get() == MAP_FAILED) {
+        KAI_ERROR("Failure mapping memory");
+    }
+
+    void* head_guard_page = m_buffer.get();
+    void* tail_guard_page = static_cast<uint8_t*>(m_buffer.get()) + (total_memory_size - page_size);
+
+    if (mprotect(head_guard_page, std::max(static_cast<size_t>(0), page_size), PROT_NONE) != 0) {
+        KAI_ERROR("Failure protecting page immediately preceding buffer");
+    }
+    if (mprotect(tail_guard_page, std::max(static_cast<size_t>(0), page_size), PROT_NONE) != 0) {
+        KAI_ERROR("Failure protecting page immediately following buffer");
+    }
+}
+#endif  // defined(__linux__) || defined(__APPLE__)
+
+}  // namespace kai::test
diff --git a/test/common/buffer.hpp b/test/common/buffer.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..a2226f53415ddf13f3d51ef98d9bdb79a86e206e
--- /dev/null
+++ b/test/common/buffer.hpp
@@ -0,0 +1,90 @@
+//
+// SPDX-FileCopyrightText: Copyright 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <memory>
+
+namespace kai::test {
+
+/// Buffer is a high-level abstraction for a block of memory.
+///
+/// The class performs dynamic memory allocation and management in an opaque manner. The underlying memory resource can
+/// be requested using the familiar @ref Buffer::data() method and interacted with using @ref
+/// kai::test::read_array() and @ref kai::test::write_array() utilities.
+///
+/// Buffer comes with protection mechanisms defined by @ref BufferProtectionPolicy. These are enabled by setting the
+/// KAI_TEST_BUFFER_POLICY environment variable, for example:
+///     KAI_TEST_BUFFER_POLICY=PROTECT_UNDERFLOW to enable @ref BufferProtectionPolicy::ProtectUnderflow.
+///     KAI_TEST_BUFFER_POLICY=PROTECT_OVERFLOW to enable @ref BufferProtectionPolicy::ProtectOverflow.
+///
+class Buffer {
+    // Handle to the underlying memory resource and its deleter
+    using handle = std::unique_ptr<void, std::function<void(void*)>>;
+
+public:
+    explicit Buffer(size_t size);
+
+    Buffer(const Buffer& other) = delete;
+    Buffer(Buffer&& other) noexcept = default;
+    Buffer& operator=(const Buffer& other) = delete;
+    Buffer& operator=(Buffer&& other) noexcept = default;
+
+    ~Buffer() = default;
+
+    /// Gets the base memory address of the user buffer.
+    ///
+    /// @return Base memory address of the user buffer.
+    [[nodiscard]] void* data() const {
+        return static_cast<uint8_t*>(m_buffer.get()) + m_user_buffer_offset;
+    }
+
+    /// Gets the size of the user buffer.
+    ///
+    /// Depending on the @ref BufferProtectionPolicy policy enabled, the actual size of memory allocated may be larger.
+    /// However, this function guarantees to always provide the size of the user buffer only.
+    ///
+    /// @return Size of the user buffer in bytes.
+    [[nodiscard]] size_t size() const {
+        return m_user_buffer_size;
+    }
+
+private:
+    /// Buffer can be protected with one of the following protection policies:
+    ///   - @ref BufferProtectionPolicy::None No protection mechanisms are enabled.
+    ///   - @ref BufferProtectionPolicy::ProtectUnderflow Memory equal to the size of the user buffer rounded to the
+    ///                                                   nearest whole page plus adjacent guard pages is allocated,
+    ///                                                   and the user buffer is aligned to the end of the head guard
+    ///                                                   page thus detecting whenever a buffer underflow occurs.
+    ///   - @ref BufferProtectionPolicy::ProtectOverflow Same as above, but now the edge of the user buffer is aligned
+    ///                                                  to the start of the tail guard page thus detecting whenever a
+    ///                                                  buffer overflow occurs.
+    enum class BufferProtectionPolicy : uint8_t {
+        None = 0,
+        ProtectUnderflow = 1,
+        ProtectOverflow = 2,
+    };
+
+    /// Naively allocate memory.
+    void allocate();
+
+#if defined(__linux__) || defined(__APPLE__)
+    /// Allocate memory with adjacent guard pages.
+    void allocate_with_guard_pages();
+#endif  // defined(__linux__) || defined(__APPLE__)
+
+    handle m_buffer = nullptr;
+
+    size_t m_user_buffer_size;
+    size_t m_user_buffer_offset = 0;
+
+    BufferProtectionPolicy m_protection_policy = BufferProtectionPolicy::None;
+};
+
+}  // namespace kai::test
diff --git a/test/tests/buffer_test.cpp b/test/tests/buffer_test.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..fbd5e25930f3ddde5f4ce1e46b375b97973c4d7a
--- /dev/null
+++ b/test/tests/buffer_test.cpp
@@ -0,0 +1,168 @@
+//
+// SPDX-FileCopyrightText: Copyright 2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "test/common/buffer.hpp"
+
+#include <gtest/gtest.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <csignal>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <limits>
+#include <random>
+#include <string>
+
+namespace kai::test {
+
+namespace {
+constexpr size_t g_num_runs = 100;
+}  // namespace
+
+TEST(Buffer, NonePolicy) {
+    std::random_device rd;
+    std::mt19937 rng(rd());
+    std::uniform_int_distribution<uint16_t> dist(1, std::numeric_limits<uint16_t>::max());
+
+    // Store the current buffer policy
+    std::string buffer_policy;
+    if (const char* buffer_policy_env = getenv("KAI_TEST_BUFFER_POLICY")) {
+        buffer_policy = std::string(buffer_policy_env);
+    }
+
+    // Overwrite the buffer policy for purpose of the test
+    ASSERT_EQ(setenv("KAI_TEST_BUFFER_POLICY", "NONE", 1 /* overwrite */), 0);
+
+    for (size_t i = 0; i < g_num_runs; ++i) {
+        const size_t buffer_size = dist(rng);
+
+        const auto buffer = Buffer(buffer_size);
+
+        const auto* data = static_cast<uint8_t*>(buffer.data());
+        ASSERT_NE(data, nullptr);
+    }
+
+    // Restore the buffer policy to its original value
+    ASSERT_EQ(setenv("KAI_TEST_BUFFER_POLICY", buffer_policy.c_str(), 1 /* overwrite */), 0);
+}
+
+TEST(Buffer, InvalidPolicy) {
+    std::random_device rd;
+    std::mt19937 rng(rd());
+    std::uniform_int_distribution<uint16_t> dist(1, std::numeric_limits<uint16_t>::max());
+
+    // Store the current buffer policy
+    std::string buffer_policy;
+    if (const char* buffer_policy_env = getenv("KAI_TEST_BUFFER_POLICY")) {
+        buffer_policy = std::string(buffer_policy_env);
+    }
+
+    // Overwrite the buffer policy for purpose of the test
+    ASSERT_EQ(setenv("KAI_TEST_BUFFER_POLICY", "INVALID_POLICY_TEST", 1 /* overwrite */), 0);
+
+    for (size_t i = 0; i < g_num_runs; ++i) {
+        const size_t buffer_size = dist(rng);
+
+// Ignore missing default case in switch statement in test dependency macro
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wswitch-default"
+        EXPECT_DEATH({ [[maybe_unused]] const auto buffer = Buffer(buffer_size); }, "");
+#pragma GCC diagnostic pop
+    }
+
+    // Restore the buffer policy to its original value
+    ASSERT_EQ(setenv("KAI_TEST_BUFFER_POLICY", buffer_policy.c_str(), 1 /* overwrite */), 0);
+}
+
+#if defined(__linux__) || defined(__APPLE__)
+TEST(Buffer, ProtectUnderflowPolicy) {
+    std::random_device rd;
+    std::mt19937 rng(rd());
+    std::uniform_int_distribution<uint16_t> dist(1, std::numeric_limits<uint16_t>::max());
+
+    // Store the current buffer policy
+    std::string buffer_policy;
+    if (const char* buffer_policy_env = getenv("KAI_TEST_BUFFER_POLICY")) {
+        buffer_policy = std::string(buffer_policy_env);
+    }
+
+    // Overwrite the buffer policy for purpose of the test
+    ASSERT_EQ(setenv("KAI_TEST_BUFFER_POLICY", "PROTECT_UNDERFLOW", 1 /* overwrite */), 0);
+
+    for (size_t i = 0; i < g_num_runs; ++i) {
+        const size_t buffer_size = dist(rng);
+
+        const auto buffer = Buffer(buffer_size);
+
+        const auto* data = static_cast<uint8_t*>(buffer.data());
+        ASSERT_NE(data, nullptr);
+        ASSERT_NE(data, MAP_FAILED);
+
+// Ignore missing default case in switch statement in test dependency macro
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wswitch-default"
+        EXPECT_EXIT(
+            // Underflow by one byte
+            { [[maybe_unused]] const volatile auto val = *--data; },
+            [](const size_t exit_status) {
+                return testing::KilledBySignal(SIGBUS)(exit_status) ||   //
+                       testing::KilledBySignal(SIGSEGV)(exit_status) ||  //
+                       testing::KilledBySignal(SIGABRT)(exit_status);    //
+            },
+            "");
+#pragma GCC diagnostic pop
+    }
+
+    // Restore the buffer policy to its original value
+    ASSERT_EQ(setenv("KAI_TEST_BUFFER_POLICY", buffer_policy.c_str(), 1 /* overwrite */), 0);
+}
+
+TEST(Buffer, ProtectOverflowPolicy) {
+    std::random_device rd;
+    std::mt19937 rng(rd());
+    std::uniform_int_distribution<uint16_t> dist(1, std::numeric_limits<uint16_t>::max());
+
+    // Store the current buffer policy
+    std::string buffer_policy;
+    if (const char* buffer_policy_env = getenv("KAI_TEST_BUFFER_POLICY")) {
+        buffer_policy = std::string(buffer_policy_env);
+    }
+
+    // Overwrite the buffer policy for purpose of the test
+    ASSERT_EQ(setenv("KAI_TEST_BUFFER_POLICY", "PROTECT_OVERFLOW", 1 /* overwrite */), 0);
+
+    for (size_t i = 0; i < g_num_runs; ++i) {
+        const size_t buffer_size = dist(rng);
+
+        const auto buffer = Buffer(buffer_size);
+
+        const auto* data = static_cast<uint8_t*>(buffer.data());
+        ASSERT_NE(data, nullptr);
+        ASSERT_NE(data, MAP_FAILED);
+
+// Ignore missing default case in switch statement in test dependency macro
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wswitch-default"
+        EXPECT_EXIT(
+            // Overflow by one byte
+            { [[maybe_unused]] const volatile auto val = *(data + buffer_size); },
+            [](const size_t exit_status) {
+                return testing::KilledBySignal(SIGBUS)(exit_status) ||   //
+                       testing::KilledBySignal(SIGSEGV)(exit_status) ||  //
+                       testing::KilledBySignal(SIGABRT)(exit_status);    //
+            },
+            "");
+#pragma GCC diagnostic pop
+    }
+
+    // Restore the buffer policy to its original value
+    ASSERT_EQ(setenv("KAI_TEST_BUFFER_POLICY", buffer_policy.c_str(), 1 /* overwrite */), 0);
+}
+#endif  // if defined(__linux__) || defined(__APPLE__)
+
+}  // namespace kai::test
diff --git a/test/tests/matmul_clamp_f32_f32_f32p_test.cpp b/test/tests/matmul_clamp_f32_f32_f32p_test.cpp
index be124f2212ed0d3e12d9bd4cf66f0e70027c141e..9842e9fb60f6f42a3f6d3fd28e6dd2ee1351e81f 100644
--- a/test/tests/matmul_clamp_f32_f32_f32p_test.cpp
+++ b/test/tests/matmul_clamp_f32_f32_f32p_test.cpp
@@ -10,9 +10,11 @@
 #include <array>
 #include <cstddef>
 #include <cstdint>
-#include <cstdlib>
+#include <functional>
 #include <memory>
+#include <random>
 #include <string>
+#include <type_traits>
 #include <vector>
 
 #include "kai/kai_common.h"
@@ -21,12 +23,12 @@
 #include "kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p_interface.h"
 #include "kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32p16vlx1b_f32_f32_sme.h"
 #include "kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme.h"
+#include "test/common/buffer.hpp"
 #include "test/common/cpu_info.hpp"
 #include "test/common/data_type.hpp"
 #include "test/common/memory.hpp"
 #include "test/common/test_suite.hpp"
 #include "test/reference/clamp.hpp"
-#include "test/reference/fill.hpp"
 #include "test/reference/matmul.hpp"
 
 namespace kai::test {
@@ -59,6 +61,41 @@ const std::array<UkernelVariant<kai_matmul_clamp_f32_f32_f32p_ukernel>, 2> ukern
          kai_run_matmul_clamp_f32_f32_f32p2vlx1b_1x16vl_sme2_mla},
      "matmul_clamp_f32_f32_f32p2vlx1b_1x16vl_sme2_mla",
      cpu_has_sme2}}};
+
+// TODO: Reimplement these helpers in fill.cpp. These methods are currently duplicated here so they can be specialized
+// on the Buffer return type.
+template <typename Value>
+Buffer fill_matrix_raw(size_t height, size_t width, std::function<Value(size_t, size_t)> gen) {
+    const auto size = height * width * size_in_bits<Value> / 8;
+    KAI_ASSUME(width * size_in_bits<Value> % 8 == 0);
+
+    Buffer data(size);
+    auto ptr = static_cast<uint8_t*>(data.data());
+
+    for (size_t y = 0; y < height; ++y) {
+        for (size_t x = 0; x < width; ++x) {
+            write_array<Value>(ptr, y * width + x, gen(y, x));
+        }
+    }
+
+    return data;
+}
+
+template <typename Value>
+Buffer fill_matrix_random_raw(size_t height, size_t width, uint32_t seed) {
+    using TDist = std::conditional_t<
+        std::is_floating_point_v<Value>, std::uniform_real_distribution<Value>, std::uniform_int_distribution<Value>>;
+
+    std::mt19937 rnd(seed);
+    TDist dist;
+
+    return fill_matrix_raw<Value>(height, width, [&](size_t, size_t) { return dist(rnd); });
+}
+
+template <typename Value>
+Buffer fill_random(size_t length, uint32_t seed) {
+    return fill_matrix_random_raw<Value>(1, length, seed);
+}
 }  // namespace
 
 class MatMulTest_f32_f32_f32p : public ::testing::TestWithParam<MatMulTestParams> {};
@@ -85,9 +122,9 @@ TEST_P(MatMulTest_f32_f32_f32p, EndToEnd)  // NOLINT(google-readability-avoid-un
     const auto sr = ukernel_variant.interface.get_sr();
 
     // Generates input data.
-    const auto ref_lhs = fill_random<float>(m * k, seed + 0);
-    const auto ref_rhs = fill_random<float>(n * k, seed + 1);
-    const auto ref_bias = fill_random<float>(n, seed + 2);
+    const Buffer ref_lhs(fill_random<float>(m * k, seed + 0));
+    const Buffer ref_rhs(fill_random<float>(n * k, seed + 1));
+    const Buffer ref_bias(fill_random<float>(n, seed + 2));
 
     // Runs the reference implementation
     const auto ref_dst_no_clamp = matmul(
@@ -103,19 +140,19 @@
     const auto rhs_stride = n * sizeof(float);
 
     size_t imp_packed_rhs_size = 0;
-    std::unique_ptr<std::vector<uint8_t>> imp_packed_rhs;
+    std::unique_ptr<Buffer> imp_packed_rhs;
     switch (variant_idx) {
         case 0:  // matmul_clamp_f32_f32_f32p16vlx1b_1x16vl_sme2_mla
             imp_packed_rhs_size = kai_get_rhs_packed_size_rhs_pack_kxn_f32p16vlx1b_f32_f32_sme(n, k);
-            imp_packed_rhs = std::make_unique<std::vector<uint8_t>>(imp_packed_rhs_size);
+            imp_packed_rhs = std::make_unique<Buffer>(imp_packed_rhs_size);
             kai_run_rhs_pack_kxn_f32p16vlx1b_f32_f32_sme(
                 1, n, k, nr, kr, sr, rhs_stride, ref_rhs.data(), ref_bias.data(), nullptr, imp_packed_rhs->data(), 0,
                 nullptr);
             break;
         case 1:  // matmul_clamp_f32_f32_f32p2vlx1b_1x16vl_sme2_mla
             imp_packed_rhs_size = kai_get_rhs_packed_size_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme(n, k);
-            imp_packed_rhs = std::make_unique<std::vector<uint8_t>>(imp_packed_rhs_size);
+            imp_packed_rhs = std::make_unique<Buffer>(imp_packed_rhs_size);
             kai_run_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme(
                 1, n, k, nr, kr, sr, rhs_stride, ref_rhs.data(), ref_bias.data(), nullptr, imp_packed_rhs->data(), 0,
                 nullptr);
@@ -128,9 +165,9 @@ TEST_P(MatMulTest_f32_f32_f32p, EndToEnd)  // NOLINT(google-readability-avoid-un
     const auto imp_dst_size = ukernel_variant.interface.get_dst_size(m, n);
     ASSERT_EQ(imp_dst_size, ref_dst.size());
 
-    std::vector<uint8_t> imp_dst(imp_dst_size);
+    Buffer imp_dst(imp_dst_size);
     ukernel_variant.interface.run_matmul(
-        m, n, k, ref_lhs.data(), 1, imp_packed_rhs->data(), reinterpret_cast<float*>(imp_dst.data()), 1, 1, clamp_min,
+        m, n, k, ref_lhs.data(), 1, imp_packed_rhs->data(), static_cast<float*>(imp_dst.data()), 1, 1, clamp_min,
         clamp_max);
 
     // Compare the output of the micro-kernels against the output of the reference implementation.