From 2826248b43b72675ec4259511e2b51b1408cb555 Mon Sep 17 00:00:00 2001 From: Jakub Sujak Date: Mon, 28 Apr 2025 14:26:37 +0100 Subject: [PATCH 1/2] Fix segmentation faults in benchmark tool * Fix incorrect calculation of LHS matrix stride value For kernels that use the LHS matrix stride in their API, namely the `kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla` and `kai_matmul_clamp_f16_f16_f16p16x1biasf16_6x16x8_neon_mla` kernels, the LHS stride value was incorrectly computed in bits rather than bytes. * Fix insufficient allocation of memory for SME kernels For SME kernels, such as `kai_matmul_clamp_f32_f32_f32p16vlx1b_1x16vl_sme2_mla`, the tensor sizes are expressed in terms of the streaming SVE vector length. Thus, when running SME kernels we must scale the LHS/RHS/DST buffer sizes by the vector length (VL). The segmentation faults were discovered when running with address sanitizer enabled. Signed-off-by: Jakub Sujak --- CMakeLists.txt | 1 + benchmark/matmul/matmul_benchmark_logic.hpp | 13 ++++++++++--- benchmark/matmul/matmul_runner.hpp | 2 +- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5ed08310..6539e0ce 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -446,6 +446,7 @@ if(KLEIDIAI_BUILD_BENCHMARK) benchmark/main.cpp benchmark/matmul/matmul_registry.cpp ) + set_source_files_properties(benchmark/matmul/matmul_registry.cpp PROPERTIES COMPILE_OPTIONS "-march=armv8-a+sve2") target_link_libraries(kleidiai_benchmark PRIVATE diff --git a/benchmark/matmul/matmul_benchmark_logic.hpp b/benchmark/matmul/matmul_benchmark_logic.hpp index cff73a2d..e3947f46 100644 --- a/benchmark/matmul/matmul_benchmark_logic.hpp +++ b/benchmark/matmul/matmul_benchmark_logic.hpp @@ -12,6 +12,7 @@ #include #include +#include "kai/kai_common.h" #include "matmul_interface.hpp" #include "matmul_runner.hpp" @@ -69,9 +70,15 @@ void kai_benchmark_matmul( } // Create sufficiently large buffers - const size_t lhs_size = m * k * 
sizeof(uint64_t); - const size_t rhs_size = n * k * sizeof(uint64_t); - const size_t dst_size = m * n * sizeof(uint32_t); + size_t lhs_size = m * k * sizeof(uint64_t); + size_t rhs_size = n * k * sizeof(uint64_t); + size_t dst_size = m * n * sizeof(uint32_t); + + if (test::cpu_has_sme() || test::cpu_has_sme2()) { + lhs_size *= kai_get_sme_vector_length_u32(); + rhs_size *= kai_get_sme_vector_length_u32(); + dst_size *= kai_get_sme_vector_length_u32(); + } const Buffer lhs(lhs_size); const Buffer rhs(rhs_size); diff --git a/benchmark/matmul/matmul_runner.hpp b/benchmark/matmul/matmul_runner.hpp index 0f04cd65..85c04969 100644 --- a/benchmark/matmul/matmul_runner.hpp +++ b/benchmark/matmul/matmul_runner.hpp @@ -46,7 +46,7 @@ public: n_ = n; k_ = k; - lhs_stride_ = k_ * data_type_size_in_bits(dst_type_); + lhs_stride_ = k_ * data_type_size_in_bits(dst_type_) / 8; dst_stride_row_ = n_ * data_type_size_in_bits(dst_type_) / 8; dst_stride_col_ = data_type_size_in_bits(dst_type_) / 8; } -- GitLab From a88c40a63aa2fd97861728cfdd1e7ec3f8db1910 Mon Sep 17 00:00:00 2001 From: Jakub Sujak Date: Mon, 28 Apr 2025 15:03:23 +0100 Subject: [PATCH 2/2] Add option for -march extension Signed-off-by: Jakub Sujak --- CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6539e0ce..d2f4b5fe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -446,7 +446,8 @@ if(KLEIDIAI_BUILD_BENCHMARK) benchmark/main.cpp benchmark/matmul/matmul_registry.cpp ) - set_source_files_properties(benchmark/matmul/matmul_registry.cpp PROPERTIES COMPILE_OPTIONS "-march=armv8-a+sve2") + set_source_files_properties(benchmark/matmul/matmul_registry.cpp + PROPERTIES COMPILE_OPTIONS "-march=armv8-a+sve2${KLEIDIAI_INTERNAL_EXTRA_ARCH}") target_link_libraries(kleidiai_benchmark PRIVATE -- GitLab