From 9ed249083dd2ac69c90775f2193df400e8ee1ed7 Mon Sep 17 00:00:00 2001 From: Jens Elofsson Date: Mon, 9 Jun 2025 16:15:26 +0200 Subject: [PATCH 1/2] Fix bug where kai_get_m_step returns the incorrect value Fix issue in kernels - matmul_clamp_f32_f32_f32p16vlx1b_1x16vl_sme2_mla - matmul_clamp_f32_f32_f32p2vlx1b_1x16vl_sme2_mla where kai_get_m_step returns the incorrect value. Signed-off-by: Jens Elofsson --- .../kai_matmul_clamp_f32_f32_f32p16vlx1b_1x16vl_sme2_mla.c | 3 ++- .../kai_matmul_clamp_f32_f32_f32p2vlx1b_1x16vl_sme2_mla.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p16vlx1b_1x16vl_sme2_mla.c b/kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p16vlx1b_1x16vl_sme2_mla.c index 627ab688..df4be133 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p16vlx1b_1x16vl_sme2_mla.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p16vlx1b_1x16vl_sme2_mla.c @@ -18,13 +18,14 @@ #include "kai/kai_common.h" +static const size_t kai_m_step = 1; static const size_t kai_mr = 1; static const size_t kai_nr = 16; static const size_t kai_kr = 1; static const size_t kai_sr = 1; size_t kai_get_m_step_matmul_clamp_f32_f32_f32p16vlx1b_1x16vl_sme2_mla(void) { - return kai_mr * kai_get_sme_vector_length_u32(); + return kai_m_step; } size_t kai_get_n_step_matmul_clamp_f32_f32_f32p16vlx1b_1x16vl_sme2_mla(void) { diff --git a/kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p2vlx1b_1x16vl_sme2_mla.c b/kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p2vlx1b_1x16vl_sme2_mla.c index cf060840..2cadd759 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p2vlx1b_1x16vl_sme2_mla.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p2vlx1b_1x16vl_sme2_mla.c @@ -25,7 +25,7 @@ static const size_t 
kai_kr = 1; static const size_t kai_sr = 1; size_t kai_get_m_step_matmul_clamp_f32_f32_f32p2vlx1b_1x16vl_sme2_mla(void) { - return kai_m_step * kai_get_sme_vector_length_u32(); + return kai_m_step; } size_t kai_get_n_step_matmul_clamp_f32_f32_f32p2vlx1b_1x16vl_sme2_mla(void) { -- GitLab From 4f425e681efc598f9e832de81314b7cf0e0944ce Mon Sep 17 00:00:00 2001 From: Jens Elofsson Date: Tue, 10 Jun 2025 08:31:15 +0200 Subject: [PATCH 2/2] Add changelog entry. Signed-off-by: Jens Elofsson --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 10a4d450..e430695d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,10 @@ KleidiAI follows the [Semantic Versioning](https://semver.org/) specification fo - Matrix multiplication (MxN) Micro-kernels of QSI8D32 LHS and QAI4C32 RHS with F16 output, optimized for FEAT_DotProd. - Optimized version of kai_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0 kernel for block depth of 8 bytes (`kai_rhs_pack_nxk_qsi4c32pnrx8_qsu4c32s1s0_neon`) - Added Convolution example using SME Indirect Matmul Kernels +- Fixes: + - Fix issue where kai_get_m_step() returned an incorrect value (it was wrongly scaled by the SME vector length) for the following kernels: + - matmul_clamp_f32_f32_f32p16vlx1b_1x16vl_sme2_mla + - matmul_clamp_f32_f32_f32p2vlx1b_1x16vl_sme2_mla ## v1.9.0 -- GitLab