From 856d77b98e450e30ea19f886a4d98b46d1ac8484 Mon Sep 17 00:00:00 2001 From: Anitha Raj Date: Mon, 10 Feb 2025 14:32:46 +0000 Subject: [PATCH 1/4] Fix the qsi4cxps1s0 RHS packing if bias is a null pointer Resolves: #KLEIDIAI-485, #COMPMID-8024 Signed-off-by: Anitha Raj --- ...rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon.c | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon.c b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon.c index a594f9d1..5b7a5787 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon.c @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -97,18 +97,28 @@ void kai_run_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon( // Copy the scaling factors and bias size_t rows_left = n - row_idx; + // Saving scales. if (rows_left >= nr) { memcpy(scaling_factors, &scale[row_idx], nr * kai_num_bytes_multiplier_rhs); - memcpy(biases, &bias[row_idx], nr * kai_num_bytes_bias); } else { // Fill remaining values memcpy(scaling_factors, &scale[row_idx], rows_left * kai_num_bytes_multiplier_rhs); - memcpy(biases, &bias[row_idx], rows_left * kai_num_bytes_bias); // Set leftover to 0 memset(&scaling_factors[rows_left], 0, (nr - rows_left) * kai_num_bytes_multiplier_rhs); - memset(&biases[rows_left], 0, (nr - rows_left) * kai_num_bytes_bias); } - + if (bias == NULL) { + // Set bias to 0 + memset(biases, 0, nr * kai_num_bytes_bias); + } else { + if (rows_left >= nr) { + memcpy(scaling_factors, &scale[row_idx], nr * kai_num_bytes_multiplier_rhs); + } else { + // Fill remaining values + memcpy(biases, &bias[row_idx], rows_left * kai_num_bytes_bias); + // Set leftover to 0 + memset(&biases[rows_left], 0, (nr - rows_left) * kai_num_bytes_bias); + } + } // Iterate over rows in the nr row block for (size_t nr_block_idx = 0; nr_block_idx < nr; ++nr_block_idx) { const uint8_t* const src_row = rhs + ((row_idx + nr_block_idx) * rhs_stride); -- GitLab From a127986e1bea85d26c94061fbb4ec3ea252d4a4c Mon Sep 17 00:00:00 2001 From: Anitha Raj Date: Mon, 10 Feb 2025 14:32:46 +0000 Subject: [PATCH 2/4] Fix the qsi4cxps1s0 RHS packing if bias is a null pointer Resolves: #KLEIDIAI-485, #COMPMID-8024 Signed-off-by: Anitha Raj --- .../matmul/pack/kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon.c b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon.c index 5b7a5787..d0c66276 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon.c @@ -111,7 +111,7 @@ void kai_run_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon( memset(biases, 0, nr * kai_num_bytes_bias); } else { if (rows_left >= nr) { - memcpy(scaling_factors, &scale[row_idx], nr * kai_num_bytes_multiplier_rhs); + memcpy(biases, &bias[row_idx], nr * kai_num_bytes_bias); } else { // Fill remaining values memcpy(biases, &bias[row_idx], rows_left * kai_num_bytes_bias); -- GitLab From c39960df71443280044b22dbe836b0f2cdb500ff Mon Sep 17 00:00:00 2001 From: Anitha Raj Date: Mon, 10 Feb 2025 14:53:16 +0000 Subject: [PATCH 3/4] Update Changelog Signed-off-by: Anitha Raj --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 965ff197..006b7bbd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,8 @@ KleidiAI follows the [Semantic Versioning](https://semver.org/) specification fo - New 4x8 block size variant of matrix multiplication of QAI8DXP LHS and QSI4C32P RHS with F32 output. - Optimizations for FEAT_DotProd. - Added demonstration of integration using CMake in F16 Arm® Neon™ matrix multiplication example. +- Fixes: + - Fix the RHS packing micro-kernel kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon to handle null bias. ## v1.3.0 -- GitLab From 3566f28d520ce8274eb5044fcc34f6435643a40b Mon Sep 17 00:00:00 2001 From: Anitha Raj Date: Mon, 10 Feb 2025 16:43:26 +0000 Subject: [PATCH 4/4] Update documentation in header file Signed-off-by: Anitha Raj --- .../pack/kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon.h b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon.h index 07e953d1..6e94913a 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon.h +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon.h @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -89,7 +89,7 @@ size_t kai_get_rhs_packed_size_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon( /// @param[in] sr The number of kr splits. It can be 1 (no splits) up to kr. /// @param[in] rhs The RHS matrix containing the 4-bit values. /// Size in bytes is expected to be greater than or equal to n * k * (sizeof(uint8_t) / 2). -/// @param[in] bias The biases. +/// @param[in] bias The biases. The bias is set to 0.f if this argument is NULL. /// @param[in] scale The scale for each output channel. /// @param[out] rhs_packed The packed RHS matrix. /// @param[in] extra_bytes Extra bytes to append to the end of each row of the packed RHS matrix. -- GitLab