From 4aa9fb031d6fcdab8cc13b5c8b09a08df0639da7 Mon Sep 17 00:00:00 2001 From: Anitha Raj Date: Thu, 3 Apr 2025 13:58:41 +0100 Subject: [PATCH] Rename kai_lhs_quant_pack_qsi8d32p_f32_neon to kai_lhs_quant_pack_qsi8d32pmrx4_f32_neon Signed-off-by: Anitha Raj --- CHANGELOG.md | 3 +++ CMakeLists.txt | 2 +- kai/ukernels/matmul/BUILD.bazel | 2 +- ...qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa.h | 4 ++-- ...f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot.h | 4 ++-- ... => kai_lhs_quant_pack_qsi8d32pmrx4_f32_neon.c} | 14 +++++++------- ... => kai_lhs_quant_pack_qsi8d32pmrx4_f32_neon.h} | 12 ++++++------ .../matmul_clamp_f32_qsi8d32p_qsi4c32p_test.cpp | 8 ++++---- 8 files changed, 26 insertions(+), 23 deletions(-) rename kai/ukernels/matmul/pack/{kai_lhs_quant_pack_qsi8d32p_f32_neon.c => kai_lhs_quant_pack_qsi8d32pmrx4_f32_neon.c} (87%) rename kai/ukernels/matmul/pack/{kai_lhs_quant_pack_qsi8d32p_f32_neon.h => kai_lhs_quant_pack_qsi8d32pmrx4_f32_neon.h} (88%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1247ef29..8fe632af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,9 @@ KleidiAI follows the [Semantic Versioning](https://semver.org/) specification fo ## Upcoming Release +- Breaking changes: + - Rename kai_lhs_quant_pack_qsi8d32p_f32_neon to kai_lhs_quant_pack_qsi8d32pmrx4_f32_neon + ## v1.6.0 - Add CMake installation and `find_package()` support. diff --git a/CMakeLists.txt b/CMakeLists.txt index 53ce829f..73bb83c6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -118,7 +118,7 @@ set(KLEIDIAI_FILES_NEON_FP16_BF16 set(KLEIDIAI_FILES_NEON kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla.c kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla_asm.S - kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32_neon.c + kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32pmrx4_f32_neon.c kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32p8x1biasf32_f32_f32_neon.c kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi8cxp_qsi8cx_neon.c kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon.c diff --git a/kai/ukernels/matmul/BUILD.bazel b/kai/ukernels/matmul/BUILD.bazel index 3796d88d..7b656c49 100644 --- a/kai/ukernels/matmul/BUILD.bazel +++ b/kai/ukernels/matmul/BUILD.bazel @@ -28,7 +28,7 @@ SCALAR_KERNELS = [ # buildifier: keep sorted NEON_KERNELS = [ - "pack/kai_lhs_quant_pack_qsi8d32p_f32_neon", + "pack/kai_lhs_quant_pack_qsi8d32pmrx4_f32_neon", "pack/kai_rhs_pack_kxn_f32p8x1biasf32_f32_f32_neon", "pack/kai_rhs_pack_kxn_qsi4c32p_qsu4c32s1s0", "pack/kai_rhs_pack_kxn_qsi8cxp_qsi8cx_neon", diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa.h b/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa.h index 5c7e6e05..4ecff7cb 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa.h +++ b/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa.h @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -14,7 +14,7 @@ extern "C" { /// Micro-kernel dependencies /// -/// -# @ref kai_lhs_quant_pack_qsi8d32p_f32_neon to dynamically quantize and pack the LHS matrix in a single step. +/// -# @ref kai_lhs_quant_pack_qsi8d32pmrx4_f32_neon to dynamically quantize and pack the LHS matrix in a single step. /// -# @ref kai_rhs_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon to pack the RHS NxK matrix. /// -------------------------------------------------- diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot.h b/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot.h index 990fa145..ec0da8f1 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot.h +++ b/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot.h @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -14,7 +14,7 @@ extern "C" { /// Micro-kernel dependencies /// -/// -# @ref kai_lhs_quant_pack_qsi8d32p_f32_neon to dynamically quantize and pack the LHS matrix in a single step. +/// -# @ref kai_lhs_quant_pack_qsi8d32pmrx4_f32_neon to dynamically quantize and pack the LHS matrix in a single step. /// -# @ref kai_rhs_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon to pack the RHS NxK matrix. /// -------------------------------------------------- diff --git a/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32_neon.c b/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32pmrx4_f32_neon.c similarity index 87% rename from kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32_neon.c rename to kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32pmrx4_f32_neon.c index 70298754..1bba9838 100644 --- a/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32_neon.c +++ b/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32pmrx4_f32_neon.c @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -8,7 +8,7 @@ #error This file must be compiled for AArch64. #else // Architectural features check. -#include "kai_lhs_quant_pack_qsi8d32p_f32_neon.h" +#include "kai_lhs_quant_pack_qsi8d32pmrx4_f32_neon.h" #include #include @@ -32,16 +32,16 @@ inline static size_t kai_lhs_packed_stride(size_t k, size_t mr, size_t kr, size_ return mr * kai_num_blocks_per_row(k, bl) * kai_num_bytes_per_block(bl); } -size_t kai_get_m_step_lhs_quant_pack_qsi8d32p_f32_neon(size_t mr) { +size_t kai_get_m_step_lhs_quant_pack_qsi8d32pmrx4_f32_neon(size_t mr) { KAI_UNUSED(mr); return 1; } -size_t kai_get_lhs_offset_lhs_quant_pack_qsi8d32p_f32_neon(size_t m_idx, size_t lhs_stride) { +size_t kai_get_lhs_offset_lhs_quant_pack_qsi8d32pmrx4_f32_neon(size_t m_idx, size_t lhs_stride) { return m_idx * lhs_stride; } -size_t kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32_neon( +size_t kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32pmrx4_f32_neon( size_t m_idx, size_t k, size_t bl, size_t mr, size_t kr, size_t sr) { KAI_ASSUME((k % 2) == 0); KAI_ASSUME((k % kr) == 0); @@ -55,7 +55,7 @@ size_t kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32_neon( return (m_idx / mr) * kai_lhs_packed_stride(k, mr, kr, bl); } -size_t kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32_neon( +size_t kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32pmrx4_f32_neon( size_t m, size_t k, size_t bl, size_t mr, size_t kr, size_t sr) { KAI_ASSUME((k % 2) == 0); KAI_ASSUME((k % kr) == 0); @@ -69,7 +69,7 @@ size_t kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32_neon( return (num_rows * kai_lhs_packed_stride(k, mr, kr, bl)); } -void kai_run_lhs_quant_pack_qsi8d32p_f32_neon( +void kai_run_lhs_quant_pack_qsi8d32pmrx4_f32_neon( size_t m, size_t k, size_t bl, size_t mr, size_t kr, size_t sr, size_t m_idx_start, const float* lhs, size_t lhs_stride, void* lhs_packed) { KAI_ASSUME((bl % kr) == 0); diff --git a/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32_neon.h b/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32pmrx4_f32_neon.h similarity index 88% rename from kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32_neon.h rename to kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32pmrx4_f32_neon.h index 837d95bd..3e28ec99 100644 --- a/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32_neon.h +++ b/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32pmrx4_f32_neon.h @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -18,7 +18,7 @@ extern "C" { /// @param[in] mr The number of M rows to interleave on the same output row. /// /// @return the m step value -size_t kai_get_m_step_lhs_quant_pack_qsi8d32p_f32_neon(size_t mr); +size_t kai_get_m_step_lhs_quant_pack_qsi8d32pmrx4_f32_neon(size_t mr); /// Gets the offset in bytes for the LHS matrix (not packed) /// @@ -28,7 +28,7 @@ size_t kai_get_m_step_lhs_quant_pack_qsi8d32p_f32_neon(size_t mr); /// @param[in] lhs_stride The number of bytes in in each row of the LHS matrix (not packed) /// /// @return the offset in bytes to the LHS matrix -size_t kai_get_lhs_offset_lhs_quant_pack_qsi8d32p_f32_neon(size_t m_idx, size_t lhs_stride); +size_t kai_get_lhs_offset_lhs_quant_pack_qsi8d32pmrx4_f32_neon(size_t m_idx, size_t lhs_stride); /// Gets the offset in bytes for the packed LHS matrix, /// which contains the packed 8-bit quantized symmetric per-block (qsi8d32) values. @@ -43,7 +43,7 @@ size_t kai_get_lhs_offset_lhs_quant_pack_qsi8d32p_f32_neon(size_t m_idx, size_t /// @param[in] sr The number of kr splits. It can be 1 (no splits) up to kr. /// /// @return the offset in bytes to the packed LHS matrix -size_t kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32_neon( +size_t kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32pmrx4_f32_neon( size_t m_idx, size_t k, size_t bl, size_t mr, size_t kr, size_t sr); /// Gets the size in bytes for the quantized and packed LHS matrix @@ -57,7 +57,7 @@ size_t kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32_neon( /// @param[in] sr The number of kr splits. It can be 1 (no splits) up to kr. /// /// @return the packed LHS matrix size in bytes -size_t kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32_neon( +size_t kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32pmrx4_f32_neon( size_t m, size_t k, size_t bl, size_t mr, size_t kr, size_t sr); /// Run the micro-kernel to quantize and pack the LHS matrix. @@ -74,7 +74,7 @@ size_t kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32_neon( /// @param[in] lhs LHS matrix. /// @param[in] lhs_stride Stride in bytes between two rows of LHS. /// @param[out] lhs_packed The quantized and packed LHS matrix. -void kai_run_lhs_quant_pack_qsi8d32p_f32_neon( +void kai_run_lhs_quant_pack_qsi8d32pmrx4_f32_neon( size_t m, size_t k, size_t bl, size_t mr, size_t kr, size_t sr, size_t m_idx_start, const float* lhs, size_t lhs_stride, void* lhs_packed); diff --git a/test/tests/matmul_clamp_f32_qsi8d32p_qsi4c32p_test.cpp b/test/tests/matmul_clamp_f32_qsi8d32p_qsi4c32p_test.cpp index 153decb5..4c24abf6 100644 --- a/test/tests/matmul_clamp_f32_qsi8d32p_qsi4c32p_test.cpp +++ b/test/tests/matmul_clamp_f32_qsi8d32p_qsi4c32p_test.cpp @@ -25,7 +25,7 @@ #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm.h" #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p_qsi4c32p_interface.h" #include "kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32.h" -#include "kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32_neon.h" +#include "kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32pmrx4_f32_neon.h" #include "kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon.h" #include "kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.h" #include "test/common/cpu_info.hpp" @@ -86,10 +86,10 @@ static const std::array< clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod, cpu_has_dotprod, lhs_quant_pack_qsi8d32p_f32, rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0), UKERNEL_MATMUL_PACK_VARIANT( - clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa, cpu_has_sme2, lhs_quant_pack_qsi8d32p_f32_neon, - rhs_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon), + clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa, cpu_has_sme2, + lhs_quant_pack_qsi8d32pmrx4_f32_neon, rhs_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon), UKERNEL_MATMUL_PACK_VARIANT( - clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot, cpu_has_sme2, lhs_quant_pack_qsi8d32p_f32_neon, + clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot, cpu_has_sme2, lhs_quant_pack_qsi8d32pmrx4_f32_neon, rhs_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon)}}; using MatMulTestParams_withPortion = std::tuple; -- GitLab