From f0a22a5006e813a1188335eebff5d8fcb7fb3ca6 Mon Sep 17 00:00:00 2001 From: Viet-Hoa Do Date: Wed, 16 Jul 2025 13:14:43 +0100 Subject: [PATCH] Add more micro-kernels to MSVC build * List of micro-kernels added to the MSVC build: - kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla - kai_lhs_quant_pack_qsi8d32p_f32_neon - kai_rhs_pack_kxn_qsi8cxp_qsi8cx_neon - kai_rhs_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon - kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon - kai_rhs_pack_nxk_qsi8cxp_qsi8cx_neon * Fix compilation issues. Signed-off-by: Viet-Hoa Do --- CHANGELOG.md | 7 +++++++ CMakeLists.txt | 10 +++++----- ..._matmul_clamp_f32_f32_f32p16vlx1b_1x16vl_sme2_mla.c | 3 --- ...i_matmul_clamp_f32_f32_f32p2vlx1b_1x16vl_sme2_mla.c | 3 --- ...ul_clamp_qai8_qai8_qsi8cxp2vlx4sb_1x16vl_sme2_dot.c | 3 --- .../matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32_neon.c | 2 +- ...s_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon.c | 4 ++-- .../kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon.c | 2 +- 8 files changed, 16 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1986fa1c..7c8316ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,13 @@ KleidiAI follows the [Semantic Versioning](https://semver.org/) specification fo - Convert SME transposed RHS packing micro-kernels to pure assembly: - kai_rhs_pack_nxk_f32p2vlx1biasf32_f32_f32_sme - kai_rhs_pack_nxk_x16p2vlx2b_x16_x16_sme +- Include more micro-kernels in MSVC build: + - kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla + - kai_lhs_quant_pack_qsi8d32p_f32_neon + - kai_rhs_pack_kxn_qsi8cxp_qsi8cx_neon + - kai_rhs_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon + - kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon + - kai_rhs_pack_nxk_qsi8cxp_qsi8cx_neon - Fixes - Update kai_kernel_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme2_mopa to improve accuracy diff --git a/CMakeLists.txt b/CMakeLists.txt index 054d5b45..1a73b28a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -160,20 +160,20 @@ set(KLEIDIAI_FILES_NEON_ASM kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pnrx8_qsu4c32s1s0_neon.c kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pnrx4_qsu4c32s1s0_neon.c kai/ukernels/matmul/pack/kai_lhs_quant_pack_qai8dxp_bf16_neon.c -) - -set(KLEIDIAI_FILES_NEON - ${KLEIDIAI_FILES_NEON_ASM} kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla.c kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla_asm.S kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32_neon.c - kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32p8x1biasf32_f32_f32_neon.c kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi8cxp_qsi8cx_neon.c kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon.c kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon.c kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi8cxp_qsi8cx_neon.c ) +set(KLEIDIAI_FILES_NEON + ${KLEIDIAI_FILES_NEON_ASM} + kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32p8x1biasf32_f32_f32_neon.c +) + set(KLEIDIAI_FILES_NEON_DOTPROD_ASM kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod.c kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod_asm.S diff --git a/kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p16vlx1b_1x16vl_sme2_mla.c b/kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p16vlx1b_1x16vl_sme2_mla.c index e9453870..57b2dc4e 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p16vlx1b_1x16vl_sme2_mla.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p16vlx1b_1x16vl_sme2_mla.c @@ -4,9 +4,6 @@ // SPDX-License-Identifier: Apache-2.0 // -// Do not flag up inline assembly blocks -#pragma GCC diagnostic ignored "-Woverlength-strings" - #if (!defined(__aarch64__) || !defined(__ARM_FEATURE_SVE2)) && !defined(_M_ARM64) #error This file must be compiled for AArch64, FEAT_SVE2. #else // Architectural features check. diff --git a/kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p2vlx1b_1x16vl_sme2_mla.c b/kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p2vlx1b_1x16vl_sme2_mla.c index b2fe03aa..bf200a7b 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p2vlx1b_1x16vl_sme2_mla.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p2vlx1b_1x16vl_sme2_mla.c @@ -4,9 +4,6 @@ // SPDX-License-Identifier: Apache-2.0 // -// Do not flag up inline assembly blocks -#pragma GCC diagnostic ignored "-Woverlength-strings" - #if (!defined(__aarch64__) || !defined(__ARM_FEATURE_SVE2)) && !defined(_M_ARM64) #error This file must be compiled for AArch64, FEAT_SVE2. #else // Architectural features check. diff --git a/kai/ukernels/matmul/matmul_clamp_qai8_qai8_qsi8cxp/kai_matmul_clamp_qai8_qai8_qsi8cxp2vlx4sb_1x16vl_sme2_dot.c b/kai/ukernels/matmul/matmul_clamp_qai8_qai8_qsi8cxp/kai_matmul_clamp_qai8_qai8_qsi8cxp2vlx4sb_1x16vl_sme2_dot.c index 6a5f9536..a633a840 100644 --- a/kai/ukernels/matmul/matmul_clamp_qai8_qai8_qsi8cxp/kai_matmul_clamp_qai8_qai8_qsi8cxp2vlx4sb_1x16vl_sme2_dot.c +++ b/kai/ukernels/matmul/matmul_clamp_qai8_qai8_qsi8cxp/kai_matmul_clamp_qai8_qai8_qsi8cxp2vlx4sb_1x16vl_sme2_dot.c @@ -4,9 +4,6 @@ // SPDX-License-Identifier: Apache-2.0 // -// Do not flag up inline assembly blocks -#pragma GCC diagnostic ignored "-Woverlength-strings" - #if (!defined(__aarch64__) || !defined(__ARM_FEATURE_SVE2)) && !defined(_M_ARM64) #error This file must be compiled for AArch64, FEAT_SVE2. #else // Architectural features check. diff --git a/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32_neon.c b/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32_neon.c index 736c8086..e6c419a9 100644 --- a/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32_neon.c +++ b/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32_neon.c @@ -4,7 +4,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#if !defined(__aarch64__) +#if !defined(__aarch64__) && !defined(_M_ARM64) #error This file must be compiled for AArch64. #else // Architectural features check. diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon.c b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon.c index 419f5cd0..f3676167 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon.c @@ -1,10 +1,10 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // -#if !defined(__aarch64__) +#if !defined(__aarch64__) && !defined(_M_ARM64) #error This file must be compiled for AArch64. #else // Architectural features check. diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon.c b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon.c index f264c5c0..271b2173 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon.c @@ -3,7 +3,7 @@ // // SPDX-License-Identifier: Apache-2.0 // -#if !defined(__aarch64__) +#if !defined(__aarch64__) && !defined(_M_ARM64) #error This file must be compiled for AArch64. #else // Architectural features check. -- GitLab