From 4805975e732ec66a486121c1da376e46a0b24542 Mon Sep 17 00:00:00 2001 From: Jens Elofsson Date: Wed, 16 Jul 2025 13:49:09 +0200 Subject: [PATCH 1/4] Create separate .S-file for common support functions Create kai_common_asm.S to hold support functions that use pure assembly instead of inline assembly. The function moved in this patch is kai_get_sme_vector_length_u8. Signed-off-by: Jens Elofsson --- BUILD.bazel | 1 + CMakeLists.txt | 7 ++- .../CMakeLists.txt | 1 + kai/kai_common.h | 13 +---- kai/kai_common_asm.S | 50 +++++++++++++++++++ 5 files changed, 60 insertions(+), 12 deletions(-) create mode 100644 kai/kai_common_asm.S diff --git a/BUILD.bazel b/BUILD.bazel index c4abdc25..c58b41e2 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -29,6 +29,7 @@ config_setting( cc_library( name = "common", + srcs = ["kai/kai_common_asm.S"], hdrs = ["kai/kai_common.h"], ) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6fe93eff..53ac7ef8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -340,6 +340,7 @@ if(NOT MSVC) target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_I8MM}) target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_SME}) target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_SME2}) + target_sources(kleidiai PRIVATE kai/kai_common_asm.S) set_source_files_properties(${KLEIDIAI_FILES_SCALAR} PROPERTIES COMPILE_OPTIONS -march=armv8-a${KLEIDIAI_INTERNAL_EXTRA_ARCH}) set_source_files_properties(${KLEIDIAI_FILES_NEON} PROPERTIES COMPILE_OPTIONS -march=armv8-a${KLEIDIAI_INTERNAL_EXTRA_ARCH}) @@ -354,12 +355,14 @@ if(NOT MSVC) # Use -fno-tree-vectorize option to disable compiler based vectorization set_source_files_properties(${KLEIDIAI_FILES_SME} PROPERTIES COMPILE_OPTIONS "-fno-tree-vectorize;-march=armv8.2-a+sve+sve2${KLEIDIAI_INTERNAL_EXTRA_ARCH}") set_source_files_properties(${KLEIDIAI_FILES_SME2} PROPERTIES COMPILE_OPTIONS "-fno-tree-vectorize;-march=armv8.2-a+sve+sve2${KLEIDIAI_INTERNAL_EXTRA_ARCH}") + set_source_files_properties(kai/kai_common_asm.S PROPERTIES 
COMPILE_OPTIONS "-fno-tree-vectorize;-march=armv8.2-a+sve+sve2${KLEIDIAI_INTERNAL_EXTRA_ARCH}") else() target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_ASM}) target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_DOTPROD_ASM}) target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_I8MM_ASM}) target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_SME_ASM}) target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_SME2_ASM}) + target_sources(kleidiai PRIVATE kai/kai_common_asm.S) set_source_files_properties(${KLEIDIAI_FILES_SCALAR} PROPERTIES COMPILE_OPTIONS /arch:armv8.0${KLEIDIAI_INTERNAL_EXTRA_ARCH}) set_source_files_properties(${KLEIDIAI_FILES_NEON_I8MM_ASM} PROPERTIES COMPILE_OPTIONS /arch:armv8.2${KLEIDIAI_INTERNAL_EXTRA_ARCH}) @@ -367,13 +370,15 @@ else() set_source_files_properties(${KLEIDIAI_FILES_NEON_ASM} PROPERTIES COMPILE_OPTIONS /arch:armv8.2${KLEIDIAI_INTERNAL_EXTRA_ARCH}) set_source_files_properties(${KLEIDIAI_FILES_SME_ASM} PROPERTIES COMPILE_OPTIONS /arch:armv8.2${KLEIDIAI_INTERNAL_EXTRA_ARCH}) set_source_files_properties(${KLEIDIAI_FILES_SME2_ASM} PROPERTIES COMPILE_OPTIONS /arch:armv8.2${KLEIDIAI_INTERNAL_EXTRA_ARCH}) + set_source_files_properties(kai/kai_common_asm.S PROPERTIES COMPILE_OPTIONS /arch:armv8.2${KLEIDIAI_INTERNAL_EXTRA_ARCH}) set(KLEIDIAI_FILES_ASM ${KLEIDIAI_FILES_SME_ASM} ${KLEIDIAI_FILES_SME2_ASM} ${KLEIDIAI_FILES_NEON_ASM} ${KLEIDIAI_FILES_NEON_DOTPROD_ASM} - ${KLEIDIAI_FILES_NEON_I8MM_ASM}) + ${KLEIDIAI_FILES_NEON_I8MM_ASM} + kai/kai_common_asm.S) list(FILTER KLEIDIAI_FILES_ASM INCLUDE REGEX "^.*\.S$") set_source_files_properties(${KLEIDIAI_FILES_ASM} PROPERTIES LANGUAGE ASM_MARMASM) diff --git a/examples/conv2d_imatmul_clamp_f16_f16_f16p_sme2/CMakeLists.txt b/examples/conv2d_imatmul_clamp_f16_f16_f16p_sme2/CMakeLists.txt index afc8a4c5..40a6f520 100644 --- a/examples/conv2d_imatmul_clamp_f16_f16_f16p_sme2/CMakeLists.txt +++ b/examples/conv2d_imatmul_clamp_f16_f16_f16p_sme2/CMakeLists.txt @@ -23,6 +23,7 @@ set(KAI_SOURCES 
${KAI_PATH}/kai/ukernels/matmul/pack/kai_lhs_imatmul_pack_x16p2vlx2_x16p_sme.c ${KAI_PATH}/kai/ukernels/matmul/pack/kai_rhs_imatmul_pack_kxn_x16p2vlx2b_x16_x16_sme_asm.S ${KAI_PATH}/kai/ukernels/matmul/pack/kai_rhs_imatmul_pack_kxn_x16p2vlx2b_x16_x16_sme.c + ${KAI_PATH}/kai/kai_common_asm.S ) # Files requires to build the executable diff --git a/kai/kai_common.h b/kai/kai_common.h index fb3003f7..7c0feca2 100644 --- a/kai/kai_common.h +++ b/kai/kai_common.h @@ -143,18 +143,9 @@ inline static size_t kai_roundup(size_t a, size_t b) { return ((a + b - 1) / b) * b; } -#ifdef __ARM_FEATURE_SVE2 +#if defined(__ARM_FEATURE_SVE2) || defined(_M_ARM64) /// Gets the SME vector length for 8-bit elements. -inline static uint64_t kai_get_sme_vector_length_u8(void) { - uint64_t res = 0; - __asm__ __volatile__( - ".inst 0x04bf5827 // rdsvl x7, #1\n" - "mov %0, x7\n" - : "=r"(res) - : /* no inputs */ - : "x7"); - return res; -} +uint64_t kai_get_sme_vector_length_u8(void); /// Gets the SME vector length for 16-bit elements. 
inline static uint64_t kai_get_sme_vector_length_u16(void) { diff --git a/kai/kai_common_asm.S b/kai/kai_common_asm.S new file mode 100644 index 00000000..baafe7cc --- /dev/null +++ b/kai/kai_common_asm.S @@ -0,0 +1,50 @@ +// +// SPDX-FileCopyrightText: Copyright 2025 Arm Limited and/or its affiliates +// +// SPDX-License-Identifier: Apache-2.0 +// + +#if defined(_MSC_VER) + #define KAI_ASM_GLOBAL(name) GLOBAL name + #define KAI_ASM_FUNCTION_TYPE(name) + #define KAI_ASM_FUNCTION_LABEL(name) name PROC + #define KAI_ASM_FUNCTION_END(name) ENDP + + #define KAI_ASM_CODE(name) AREA name, CODE, READONLY + #define KAI_ASM_ALIGN + #define KAI_ASM_LABEL(name) name + #define KAI_ASM_INST(hex) DCD hex + #define KAI_ASM_END END +#else + #if defined(__APPLE__) + #define KAI_ASM_GLOBAL(name) .globl _##name + #define KAI_ASM_FUNCTION_TYPE(name) + #define KAI_ASM_FUNCTION_LABEL(name) _##name: + #define KAI_ASM_FUNCTION_END(name) + #else + #define KAI_ASM_GLOBAL(name) .global name + #define KAI_ASM_FUNCTION_TYPE(name) .type name, %function + #define KAI_ASM_FUNCTION_LABEL(name) name: + #define KAI_ASM_FUNCTION_END(name) .size name, .-name + #endif + + #define KAI_ASM_CODE(name) .text + #define KAI_ASM_ALIGN .p2align 4,,11 + #define KAI_ASM_LABEL(name) name: + #define KAI_ASM_INST(hex) .inst hex + #define KAI_ASM_END +#endif + + KAI_ASM_CODE(kai_common) + KAI_ASM_ALIGN + + KAI_ASM_GLOBAL(kai_get_sme_vector_length_u8) + +KAI_ASM_FUNCTION_TYPE(kai_get_sme_vector_length_u8) +KAI_ASM_FUNCTION_LABEL(kai_get_sme_vector_length_u8) + KAI_ASM_INST(0x04bf5820) // rdsvl x0, #1 + ret + KAI_ASM_FUNCTION_END(kai_get_sme_vector_length_u8) + + KAI_ASM_END + -- GitLab From ba5d61e9e85af580de61ddeb8f215d70fedfa32c Mon Sep 17 00:00:00 2001 From: Jens Elofsson Date: Thu, 17 Jul 2025 11:20:50 +0200 Subject: [PATCH 2/4] Address review comments - Rename file to kai_common_sme_asm.S - Add it to KLEIDIAI_FILES_SME_ASM in CMakeLists.txt Signed-off-by: Jens Elofsson --- CMakeLists.txt | 8 ++------ 
kai/{kai_common_asm.S => kai_common_sme_asm.S} | 0 2 files changed, 2 insertions(+), 6 deletions(-) rename kai/{kai_common_asm.S => kai_common_sme_asm.S} (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 53ac7ef8..d08faf2e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -241,6 +241,7 @@ set(KLEIDIAI_FILES_NEON_I8MM ) set(KLEIDIAI_FILES_SME_ASM + kai/kai_common_sme_asm.S kai/ukernels/matmul/matmul_clamp_f16_f16_f16p/kai_matmul_clamp_f16_f16_f16p2vlx2b_1x8vl_sme_mla.c kai/ukernels/matmul/matmul_clamp_f16_f16_f16p/kai_matmul_clamp_f16_f16_f16p2vlx2b_1x8vl_sme_mla_asm.S kai/ukernels/matmul/matmul_clamp_f16_f16p_f16p/kai_matmul_clamp_f16_f16p2vlx2_f16p2vlx2b_2vlx2vl_sme_mopa.c @@ -340,7 +341,6 @@ if(NOT MSVC) target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_I8MM}) target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_SME}) target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_SME2}) - target_sources(kleidiai PRIVATE kai/kai_common_asm.S) set_source_files_properties(${KLEIDIAI_FILES_SCALAR} PROPERTIES COMPILE_OPTIONS -march=armv8-a${KLEIDIAI_INTERNAL_EXTRA_ARCH}) set_source_files_properties(${KLEIDIAI_FILES_NEON} PROPERTIES COMPILE_OPTIONS -march=armv8-a${KLEIDIAI_INTERNAL_EXTRA_ARCH}) @@ -355,14 +355,12 @@ if(NOT MSVC) # Use -fno-tree-vectorize option to disable compiler based vectorization set_source_files_properties(${KLEIDIAI_FILES_SME} PROPERTIES COMPILE_OPTIONS "-fno-tree-vectorize;-march=armv8.2-a+sve+sve2${KLEIDIAI_INTERNAL_EXTRA_ARCH}") set_source_files_properties(${KLEIDIAI_FILES_SME2} PROPERTIES COMPILE_OPTIONS "-fno-tree-vectorize;-march=armv8.2-a+sve+sve2${KLEIDIAI_INTERNAL_EXTRA_ARCH}") - set_source_files_properties(kai/kai_common_asm.S PROPERTIES COMPILE_OPTIONS "-fno-tree-vectorize;-march=armv8.2-a+sve+sve2${KLEIDIAI_INTERNAL_EXTRA_ARCH}") else() target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_ASM}) target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_DOTPROD_ASM}) target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_I8MM_ASM}) 
target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_SME_ASM}) target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_SME2_ASM}) - target_sources(kleidiai PRIVATE kai/kai_common_asm.S) set_source_files_properties(${KLEIDIAI_FILES_SCALAR} PROPERTIES COMPILE_OPTIONS /arch:armv8.0${KLEIDIAI_INTERNAL_EXTRA_ARCH}) set_source_files_properties(${KLEIDIAI_FILES_NEON_I8MM_ASM} PROPERTIES COMPILE_OPTIONS /arch:armv8.2${KLEIDIAI_INTERNAL_EXTRA_ARCH}) @@ -370,15 +368,13 @@ else() set_source_files_properties(${KLEIDIAI_FILES_NEON_ASM} PROPERTIES COMPILE_OPTIONS /arch:armv8.2${KLEIDIAI_INTERNAL_EXTRA_ARCH}) set_source_files_properties(${KLEIDIAI_FILES_SME_ASM} PROPERTIES COMPILE_OPTIONS /arch:armv8.2${KLEIDIAI_INTERNAL_EXTRA_ARCH}) set_source_files_properties(${KLEIDIAI_FILES_SME2_ASM} PROPERTIES COMPILE_OPTIONS /arch:armv8.2${KLEIDIAI_INTERNAL_EXTRA_ARCH}) - set_source_files_properties(kai/kai_common_asm.S PROPERTIES COMPILE_OPTIONS /arch:armv8.2${KLEIDIAI_INTERNAL_EXTRA_ARCH}) set(KLEIDIAI_FILES_ASM ${KLEIDIAI_FILES_SME_ASM} ${KLEIDIAI_FILES_SME2_ASM} ${KLEIDIAI_FILES_NEON_ASM} ${KLEIDIAI_FILES_NEON_DOTPROD_ASM} - ${KLEIDIAI_FILES_NEON_I8MM_ASM} - kai/kai_common_asm.S) + ${KLEIDIAI_FILES_NEON_I8MM_ASM}) list(FILTER KLEIDIAI_FILES_ASM INCLUDE REGEX "^.*\.S$") set_source_files_properties(${KLEIDIAI_FILES_ASM} PROPERTIES LANGUAGE ASM_MARMASM) diff --git a/kai/kai_common_asm.S b/kai/kai_common_sme_asm.S similarity index 100% rename from kai/kai_common_asm.S rename to kai/kai_common_sme_asm.S -- GitLab From 1c22b878772828536b260aae4c1152d3f784eec4 Mon Sep 17 00:00:00 2001 From: Jens Elofsson Date: Thu, 17 Jul 2025 13:38:24 +0200 Subject: [PATCH 3/4] Change to new filename in all required places. 
Signed-off-by: Jens Elofsson --- BUILD.bazel | 2 +- examples/conv2d_imatmul_clamp_f16_f16_f16p_sme2/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/BUILD.bazel b/BUILD.bazel index c58b41e2..a3c3e755 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -29,7 +29,7 @@ config_setting( cc_library( name = "common", - srcs = ["kai/kai_common_asm.S"], + srcs = ["kai/kai_common_sme_asm.S"], hdrs = ["kai/kai_common.h"], ) diff --git a/examples/conv2d_imatmul_clamp_f16_f16_f16p_sme2/CMakeLists.txt b/examples/conv2d_imatmul_clamp_f16_f16_f16p_sme2/CMakeLists.txt index 40a6f520..95a4b411 100644 --- a/examples/conv2d_imatmul_clamp_f16_f16_f16p_sme2/CMakeLists.txt +++ b/examples/conv2d_imatmul_clamp_f16_f16_f16p_sme2/CMakeLists.txt @@ -23,7 +23,7 @@ set(KAI_SOURCES ${KAI_PATH}/kai/ukernels/matmul/pack/kai_lhs_imatmul_pack_x16p2vlx2_x16p_sme.c ${KAI_PATH}/kai/ukernels/matmul/pack/kai_rhs_imatmul_pack_kxn_x16p2vlx2b_x16_x16_sme_asm.S ${KAI_PATH}/kai/ukernels/matmul/pack/kai_rhs_imatmul_pack_kxn_x16p2vlx2b_x16_x16_sme.c - ${KAI_PATH}/kai/kai_common_asm.S + ${KAI_PATH}/kai/kai_common_sme_asm.S ) # Files requires to build the executable -- GitLab From fcdd4062db75dced982cdc6f2b5f4008bd5a19c5 Mon Sep 17 00:00:00 2001 From: Jens Elofsson Date: Thu, 17 Jul 2025 15:22:26 +0200 Subject: [PATCH 4/4] Address review comments - Update closing guard comment to match the opening guard Signed-off-by: Jens Elofsson --- kai/kai_common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kai/kai_common.h b/kai/kai_common.h index 7c0feca2..ad30c4cb 100644 --- a/kai/kai_common.h +++ b/kai/kai_common.h @@ -156,7 +156,7 @@ inline static uint64_t kai_get_sme_vector_length_u16(void) { inline static uint64_t kai_get_sme_vector_length_u32(void) { return kai_get_sme_vector_length_u8() / 4; } -#endif // __ARM_FEATURE_SVE2 +#endif // defined(__ARM_FEATURE_SVE2) || defined(_M_ARM64) /// Extends the sign bit of int 4-bit value (stored in int8_t variable) /// 
@param[in] value The 4-bit int value -- GitLab