From 5f6c4e90622e1d1bd238fe737e17444023a6dc35 Mon Sep 17 00:00:00 2001 From: Jens Elofsson Date: Wed, 29 Jan 2025 15:45:29 +0100 Subject: [PATCH 1/4] Add kernel assembly files to MSVC build. Signed-off-by: Jens Elofsson --- CMakeLists.txt | 112 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 73 insertions(+), 39 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7759cfa8..be5f6c62 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -84,9 +84,8 @@ if(CMAKE_C_COMPILER_ID STREQUAL "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_LESS message(WARNING "KleidiAI: Using non-supported GCC version. Expected ${KLEIDIAI_MIN_GNU_VERSION} or newer, received ${CMAKE_C_COMPILER_VERSION}.") endif() -set(KLEIDIAI_FILES_SCALAR +set(KLEIDIAI_FILES_SCALAR_GENERIC kai/ukernels/matmul/pack/kai_lhs_quant_pack_qai8dxp_f32.c - kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32.c kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4c32p_qsu4c32s1s0.c kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4cxp_qs4cxs1s0.c kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxp_qs4cxs1s0.c @@ -94,6 +93,11 @@ set(KLEIDIAI_FILES_SCALAR kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0.c ) +set(KLEIDIAI_FILES_SCALAR + ${KLEIDIAI_FILES_SCALAR_GENERIC} + kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32.c +) + set(KLEIDIAI_FILES_NEON_FP16 kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f16p16x1biasf16_f16_f16_neon.c kai/ukernels/matmul/matmul_clamp_f16_f16_f16p/kai_matmul_clamp_f16_f16_f16p16x1biasf16_6x16x8_neon_mla.c @@ -123,11 +127,25 @@ set(KLEIDIAI_FILES_NEON kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon.c kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32_neon.c kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon.c +) + +set(KLEIDIAI_FILES_NEON_DOTPROD_ASM + kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod_asm.S + 
kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod_asm.S + kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod_asm.S + kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x4_qsi4c32p4x4_16x4_neon_dotprod_asm.S +) +set(KLEIDIAI_FILES_NEON_DOTPROD_GENERIC + ${KLEIDIAI_FILES_NEON_DOTPROD_ASM} + kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod.c + kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c + kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod.c + kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x4_qsi4c32p4x4_16x4_neon_dotprod.c ) set(KLEIDIAI_FILES_NEON_DOTPROD - kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod.c + ${KLEIDIAI_FILES_NEON_DOTPROD_GENERIC} kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod.c kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod.c kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod.c @@ -136,32 +154,33 @@ set(KLEIDIAI_FILES_NEON_DOTPROD kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x4_qsi4c32p4x4_16x4_neon_dotprod.c kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod.c - 
kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c - kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod.c - kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x4_qsi4c32p4x4_16x4_neon_dotprod.c kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod.c kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod.c kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod.c - kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod_asm.S - kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod_asm.S - kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod_asm.S - kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x4_qsi4c32p4x4_16x4_neon_dotprod_asm.S +) + +set(KLEIDIAI_FILES_NEON_I8MM_ASM + kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm_asm.S + kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm_asm.S + kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm_asm.S +) + +set(KLEIDIAI_FILES_NEON_I8MM_GENERIC + ${KLEIDIAI_FILES_NEON_I8MM_ASM} + kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm.c + kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm.c + 
kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm.c ) set(KLEIDIAI_FILES_NEON_I8MM + ${KLEIDIAI_FILES_NEON_I8MM_GENERIC} kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm.c kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm.c kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm.c kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm.c kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm.c - kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm.c - kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm.c - kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm.c kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_16x4_neon_i8mm.c kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm.c - kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm_asm.S - kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm_asm.S - kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm_asm.S ) set(KLEIDIAI_FILES_SME @@ -193,7 +212,6 @@ set(KLEIDIAI_FILES_SME2 ) add_library(kleidiai) -target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_SCALAR}) # Selectively enable architecture features. 
# @@ -201,28 +219,44 @@ target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_SCALAR}) # optimized KleidiAI micro-kernels. It can however still be used to compile scalar routines. # https://learn.microsoft.com/en-us/cpp/assembler/inline/inline-assembler?view=msvc-170 # -if((CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64" OR CMAKE_SYSTEM_NAME STREQUAL "iOS") AND NOT MSVC) - - target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON}) - target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_FP16}) - target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_BF16}) - target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_FP16_BF16}) - target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_DOTPROD}) - target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_I8MM}) - target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_SME}) - target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_SME2}) - - set_source_files_properties(${KLEIDIAI_FILES_SCALAR} PROPERTIES COMPILE_OPTIONS -march=armv8-a${KLEIDIAI_INTERNAL_EXTRA_ARCH}) - set_source_files_properties(${KLEIDIAI_FILES_NEON} PROPERTIES COMPILE_OPTIONS -march=armv8-a${KLEIDIAI_INTERNAL_EXTRA_ARCH}) - set_source_files_properties(${KLEIDIAI_FILES_NEON_FP16} PROPERTIES COMPILE_OPTIONS -march=armv8.2-a+fp16${KLEIDIAI_INTERNAL_EXTRA_ARCH}) - set_source_files_properties(${KLEIDIAI_FILES_NEON_BF16} PROPERTIES COMPILE_OPTIONS -march=armv8.2-a+bf16${KLEIDIAI_INTERNAL_EXTRA_ARCH}) - set_source_files_properties(${KLEIDIAI_FILES_NEON_FP16_BF16} PROPERTIES COMPILE_OPTIONS -march=armv8.2-a+bf16+fp16${KLEIDIAI_INTERNAL_EXTRA_ARCH}) - set_source_files_properties(${KLEIDIAI_FILES_NEON_DOTPROD} PROPERTIES COMPILE_OPTIONS -march=armv8.2-a+dotprod${KLEIDIAI_INTERNAL_EXTRA_ARCH}) - set_source_files_properties(${KLEIDIAI_FILES_NEON_I8MM} PROPERTIES COMPILE_OPTIONS -march=armv8.2-a+i8mm${KLEIDIAI_INTERNAL_EXTRA_ARCH}) - - # Use -fno-tree-vectorize option to disable compiler based vectorization - 
set_source_files_properties(${KLEIDIAI_FILES_SME} PROPERTIES COMPILE_OPTIONS "-fno-tree-vectorize;-march=armv8.2-a+sve+sve2${KLEIDIAI_INTERNAL_EXTRA_ARCH}") - set_source_files_properties(${KLEIDIAI_FILES_SME2} PROPERTIES COMPILE_OPTIONS "-fno-tree-vectorize;-march=armv8.2-a+sve+sve2${KLEIDIAI_INTERNAL_EXTRA_ARCH}") + +if((CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")) + if(NOT MSVC) + target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_SCALAR}) + target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON}) + target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_FP16}) + target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_BF16}) + target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_FP16_BF16}) + target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_DOTPROD}) + target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_I8MM}) + target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_SME}) + target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_SME2}) + + set_source_files_properties(${KLEIDIAI_FILES_SCALAR} PROPERTIES COMPILE_OPTIONS -march=armv8-a${KLEIDIAI_INTERNAL_EXTRA_ARCH}) + set_source_files_properties(${KLEIDIAI_FILES_NEON} PROPERTIES COMPILE_OPTIONS -march=armv8-a${KLEIDIAI_INTERNAL_EXTRA_ARCH}) + set_source_files_properties(${KLEIDIAI_FILES_NEON_FP16} PROPERTIES COMPILE_OPTIONS -march=armv8.2-a+fp16${KLEIDIAI_INTERNAL_EXTRA_ARCH}) + set_source_files_properties(${KLEIDIAI_FILES_NEON_BF16} PROPERTIES COMPILE_OPTIONS -march=armv8.2-a+bf16${KLEIDIAI_INTERNAL_EXTRA_ARCH}) + set_source_files_properties(${KLEIDIAI_FILES_NEON_FP16_BF16} PROPERTIES COMPILE_OPTIONS -march=armv8.2-a+bf16+fp16${KLEIDIAI_INTERNAL_EXTRA_ARCH}) + set_source_files_properties(${KLEIDIAI_FILES_NEON_DOTPROD} PROPERTIES COMPILE_OPTIONS -march=armv8.2-a+dotprod${KLEIDIAI_INTERNAL_EXTRA_ARCH}) + set_source_files_properties(${KLEIDIAI_FILES_NEON_I8MM} PROPERTIES COMPILE_OPTIONS 
-march=armv8.2-a+i8mm${KLEIDIAI_INTERNAL_EXTRA_ARCH}) + + # Use -fno-tree-vectorize option to disable compiler based vectorization + set_source_files_properties(${KLEIDIAI_FILES_SME} PROPERTIES COMPILE_OPTIONS "-fno-tree-vectorize;-march=armv8.2-a+sve+sve2${KLEIDIAI_INTERNAL_EXTRA_ARCH}") + set_source_files_properties(${KLEIDIAI_FILES_SME2} PROPERTIES COMPILE_OPTIONS "-fno-tree-vectorize;-march=armv8.2-a+sve+sve2${KLEIDIAI_INTERNAL_EXTRA_ARCH}") + else() + target_sources(kleidiai PRIVATE + ${KLEIDIAI_FILES_SCALAR_GENERIC} + ${KLEIDIAI_FILES_NEON_DOTPROD_GENERIC} + ${KLEIDIAI_FILES_NEON_I8MM_GENERIC} + ) + + set_source_files_properties(${KLEIDIAI_FILES_SCALAR_GENERIC} PROPERTIES COMPILE_OPTIONS /arch:armv8.0) + set_source_files_properties(${KLEIDIAI_FILES_NEON_I8MM_GENERIC} PROPERTIES COMPILE_OPTIONS /arch:armv8.2) + set_source_files_properties(${KLEIDIAI_FILES_NEON_DOTPROD_GENERIC} PROPERTIES COMPILE_OPTIONS /arch:armv8.2) + + set_source_files_properties(${KLEIDIAI_FILES_NEON_DOTPROD_ASM} PROPERTIES LANGUAGE ASM_MARMASM) + set_source_files_properties(${KLEIDIAI_FILES_NEON_I8MM_ASM} PROPERTIES LANGUAGE ASM_MARMASM) + endif() endif() target_include_directories(kleidiai -- GitLab From 4ae36a5d02b7867c054d53889ca54b0cc6e01732 Mon Sep 17 00:00:00 2001 From: Jens Elofsson Date: Fri, 31 Jan 2025 14:44:15 +0100 Subject: [PATCH 2/4] Address review comments - Remove KLEIDIAI_FILES_SCALAR_GENERIC list - Remove if-statement checking CMAKE_SYSTEM_PROCESSOR - Change documentation comment to reflect the new changes - Don't use separate list for asm files Signed-off-by: Jens Elofsson --- CMakeLists.txt | 98 +++++++++++++++++++++++--------------------------- 1 file changed, 44 insertions(+), 54 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index be5f6c62..210317b8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -84,17 +84,13 @@ if(CMAKE_C_COMPILER_ID STREQUAL "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_LESS message(WARNING "KleidiAI: Using non-supported GCC version. 
Expected ${KLEIDIAI_MIN_GNU_VERSION} or newer, received ${CMAKE_C_COMPILER_VERSION}.") endif() -set(KLEIDIAI_FILES_SCALAR_GENERIC +set(KLEIDIAI_FILES_SCALAR kai/ukernels/matmul/pack/kai_lhs_quant_pack_qai8dxp_f32.c kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4c32p_qsu4c32s1s0.c kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4cxp_qs4cxs1s0.c kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxp_qs4cxs1s0.c kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.c kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0.c -) - -set(KLEIDIAI_FILES_SCALAR - ${KLEIDIAI_FILES_SCALAR_GENERIC} kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32.c ) @@ -129,15 +125,11 @@ set(KLEIDIAI_FILES_NEON kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon.c ) -set(KLEIDIAI_FILES_NEON_DOTPROD_ASM +set(KLEIDIAI_FILES_NEON_DOTPROD_GENERIC kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod_asm.S kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod_asm.S kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod_asm.S kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x4_qsi4c32p4x4_16x4_neon_dotprod_asm.S -) - -set(KLEIDIAI_FILES_NEON_DOTPROD_GENERIC - ${KLEIDIAI_FILES_NEON_DOTPROD_ASM} kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod.c kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod.c @@ -159,14 +151,10 @@ set(KLEIDIAI_FILES_NEON_DOTPROD kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod.c ) 
-set(KLEIDIAI_FILES_NEON_I8MM_ASM +set(KLEIDIAI_FILES_NEON_I8MM_GENERIC kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm_asm.S kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm_asm.S kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm_asm.S -) - -set(KLEIDIAI_FILES_NEON_I8MM_GENERIC - ${KLEIDIAI_FILES_NEON_I8MM_ASM} kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm.c kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm.c kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm.c @@ -212,51 +200,53 @@ set(KLEIDIAI_FILES_SME2 ) add_library(kleidiai) +target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_SCALAR}) # Selectively enable architecture features. # -# Note: MSVC compiler does not support inline assembly for Arm® processors hence it cannot be used to compile the -# optimized KleidiAI micro-kernels. It can however still be used to compile scalar routines. +# Note: MSVC compiler does not support inline assembly for Arm® processors hence it cannot be used to compile +# many of the optimized KleidiAI micro-kernels. It can however still be used to compile scalar routines. 
# https://learn.microsoft.com/en-us/cpp/assembler/inline/inline-assembler?view=msvc-170 # -if((CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")) - if(NOT MSVC) - target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_SCALAR}) - target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON}) - target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_FP16}) - target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_BF16}) - target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_FP16_BF16}) - target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_DOTPROD}) - target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_I8MM}) - target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_SME}) - target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_SME2}) - - set_source_files_properties(${KLEIDIAI_FILES_SCALAR} PROPERTIES COMPILE_OPTIONS -march=armv8-a${KLEIDIAI_INTERNAL_EXTRA_ARCH}) - set_source_files_properties(${KLEIDIAI_FILES_NEON} PROPERTIES COMPILE_OPTIONS -march=armv8-a${KLEIDIAI_INTERNAL_EXTRA_ARCH}) - set_source_files_properties(${KLEIDIAI_FILES_NEON_FP16} PROPERTIES COMPILE_OPTIONS -march=armv8.2-a+fp16${KLEIDIAI_INTERNAL_EXTRA_ARCH}) - set_source_files_properties(${KLEIDIAI_FILES_NEON_BF16} PROPERTIES COMPILE_OPTIONS -march=armv8.2-a+bf16${KLEIDIAI_INTERNAL_EXTRA_ARCH}) - set_source_files_properties(${KLEIDIAI_FILES_NEON_FP16_BF16} PROPERTIES COMPILE_OPTIONS -march=armv8.2-a+bf16+fp16${KLEIDIAI_INTERNAL_EXTRA_ARCH}) - set_source_files_properties(${KLEIDIAI_FILES_NEON_DOTPROD} PROPERTIES COMPILE_OPTIONS -march=armv8.2-a+dotprod${KLEIDIAI_INTERNAL_EXTRA_ARCH}) - set_source_files_properties(${KLEIDIAI_FILES_NEON_I8MM} PROPERTIES COMPILE_OPTIONS -march=armv8.2-a+i8mm${KLEIDIAI_INTERNAL_EXTRA_ARCH}) - - # Use -fno-tree-vectorize option to disable compiler based vectorization - set_source_files_properties(${KLEIDIAI_FILES_SME} PROPERTIES COMPILE_OPTIONS 
"-fno-tree-vectorize;-march=armv8.2-a+sve+sve2${KLEIDIAI_INTERNAL_EXTRA_ARCH}") - set_source_files_properties(${KLEIDIAI_FILES_SME2} PROPERTIES COMPILE_OPTIONS "-fno-tree-vectorize;-march=armv8.2-a+sve+sve2${KLEIDIAI_INTERNAL_EXTRA_ARCH}") - else() - target_sources(kleidiai PRIVATE - ${KLEIDIAI_FILES_SCALAR_GENERIC} - ${KLEIDIAI_FILES_NEON_DOTPROD_GENERIC} - ${KLEIDIAI_FILES_NEON_I8MM_GENERIC} - ) - - set_source_files_properties(${KLEIDIAI_FILES_SCALAR_GENERIC} PROPERTIES COMPILE_OPTIONS /arch:armv8.0) - set_source_files_properties(${KLEIDIAI_FILES_NEON_I8MM_GENERIC} PROPERTIES COMPILE_OPTIONS /arch:armv8.2) - set_source_files_properties(${KLEIDIAI_FILES_NEON_DOTPROD_GENERIC} PROPERTIES COMPILE_OPTIONS /arch:armv8.2) - - set_source_files_properties(${KLEIDIAI_FILES_NEON_DOTPROD_ASM} PROPERTIES LANGUAGE ASM_MARMASM) - set_source_files_properties(${KLEIDIAI_FILES_NEON_I8MM_ASM} PROPERTIES LANGUAGE ASM_MARMASM) - endif() +if(NOT MSVC) + target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON}) + target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_FP16}) + target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_BF16}) + target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_FP16_BF16}) + target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_DOTPROD}) + target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_I8MM}) + target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_SME}) + target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_SME2}) + + set_source_files_properties(${KLEIDIAI_FILES_SCALAR} PROPERTIES COMPILE_OPTIONS -march=armv8-a${KLEIDIAI_INTERNAL_EXTRA_ARCH}) + set_source_files_properties(${KLEIDIAI_FILES_NEON} PROPERTIES COMPILE_OPTIONS -march=armv8-a${KLEIDIAI_INTERNAL_EXTRA_ARCH}) + set_source_files_properties(${KLEIDIAI_FILES_NEON_FP16} PROPERTIES COMPILE_OPTIONS -march=armv8.2-a+fp16${KLEIDIAI_INTERNAL_EXTRA_ARCH}) + set_source_files_properties(${KLEIDIAI_FILES_NEON_BF16} PROPERTIES COMPILE_OPTIONS -march=armv8.2-a+bf16${KLEIDIAI_INTERNAL_EXTRA_ARCH}) + 
set_source_files_properties(${KLEIDIAI_FILES_NEON_FP16_BF16} PROPERTIES COMPILE_OPTIONS -march=armv8.2-a+bf16+fp16${KLEIDIAI_INTERNAL_EXTRA_ARCH}) + set_source_files_properties(${KLEIDIAI_FILES_NEON_DOTPROD} PROPERTIES COMPILE_OPTIONS -march=armv8.2-a+dotprod${KLEIDIAI_INTERNAL_EXTRA_ARCH}) + set_source_files_properties(${KLEIDIAI_FILES_NEON_I8MM} PROPERTIES COMPILE_OPTIONS -march=armv8.2-a+i8mm${KLEIDIAI_INTERNAL_EXTRA_ARCH}) + + # Use -fno-tree-vectorize option to disable compiler based vectorization + set_source_files_properties(${KLEIDIAI_FILES_SME} PROPERTIES COMPILE_OPTIONS "-fno-tree-vectorize;-march=armv8.2-a+sve+sve2${KLEIDIAI_INTERNAL_EXTRA_ARCH}") + set_source_files_properties(${KLEIDIAI_FILES_SME2} PROPERTIES COMPILE_OPTIONS "-fno-tree-vectorize;-march=armv8.2-a+sve+sve2${KLEIDIAI_INTERNAL_EXTRA_ARCH}") +else() + target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_DOTPROD_GENERIC}) + target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_I8MM_GENERIC}) + + set_source_files_properties(${KLEIDIAI_FILES_SCALAR} PROPERTIES COMPILE_OPTIONS /arch:armv8.0${KLEIDIAI_INTERNAL_EXTRA_ARCH}) + set_source_files_properties(${KLEIDIAI_FILES_NEON_I8MM_GENERIC} PROPERTIES COMPILE_OPTIONS /arch:armv8.2${KLEIDIAI_INTERNAL_EXTRA_ARCH}) + set_source_files_properties(${KLEIDIAI_FILES_NEON_DOTPROD_GENERIC} PROPERTIES COMPILE_OPTIONS /arch:armv8.2${KLEIDIAI_INTERNAL_EXTRA_ARCH}) + + # Copy the list before filtering to not loose the .c-files from the original + set(KLEIDIAI_FILES_NEON_DOTPROD_ASM ${KLEIDIAI_FILES_NEON_DOTPROD_GENERIC}) + set(KLEIDIAI_FILES_NEON_I8MM_ASM ${KLEIDIAI_FILES_NEON_I8MM_GENERIC}) + + list(FILTER KLEIDIAI_FILES_NEON_DOTPROD_ASM INCLUDE REGEX "^.*\.S$") + list(FILTER KLEIDIAI_FILES_NEON_I8MM_ASM INCLUDE REGEX "^.*\.S$") + + set_source_files_properties(${KLEIDIAI_FILES_NEON_DOTPROD_ASM} PROPERTIES LANGUAGE ASM_MARMASM) + set_source_files_properties(${KLEIDIAI_FILES_NEON_I8MM_ASM} PROPERTIES LANGUAGE ASM_MARMASM) endif() 
target_include_directories(kleidiai -- GitLab From 955a78deb35537a65325ea846214ca8c98b6ce3c Mon Sep 17 00:00:00 2001 From: Jens Elofsson Date: Mon, 3 Feb 2025 15:49:10 +0100 Subject: [PATCH 3/4] Address review comments - Restore file ordering - Remove comment - Use one common list of asm files when setting language property Signed-off-by: Jens Elofsson --- CMakeLists.txt | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 210317b8..bfcd3850 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -86,12 +86,12 @@ endif() set(KLEIDIAI_FILES_SCALAR kai/ukernels/matmul/pack/kai_lhs_quant_pack_qai8dxp_f32.c + kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32.c kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4c32p_qsu4c32s1s0.c kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4cxp_qs4cxs1s0.c kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxp_qs4cxs1s0.c kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.c kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0.c - kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32.c ) set(KLEIDIAI_FILES_NEON_FP16 @@ -203,12 +203,6 @@ add_library(kleidiai) target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_SCALAR}) # Selectively enable architecture features. -# -# Note: MSVC compiler does not support inline assembly for Arm® processors hence it cannot be used to compile -# many of the optimized KleidiAI micro-kernels. It can however still be used to compile scalar routines. 
-# https://learn.microsoft.com/en-us/cpp/assembler/inline/inline-assembler?view=msvc-170 -# - if(NOT MSVC) target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON}) target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_FP16}) @@ -238,15 +232,12 @@ else() set_source_files_properties(${KLEIDIAI_FILES_NEON_I8MM_GENERIC} PROPERTIES COMPILE_OPTIONS /arch:armv8.2${KLEIDIAI_INTERNAL_EXTRA_ARCH}) set_source_files_properties(${KLEIDIAI_FILES_NEON_DOTPROD_GENERIC} PROPERTIES COMPILE_OPTIONS /arch:armv8.2${KLEIDIAI_INTERNAL_EXTRA_ARCH}) - # Copy the list before filtering to not loose the .c-files from the original - set(KLEIDIAI_FILES_NEON_DOTPROD_ASM ${KLEIDIAI_FILES_NEON_DOTPROD_GENERIC}) - set(KLEIDIAI_FILES_NEON_I8MM_ASM ${KLEIDIAI_FILES_NEON_I8MM_GENERIC}) - - list(FILTER KLEIDIAI_FILES_NEON_DOTPROD_ASM INCLUDE REGEX "^.*\.S$") - list(FILTER KLEIDIAI_FILES_NEON_I8MM_ASM INCLUDE REGEX "^.*\.S$") + set(KLEIDIAI_FILES_ASM + ${KLEIDIAI_FILES_NEON_DOTPROD_GENERIC} + ${KLEIDIAI_FILES_NEON_I8MM_GENERIC}) + list(FILTER KLEIDIAI_FILES_ASM INCLUDE REGEX "^.*\.S$") - set_source_files_properties(${KLEIDIAI_FILES_NEON_DOTPROD_ASM} PROPERTIES LANGUAGE ASM_MARMASM) - set_source_files_properties(${KLEIDIAI_FILES_NEON_I8MM_ASM} PROPERTIES LANGUAGE ASM_MARMASM) + set_source_files_properties(${KLEIDIAI_FILES_ASM} PROPERTIES LANGUAGE ASM_MARMASM) endif() target_include_directories(kleidiai -- GitLab From 679d795c0a6703bb912b2cd69770c83ca31cd57d Mon Sep 17 00:00:00 2001 From: Jens Elofsson Date: Tue, 4 Feb 2025 10:28:58 +0100 Subject: [PATCH 4/4] Address review comments - Replace _GENERIC with _ASM in the DOTPROD and I8MM lists. 
Signed-off-by: Jens Elofsson --- CMakeLists.txt | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bfcd3850..97ce7eb1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -125,7 +125,7 @@ set(KLEIDIAI_FILES_NEON kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon.c ) -set(KLEIDIAI_FILES_NEON_DOTPROD_GENERIC +set(KLEIDIAI_FILES_NEON_DOTPROD_ASM kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod_asm.S kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod_asm.S kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod_asm.S @@ -137,7 +137,7 @@ set(KLEIDIAI_FILES_NEON_DOTPROD_GENERIC ) set(KLEIDIAI_FILES_NEON_DOTPROD - ${KLEIDIAI_FILES_NEON_DOTPROD_GENERIC} + ${KLEIDIAI_FILES_NEON_DOTPROD_ASM} kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod.c kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod.c kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod.c @@ -151,7 +151,7 @@ set(KLEIDIAI_FILES_NEON_DOTPROD kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod.c ) -set(KLEIDIAI_FILES_NEON_I8MM_GENERIC +set(KLEIDIAI_FILES_NEON_I8MM_ASM kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm_asm.S kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm_asm.S kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm_asm.S @@ -161,7 +161,7 @@ set(KLEIDIAI_FILES_NEON_I8MM_GENERIC ) 
set(KLEIDIAI_FILES_NEON_I8MM - ${KLEIDIAI_FILES_NEON_I8MM_GENERIC} + ${KLEIDIAI_FILES_NEON_I8MM_ASM} kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm.c kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm.c kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm.c @@ -225,16 +225,16 @@ if(NOT MSVC) set_source_files_properties(${KLEIDIAI_FILES_SME} PROPERTIES COMPILE_OPTIONS "-fno-tree-vectorize;-march=armv8.2-a+sve+sve2${KLEIDIAI_INTERNAL_EXTRA_ARCH}") set_source_files_properties(${KLEIDIAI_FILES_SME2} PROPERTIES COMPILE_OPTIONS "-fno-tree-vectorize;-march=armv8.2-a+sve+sve2${KLEIDIAI_INTERNAL_EXTRA_ARCH}") else() - target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_DOTPROD_GENERIC}) - target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_I8MM_GENERIC}) + target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_DOTPROD_ASM}) + target_sources(kleidiai PRIVATE ${KLEIDIAI_FILES_NEON_I8MM_ASM}) set_source_files_properties(${KLEIDIAI_FILES_SCALAR} PROPERTIES COMPILE_OPTIONS /arch:armv8.0${KLEIDIAI_INTERNAL_EXTRA_ARCH}) - set_source_files_properties(${KLEIDIAI_FILES_NEON_I8MM_GENERIC} PROPERTIES COMPILE_OPTIONS /arch:armv8.2${KLEIDIAI_INTERNAL_EXTRA_ARCH}) - set_source_files_properties(${KLEIDIAI_FILES_NEON_DOTPROD_GENERIC} PROPERTIES COMPILE_OPTIONS /arch:armv8.2${KLEIDIAI_INTERNAL_EXTRA_ARCH}) + set_source_files_properties(${KLEIDIAI_FILES_NEON_I8MM_ASM} PROPERTIES COMPILE_OPTIONS /arch:armv8.2${KLEIDIAI_INTERNAL_EXTRA_ARCH}) + set_source_files_properties(${KLEIDIAI_FILES_NEON_DOTPROD_ASM} PROPERTIES COMPILE_OPTIONS /arch:armv8.2${KLEIDIAI_INTERNAL_EXTRA_ARCH}) set(KLEIDIAI_FILES_ASM - ${KLEIDIAI_FILES_NEON_DOTPROD_GENERIC} - ${KLEIDIAI_FILES_NEON_I8MM_GENERIC}) + ${KLEIDIAI_FILES_NEON_DOTPROD_ASM} + ${KLEIDIAI_FILES_NEON_I8MM_ASM}) list(FILTER KLEIDIAI_FILES_ASM INCLUDE REGEX "^.*\.S$") 
set_source_files_properties(${KLEIDIAI_FILES_ASM} PROPERTIES LANGUAGE ASM_MARMASM) -- GitLab