diff --git a/kai/kai_common.h b/kai/kai_common.h index dc815f679dbf369b7de8ae0184aa91b4acdd1a38..8e22fa3c7dee493f8924313fec34cfc7beba7fb7 100644 --- a/kai/kai_common.h +++ b/kai/kai_common.h @@ -104,7 +104,7 @@ inline static float kai_cast_f32_bf16(uint16_t bf16) { inline static uint16_t kai_cast_bf16_f32(float f32) { uint16_t bf16 = 0; #ifdef __ARM_FEATURE_BF16 - asm("bfcvt %h[output], %s[input]" : [output] "=w"(bf16) : [input] "w"(f32)); + __asm__ __volatile__("bfcvt %h[output], %s[input]" : [output] "=w"(bf16) : [input] "w"(f32)); #else const uint32_t* i32 = (uint32_t*)(&f32); bf16 = (*i32 >> 16); @@ -135,7 +135,7 @@ inline static size_t kai_roundup(size_t a, size_t b) { inline static uint64_t kai_get_sme_vector_length_u8(void) { uint64_t res = 0; - __asm __volatile( + __asm__ __volatile__( ".inst 0xd503477f // SMSTART ZA\n" "cntb %0\n" ".inst 0xd503467f // SMSTOP\n" @@ -151,7 +151,7 @@ inline static uint64_t kai_get_sme_vector_length_u8(void) { inline static uint64_t kai_get_sme_vector_length_u16(void) { uint64_t res = 0; - __asm __volatile( + __asm__ __volatile__( ".inst 0xd503477f // SMSTART ZA\n" "cnth %0\n" ".inst 0xd503467f // SMSTOP\n" @@ -167,7 +167,7 @@ inline static uint64_t kai_get_sme_vector_length_u16(void) { inline static uint64_t kai_get_sme_vector_length_u32(void) { uint64_t res = 0; - __asm __volatile( + __asm__ __volatile__( ".inst 0xd503477f // SMSTART ZA\n" "cntw %0\n" ".inst 0xd503467f // SMSTOP\n" diff --git a/test/common/bfloat16.hpp b/test/common/bfloat16.hpp index 94c362e395b21e7cff4a8f94dec9092b632f3345..ca0e0b371f6bad9f6b00ca55fd6a3e09c331171a 100644 --- a/test/common/bfloat16.hpp +++ b/test/common/bfloat16.hpp @@ -40,7 +40,7 @@ public: /// Creates a new object from the specified numeric value. BFloat16(float value) : _data(0) { #ifdef __ARM_FEATURE_BF16 - asm("bfcvt %h[output], %s[input]" : [output] "=w"(_data) : [input] "w"(value)); + __asm__ __volatile__("bfcvt %h[output], %s[input]" : [output] "=w"(_data) : [input] "w"(value)); #else const uint32_t* value_i32 = reinterpret_cast(&value); _data = (*value_i32 >> 16); @@ -52,7 +52,7 @@ public: BFloat16& operator=(T value) { const auto value_f32 = static_cast(value); #ifdef __ARM_FEATURE_BF16 - asm("bfcvt %h[output], %s[input]" : [output] "=w"(_data) : [input] "w"(value_f32)); + __asm__ __volatile__("bfcvt %h[output], %s[input]" : [output] "=w"(_data) : [input] "w"(value_f32)); #else const uint32_t* value_i32 = reinterpret_cast(&value_f32); _data = (*value_i32 >> 16); diff --git a/test/common/round.cpp b/test/common/round.cpp index 08f6c7edb52c41a6a7674838685891f9a6ab0d32..954d96da7d4a6aaf921920492eb06ee4f7ef54cf 100644 --- a/test/common/round.cpp +++ b/test/common/round.cpp @@ -13,7 +13,7 @@ namespace kai::test { int32_t round_to_nearest_even_i32(float value) { int32_t rounded = 0; - asm("fcvtns %w[output], %s[input]" : [output] "=r"(rounded) : [input] "w"(value)); + __asm__ __volatile__("fcvtns %w[output], %s[input]" : [output] "=r"(rounded) : [input] "w"(value)); return rounded; } @@ -21,7 +21,7 @@ size_t round_to_nearest_even_usize(float value) { static_assert(sizeof(size_t) == sizeof(uint64_t)); uint64_t rounded = 0; - asm("fcvtns %x[output], %s[input]" : [output] "=r"(rounded) : [input] "w"(value)); + __asm__ __volatile__("fcvtns %x[output], %s[input]" : [output] "=r"(rounded) : [input] "w"(value)); return rounded; } diff --git a/test/common/sme.cpp b/test/common/sme.cpp index b13e991014567bd5298c34ecf5b790d7a9df26d1..86b6bc356060431065679cf2fca7dee9e247262a 100644 --- a/test/common/sme.cpp +++ b/test/common/sme.cpp @@ -20,7 +20,7 @@ uint64_t get_sme_vector_length<1>() { if (res == 0) { if (cpu_has_sme()) { - __asm __volatile( + __asm__ __volatile__( ".inst 0xd503477f // SMSTART ZA\n" "cntb %0\n" ".inst 0xd503467f // SMSTOP\n" @@ -43,7 +43,7 @@ uint64_t get_sme_vector_length<2>() { if (res == 0) { if (cpu_has_sme()) { - __asm __volatile( + __asm__ __volatile__( ".inst 0xd503477f // SMSTART ZA\n" "cnth %0\n" ".inst 0xd503467f // SMSTOP\n" @@ -66,7 +66,7 @@ uint64_t get_sme_vector_length<4>() { if (res == 0) { if (cpu_has_sme()) { - __asm __volatile( + __asm__ __volatile__( ".inst 0xd503477f // SMSTART ZA\n" "cntw %0\n" ".inst 0xd503467f // SMSTOP\n"