From 6e866e222375d1b59d8af7e15d27383cf8768b1f Mon Sep 17 00:00:00 2001 From: Viet-Hoa Do Date: Wed, 16 Apr 2025 12:02:20 +0100 Subject: [PATCH] Remove truncating version of kai_cast_bf16_f32 * The implementation of kai_cast_bf16_f32 in kai_common.h violates the one definition rule (ODR). Depending on the architectural feature enabled by the compiler, the function is implemented differently. * In the kernel library, kai_cast_bf16_f32 is only used by microkernels that are compiled with BF16 enabled, therefore this change doesn't affect the functionality of the microkernel. * The example code needs to be compiled with BF16 enabled to match the behavior of the microkernel. Signed-off-by: Viet-Hoa Do --- .../matmul_clamp_f32_qai8dxp_qsi4c32p/CMakeLists.txt | 4 ++-- kai/kai_common.h | 9 ++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/examples/matmul_clamp_f32_qai8dxp_qsi4c32p/CMakeLists.txt b/examples/matmul_clamp_f32_qai8dxp_qsi4c32p/CMakeLists.txt index 28bbd67c..09fe2f45 100644 --- a/examples/matmul_clamp_f32_qai8dxp_qsi4c32p/CMakeLists.txt +++ b/examples/matmul_clamp_f32_qai8dxp_qsi4c32p/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +# SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates # # SPDX-License-Identifier: Apache-2.0 # @@ -42,4 +42,4 @@ add_executable(matmul_clamp_f32_qai8dxp_qsi4c32p ) target_compile_options(matmul_clamp_f32_qai8dxp_qsi4c32p - PRIVATE -march=armv8.2-a+dotprod+i8mm) + PRIVATE -march=armv8.2-a+dotprod+i8mm+bf16) diff --git a/kai/kai_common.h b/kai/kai_common.h index 6dc71d10..bec270c3 100644 --- a/kai/kai_common.h +++ b/kai/kai_common.h @@ -111,21 +111,20 @@ inline static float kai_cast_f32_bf16(uint16_t bf16) { return f32; } +#ifdef __ARM_FEATURE_BF16 + /// Converts a f32 value to bf16 /// @param[in] f32 The f32 value /// /// @return the bf16 value inline static uint16_t kai_cast_bf16_f32(float f32) { uint16_t bf16 = 0; 
-#ifdef __ARM_FEATURE_BF16 __asm__ __volatile__("bfcvt %h[output], %s[input]" : [output] "=w"(bf16) : [input] "w"(f32)); -#else - const uint32_t* i32 = (uint32_t*)(&f32); - bf16 = (*i32 >> 16); -#endif return bf16; } +#endif // __ARM_FEATURE_BF16 + /// Converts a scalar f32 value to f16 /// @param[in] f32 The f32 value /// -- GitLab