From 6e866e222375d1b59d8af7e15d27383cf8768b1f Mon Sep 17 00:00:00 2001
From: Viet-Hoa Do <viet-hoa.do@arm.com>
Date: Wed, 16 Apr 2025 12:02:20 +0100
Subject: [PATCH] Remove truncating version of kai_cast_bf16_f32

* The implementation of kai_cast_bf16_f32 in kai_common.h
  violates the one definition rule (ODR). Depending on the
  architectural features enabled by the compiler, the function
  is implemented differently.
* In the kernel library, kai_cast_bf16_f32 is only used by
  microkernels that are compiled with BF16 enabled, therefore
  this change doesn't affect the functionality of the microkernel.
* The example code needs to be compiled with BF16 enabled to match
  the behavior of the microkernel.

Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
---
 .../matmul_clamp_f32_qai8dxp_qsi4c32p/CMakeLists.txt     | 4 ++--
 kai/kai_common.h                                         | 9 ++++-----
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/examples/matmul_clamp_f32_qai8dxp_qsi4c32p/CMakeLists.txt b/examples/matmul_clamp_f32_qai8dxp_qsi4c32p/CMakeLists.txt
index 28bbd67c..09fe2f45 100644
--- a/examples/matmul_clamp_f32_qai8dxp_qsi4c32p/CMakeLists.txt
+++ b/examples/matmul_clamp_f32_qai8dxp_qsi4c32p/CMakeLists.txt
@@ -1,5 +1,5 @@
 #
-# SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+# SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
 #
 # SPDX-License-Identifier: Apache-2.0
 #
@@ -42,4 +42,4 @@ add_executable(matmul_clamp_f32_qai8dxp_qsi4c32p
     )
 
 target_compile_options(matmul_clamp_f32_qai8dxp_qsi4c32p
-    PRIVATE -march=armv8.2-a+dotprod+i8mm)
+    PRIVATE -march=armv8.2-a+dotprod+i8mm+bf16)
diff --git a/kai/kai_common.h b/kai/kai_common.h
index 6dc71d10..bec270c3 100644
--- a/kai/kai_common.h
+++ b/kai/kai_common.h
@@ -111,21 +111,20 @@ inline static float kai_cast_f32_bf16(uint16_t bf16) {
     return f32;
 }
 
+#ifdef __ARM_FEATURE_BF16
+
 /// Converts a f32 value to bf16
 /// @param[in] f32 The f32 value
 ///
 /// @return the bf16 value
 inline static uint16_t kai_cast_bf16_f32(float f32) {
     uint16_t bf16 = 0;
-#ifdef __ARM_FEATURE_BF16
     __asm__ __volatile__("bfcvt %h[output], %s[input]" : [output] "=w"(bf16) : [input] "w"(f32));
-#else
-    const uint32_t* i32 = (uint32_t*)(&f32);
-    bf16 = (*i32 >> 16);
-#endif
     return bf16;
 }
 
+#endif  // __ARM_FEATURE_BF16
+
 /// Converts a scalar f32 value to f16
 /// @param[in] f32 The f32 value
 ///
-- 
GitLab