diff --git a/CMakeLists.txt b/CMakeLists.txt
index ad8f5580a873f9eeb5cded48e45909ee7856d7ab..22d650f41d8ac195417265c897c430ef74b35958 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -330,7 +330,6 @@ if(KLEIDIAI_BUILD_TESTS)
 
     add_library(kleidiai_test_framework
         test/common/bfloat16.cpp
-        test/common/bfloat16_asm.S
         test/common/compare.cpp
         test/common/cpu_info.cpp
         test/common/data_format.cpp
diff --git a/test/common/bfloat16.cpp b/test/common/bfloat16.cpp
index 26e9259a7c3283af4ddd43ed206ece50c82628f7..bd70de26fc3df76fc0d7d172e7509994a9ff4e62 100644
--- a/test/common/bfloat16.cpp
+++ b/test/common/bfloat16.cpp
@@ -1,5 +1,5 @@
 //
-// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates
+// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates
 //
 // SPDX-License-Identifier: Apache-2.0
 //
@@ -30,4 +30,12 @@ std::ostream& operator<<(std::ostream& os, BFloat16 value) {
     return os << static_cast<float>(value);
 }
 
+uint16_t BFloat16::float_to_bfloat16_round_towards_zero(float value) {
+    uint32_t value_u32;
+
+    memcpy(&value_u32, &value, sizeof(value));
+
+    return value_u32 >> 16;
+}
+
 }  // namespace kai::test
diff --git a/test/common/bfloat16.hpp b/test/common/bfloat16.hpp
index e616657a65903f0e5d4301afe81b45c87a5d2d0b..a8ba195b5c21b26e6c21cd66390195d1fa1b5d6e 100644
--- a/test/common/bfloat16.hpp
+++ b/test/common/bfloat16.hpp
@@ -13,17 +13,6 @@
 
 #include "test/common/type_traits.hpp"
 
-extern "C" {
-
-/// Converts single-precision floating-point to half-precision brain floating-point.
-///
-/// @params[in] value The single-precision floating-point value.
-///
-/// @return The half-precision brain floating-point value reinterpreted as 16-bit unsigned integer.
-uint16_t kai_test_bfloat16_from_float(float value);
-
-}  // extern "C"
-
 namespace kai::test {
 
 /// Half-precision brain floating-point.
@@ -33,14 +22,14 @@ public:
     BFloat16() = default;
 
     /// Creates a new object from the specified numeric value.
-    explicit BFloat16(float value) : m_data(kai_test_bfloat16_from_float(value)) {
+    explicit BFloat16(float value) : m_data(float_to_bfloat16_round_towards_zero(value)) {
     }
 
     /// Assigns to the specified numeric value which will be converted to `bfloat16_t`.
     template , bool> = true>
     BFloat16& operator=(T value) {
         const auto value_f32 = static_cast<float>(value);
-        m_data = kai_test_bfloat16_from_float(value_f32);
+        m_data = float_to_bfloat16_round_towards_zero(value_f32);
 
         return *this;
     }
@@ -73,6 +62,8 @@ private:
    /// @return The output stream.
    friend std::ostream& operator<<(std::ostream& os, BFloat16 value);
 
+    static uint16_t float_to_bfloat16_round_towards_zero(float value);
+
    uint16_t m_data;
 };
 
diff --git a/test/common/bfloat16_asm.S b/test/common/bfloat16_asm.S
deleted file mode 100644
index 9f16cda4d7a70fef9b31d3e8c5a55e0bc1ae90b7..0000000000000000000000000000000000000000
--- a/test/common/bfloat16_asm.S
+++ /dev/null
@@ -1,18 +0,0 @@
-//
-// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates
-//
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "test/common/assembly.h"
-
-    KAI_ASM_HEADER
-
-    KAI_ASM_EXPORT(kai_test_bfloat16_from_float)
-
-KAI_ASM_FUNCTION(kai_test_bfloat16_from_float)
-    KAI_ASM_INST(0x1e634000)  // bfcvt h0, s0
-    fmov w0, h0
-    ret
-
-    KAI_ASM_FOOTER
diff --git a/test/tests/bfloat16_test.cpp b/test/tests/bfloat16_test.cpp
index 8a2886c84788a03189b58fc727f3e7fabc9c5320..996d2e6809fc4fd3414670bdaa0310be13f10d86 100644
--- a/test/tests/bfloat16_test.cpp
+++ b/test/tests/bfloat16_test.cpp
@@ -13,13 +13,11 @@
 namespace kai::test {
 
 TEST(BFloat16, SimpleTest) {
-    if (!cpu_has_bf16()) {
-        GTEST_SKIP() << "No CPU support for BFloat16";
-    }
-
     ASSERT_EQ(static_cast<float>(BFloat16()), 0.0F);
     ASSERT_EQ(static_cast<float>(BFloat16(1.25F)), 1.25F);
+    ASSERT_EQ(static_cast<float>(BFloat16(-1.25F)), -1.25F);
     ASSERT_EQ(static_cast<float>(BFloat16(3)), 3.0F);
+    ASSERT_EQ(static_cast<float>(BFloat16(-3)), -3.0F);
 
     ASSERT_FALSE(BFloat16(1.25F) == BFloat16(2.0F));
     ASSERT_TRUE(BFloat16(1.25F) == BFloat16(1.25F));
diff --git a/test/tests/matmul_clamp_f32_qai8dxp_qsi4c32p_test.cpp b/test/tests/matmul_clamp_f32_qai8dxp_qsi4c32p_test.cpp
index 954496bfef3d2108ae6592df0f62a270ea6587d6..9a0d24691b3acf47c3dad09a1584ae63740d0235 100644
--- a/test/tests/matmul_clamp_f32_qai8dxp_qsi4c32p_test.cpp
+++ b/test/tests/matmul_clamp_f32_qai8dxp_qsi4c32p_test.cpp
@@ -50,35 +50,30 @@
 #include "test/reference/transpose.hpp"
 
 namespace kai::test {
-
-// Test code contains calls to quantization functions using bf16. Additional check for BFloat16 CPU support required.
-static auto cpu_has_dotprod_and_bf16 = []() { return cpu_has_dotprod() && cpu_has_bf16(); };
-static auto cpu_has_i8mm_and_bf16 = []() { return cpu_has_i8mm() && cpu_has_bf16(); };
-
 static const std::array, 11> variants_kai_matmul_clamp_f32_qai8dxp_qsi4c32p = {
     {{UKERNEL_MATMUL_VARIANT(clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod),
-      "kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod", cpu_has_dotprod_and_bf16},
+      "kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod", cpu_has_dotprod},
     {UKERNEL_MATMUL_VARIANT(clamp_f32_qai8dxp1x4_qsi4c32p8x4_1x8_neon_dotprod),
-      "kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p8x4_1x8_neon_dotprod", cpu_has_dotprod_and_bf16},
+      "kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p8x4_1x8_neon_dotprod", cpu_has_dotprod},
     {UKERNEL_MATMUL_VARIANT(clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod),
-      "kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod", cpu_has_dotprod_and_bf16},
+      "kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod", cpu_has_dotprod},
     {UKERNEL_MATMUL_VARIANT(clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8_neon_dotprod),
-      "kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8_neon_dotprod", cpu_has_dotprod_and_bf16},
+      "kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8_neon_dotprod", cpu_has_dotprod},
     {UKERNEL_MATMUL_VARIANT(clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod),
-      "kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod", cpu_has_dotprod_and_bf16},
+      "kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod", cpu_has_dotprod},
     {UKERNEL_MATMUL_VARIANT(clamp_f32_qai8dxp4x4_qsi4c32p4x4_16x4_neon_dotprod),
-      "kai_matmul_clamp_f32_qai8dxp4x4_qsi4c32p4x4_16x4_neon_dotprod", cpu_has_dotprod_and_bf16},
+      "kai_matmul_clamp_f32_qai8dxp4x4_qsi4c32p4x4_16x4_neon_dotprod", cpu_has_dotprod},
     {UKERNEL_MATMUL_VARIANT(clamp_f32_qai8dxp4x4_qsi4c32p8x4_4x8_neon_dotprod),
-      "kai_matmul_clamp_f32_qai8dxp4x4_qsi4c32p8x4_4x8_neon_dotprod", cpu_has_dotprod_and_bf16},
+      "kai_matmul_clamp_f32_qai8dxp4x4_qsi4c32p8x4_4x8_neon_dotprod", cpu_has_dotprod},
     {UKERNEL_MATMUL_VARIANT(clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm),
-      "kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm", cpu_has_i8mm_and_bf16},
+      "kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm", cpu_has_i8mm},
     {UKERNEL_MATMUL_VARIANT(clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm),
-      "kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm", cpu_has_i8mm_and_bf16},
+      "kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm", cpu_has_i8mm},
     {UKERNEL_MATMUL_VARIANT(clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm),
-      "kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm", cpu_has_i8mm_and_bf16},
+      "kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm", cpu_has_i8mm},
     {UKERNEL_MATMUL_VARIANT(clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8_neon_i8mm),
-      "kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8_neon_i8mm", cpu_has_i8mm_and_bf16}}};
+      "kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8_neon_i8mm", cpu_has_i8mm}}};
 
 using MatMulTestParams_withBL = std::tuple;
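The snippet below is an illustrative, stand-alone sketch (not part of the patch) of how the conversion added in `test/common/bfloat16.cpp` behaves: it keeps the upper 16 bits of the IEEE 754 single-precision encoding (sign, exponent, and the top 7 mantissa bits) and drops the rest, i.e. it rounds towards zero, whereas the removed `BFCVT`-based assembly rounded to nearest. The helper name `bf16_truncate` and the `main` driver are hypothetical; they only mirror `BFloat16::float_to_bfloat16_round_towards_zero`.

```cpp
// Illustrative sketch only; mirrors the truncating f32 -> bf16 conversion from the patch.
#include <cstdint>
#include <cstdio>
#include <cstring>

// Hypothetical helper mirroring BFloat16::float_to_bfloat16_round_towards_zero.
static uint16_t bf16_truncate(float value) {
    uint32_t value_u32;
    memcpy(&value_u32, &value, sizeof(value));       // Reinterpret the float's bit pattern.
    return static_cast<uint16_t>(value_u32 >> 16);   // Keep sign, exponent and top 7 mantissa bits.
}

int main() {
    // 1.25f (0x3FA00000) is exactly representable in bfloat16, so nothing is lost: prints 0x3fa0.
    std::printf("bf16(1.25f)       = 0x%04x\n", bf16_truncate(1.25F));
    // 1.00390625f (1 + 2^-8, 0x3F808000) is not representable; the low 16 bits are simply
    // discarded, giving 0x3f80, which is 1.0 in bfloat16.
    std::printf("bf16(1.00390625f) = 0x%04x\n", bf16_truncate(1.00390625F));
    return 0;
}
```

Because the conversion no longer depends on the BF16 instruction set, the tests above can drop their `cpu_has_bf16()` gating and run on any AArch64 machine.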