From f3f36ff178043af942a49ce2f775ca8c0118dd3d Mon Sep 17 00:00:00 2001 From: Viet-Hoa Do Date: Wed, 7 May 2025 12:21:55 +0100 Subject: [PATCH 1/2] Fix clamping issue * Numeric limits report the lowest and highest finite values of F16 and BF16 to be 0, which disables testing of all F16 and BF16 kernels with clamping. * Update numeric limits to have the correct limits. * Update numeric limits so that using an unsupported type results in a compilation error. Signed-off-by: Viet-Hoa Do --- test/common/bfloat16.hpp | 11 +++++++++++ test/common/float16.hpp | 13 ++++++++++++- test/common/numeric_limits.hpp | 25 ++++++++++++++++++++++--- 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/test/common/bfloat16.hpp b/test/common/bfloat16.hpp index a8ba195b..f7b0e418 100644 --- a/test/common/bfloat16.hpp +++ b/test/common/bfloat16.hpp @@ -25,6 +25,17 @@ public: explicit BFloat16(float value) : m_data(float_to_bfloat16_round_towards_zero(value)) { } + /// Creates a new half-precision brain floating-point value from the raw data. + /// + /// @param[in] data The binary representation of the floating-point value. + /// + /// @return The half-precision brain floating-point value. + static constexpr BFloat16 from_binary(uint16_t data) { + BFloat16 value{}; + value.m_data = data; + return value; + } + /// Assigns to the specified numeric value which will be converted to `bfloat16_t`. 
template , bool> = true> BFloat16& operator=(T value) { diff --git a/test/common/float16.hpp b/test/common/float16.hpp index 3dc77684..1b4eb0ba 100644 --- a/test/common/float16.hpp +++ b/test/common/float16.hpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -104,6 +104,17 @@ public: explicit Float16(float value) : m_data(kai_test_float16_from_float(value)) { } + /// Creates a new half-precision floating-point value from the raw data. + /// + /// @param[in] data The binary representation of the floating-point value. + /// + /// @return The half-precision floating-point value. + static constexpr Float16 from_binary(uint16_t data) { + Float16 value{}; + value.m_data = data; + return value; + } + /// Assigns to the specified numeric value. template , bool> = true> Float16& operator=(T value) { diff --git a/test/common/numeric_limits.hpp b/test/common/numeric_limits.hpp index e5950810..a11fd18e 100644 --- a/test/common/numeric_limits.hpp +++ b/test/common/numeric_limits.hpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -7,14 +7,17 @@ #pragma once #include +#include +#include "test/common/bfloat16.hpp" +#include "test/common/float16.hpp" #include "test/common/int4.hpp" namespace kai::test { /// Highest finite value of type `T`. template -inline constexpr T numeric_highest = std::numeric_limits::max(); +inline constexpr std::enable_if_t, T> numeric_highest = std::numeric_limits::max(); /// Highest finite value of type `T`. template <> @@ -24,9 +27,17 @@ inline constexpr UInt4 numeric_highest{15}; template <> inline constexpr Int4 numeric_highest{7}; +/// Highest finite value of type `T`. 
+template <> +inline constexpr Float16 numeric_highest = Float16::from_binary(0x7bff); + +/// Highest finite value of type `T`. +template <> +inline constexpr BFloat16 numeric_highest = BFloat16::from_binary(0x7f7f); + /// Lowest finite value of type `T`. template -inline constexpr T numeric_lowest = std::numeric_limits::lowest(); +inline constexpr std::enable_if_t, T> numeric_lowest = std::numeric_limits::lowest(); /// Lowest finite value of type `T`. template <> @@ -36,4 +47,12 @@ inline constexpr UInt4 numeric_lowest{0}; template <> inline constexpr Int4 numeric_lowest{-8}; +/// Highest finite value of type `T`. +template <> +inline constexpr Float16 numeric_lowest = Float16::from_binary(0xfbff); + +/// Highest finite value of type `T`. +template <> +inline constexpr BFloat16 numeric_lowest = BFloat16::from_binary(0xff7f); + } // namespace kai::test -- GitLab From 39a89d65d8c47a75f316ba5dbc6f51a5f3dd2b5d Mon Sep 17 00:00:00 2001 From: Viet-Hoa Do Date: Wed, 21 May 2025 15:27:44 +0100 Subject: [PATCH 2/2] Address review comments Signed-off-by: Viet-Hoa Do --- test/common/numeric_limits.hpp | 16 ++++++++-------- test/reference/quantize.cpp | 5 ++--- test/tests/bfloat16_test.cpp | 7 ++++++- test/tests/float16_test.cpp | 6 ++++++ 4 files changed, 22 insertions(+), 12 deletions(-) diff --git a/test/common/numeric_limits.hpp b/test/common/numeric_limits.hpp index a11fd18e..04189a99 100644 --- a/test/common/numeric_limits.hpp +++ b/test/common/numeric_limits.hpp @@ -19,19 +19,19 @@ namespace kai::test { template inline constexpr std::enable_if_t, T> numeric_highest = std::numeric_limits::max(); -/// Highest finite value of type `T`. +/// Highest finite value of @ref UInt4. template <> inline constexpr UInt4 numeric_highest{15}; -/// Highest finite value of type `T`. +/// Highest finite value of @ref Int4. template <> inline constexpr Int4 numeric_highest{7}; -/// Highest finite value of type `T`. +/// Highest finite value of @ref Float16. 
template <> inline constexpr Float16 numeric_highest = Float16::from_binary(0x7bff); -/// Highest finite value of type `T`. +/// Highest finite value of @ref BFloat16. template <> inline constexpr BFloat16 numeric_highest = BFloat16::from_binary(0x7f7f); @@ -39,19 +39,19 @@ inline constexpr BFloat16 numeric_highest = BFloat16::from_binary(0x7f template inline constexpr std::enable_if_t, T> numeric_lowest = std::numeric_limits::lowest(); -/// Lowest finite value of type `T`. +/// Lowest finite value of @ref UInt4. template <> inline constexpr UInt4 numeric_lowest{0}; -/// Lowest finite value of type `T`. +/// Lowest finite value of @ref Int4. template <> inline constexpr Int4 numeric_lowest{-8}; -/// Highest finite value of type `T`. +/// Lowest finite value of @ref Float16. template <> inline constexpr Float16 numeric_lowest = Float16::from_binary(0xfbff); -/// Highest finite value of type `T`. +/// Lowest finite value of @ref BFloat16. template <> inline constexpr BFloat16 numeric_lowest = BFloat16::from_binary(0xff7f); diff --git a/test/reference/quantize.cpp b/test/reference/quantize.cpp index 7a2fb198..477a13c2 100644 --- a/test/reference/quantize.cpp +++ b/test/reference/quantize.cpp @@ -10,7 +10,6 @@ #include #include #include -#include #include #include @@ -204,8 +203,8 @@ std::tuple, std::vector> compute_asymmetric_per_bl for (size_t y = 0; y < height; ++y) { for (size_t x_quant = 0; x_quant < width; x_quant += quant_width) { // Computes the quantization scale and zero point. 
- auto min_value = std::numeric_limits::max(); - auto max_value = std::numeric_limits::lowest(); + auto min_value = numeric_highest; + auto max_value = numeric_lowest; for (size_t x_element = 0; x_element < quant_width; ++x_element) { const auto x = x_quant + x_element; diff --git a/test/tests/bfloat16_test.cpp b/test/tests/bfloat16_test.cpp index 996d2e68..f94ce57e 100644 --- a/test/tests/bfloat16_test.cpp +++ b/test/tests/bfloat16_test.cpp @@ -8,7 +8,7 @@ #include -#include "test/common/cpu_info.hpp" +#include "test/common/numeric_limits.hpp" namespace kai::test { @@ -28,4 +28,9 @@ TEST(BFloat16, SimpleTest) { ASSERT_TRUE(BFloat16(2.0F) != BFloat16(1.25F)); } +TEST(BFloat16, NumericLimitTest) { + ASSERT_EQ(static_cast(numeric_lowest), -338953138925153547590470800371487866880.0F); + ASSERT_EQ(static_cast(numeric_highest), 338953138925153547590470800371487866880.0F); +} + } // namespace kai::test diff --git a/test/tests/float16_test.cpp b/test/tests/float16_test.cpp index ea919fdd..8941810e 100644 --- a/test/tests/float16_test.cpp +++ b/test/tests/float16_test.cpp @@ -9,6 +9,7 @@ #include #include "test/common/cpu_info.hpp" +#include "test/common/numeric_limits.hpp" namespace kai::test { @@ -62,4 +63,9 @@ TEST(Float16, SimpleTest) { ASSERT_EQ(a, Float16(1.25F)); } +TEST(Float16, NumericLimitTest) { + ASSERT_EQ(static_cast(numeric_lowest), -65504.0F); + ASSERT_EQ(static_cast(numeric_highest), 65504.0F); +} + } // namespace kai::test -- GitLab