diff --git a/test/common/bfloat16.hpp b/test/common/bfloat16.hpp index a8ba195b5c21b26e6c21cd66390195d1fa1b5d6e..f7b0e418b78e64f01532a9513dda94723c37e867 100644 --- a/test/common/bfloat16.hpp +++ b/test/common/bfloat16.hpp @@ -25,6 +25,17 @@ public: explicit BFloat16(float value) : m_data(float_to_bfloat16_round_towards_zero(value)) { } + /// Creates a new brain floating-point value directly from its raw 16-bit pattern, without numeric conversion. + /// + /// @param[in] data The binary representation, stored unchanged as the underlying bits of the value. + /// + /// @return The brain floating-point value whose underlying bits equal `data`. + static constexpr BFloat16 from_binary(uint16_t data) { + BFloat16 value{}; + value.m_data = data; + return value; + } + /// Assigns to the specified numeric value which will be converted to `bfloat16_t`. template , bool> = true> BFloat16& operator=(T value) { diff --git a/test/common/float16.hpp b/test/common/float16.hpp index 3dc7768429a1e56b7556a948bdda255b30d73b8a..1b4eb0ba33f3a7b62084dc778204293a9eea3c6a 100644 --- a/test/common/float16.hpp +++ b/test/common/float16.hpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -104,6 +104,17 @@ public: explicit Float16(float value) : m_data(kai_test_float16_from_float(value)) { } + /// Creates a new half-precision floating-point value directly from its raw 16-bit pattern, without numeric conversion. + /// + /// @param[in] data The binary representation, stored unchanged as the underlying bits of the value. + /// + /// @return The half-precision floating-point value whose underlying bits equal `data`. + static constexpr Float16 from_binary(uint16_t data) { + Float16 value{}; + value.m_data = data; + return value; + } + /// Assigns to the specified numeric value. 
template , bool> = true> Float16& operator=(T value) { diff --git a/test/common/numeric_limits.hpp b/test/common/numeric_limits.hpp index e59508108f5c1751c534c31c520c622c49e5df61..04189a990d725149462aa4fa3dfd3c67014c9b81 100644 --- a/test/common/numeric_limits.hpp +++ b/test/common/numeric_limits.hpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -7,33 +7,52 @@ #pragma once #include +#include +#include "test/common/bfloat16.hpp" +#include "test/common/float16.hpp" #include "test/common/int4.hpp" namespace kai::test { /// Highest finite value of type `T`. template -inline constexpr T numeric_highest = std::numeric_limits::max(); +inline constexpr std::enable_if_t, T> numeric_highest = std::numeric_limits::max(); -/// Highest finite value of type `T`. +/// Highest finite value of @ref UInt4. template <> inline constexpr UInt4 numeric_highest{15}; -/// Highest finite value of type `T`. +/// Highest finite value of @ref Int4. template <> inline constexpr Int4 numeric_highest{7}; +/// Highest finite value of @ref Float16 (raw bits 0x7bff, i.e. 65504). +template <> +inline constexpr Float16 numeric_highest = Float16::from_binary(0x7bff); + +/// Highest finite value of @ref BFloat16 (raw bits 0x7f7f, about 3.39e38). +template <> +inline constexpr BFloat16 numeric_highest = BFloat16::from_binary(0x7f7f); + /// Lowest finite value of type `T`. template -inline constexpr T numeric_lowest = std::numeric_limits::lowest(); +inline constexpr std::enable_if_t, T> numeric_lowest = std::numeric_limits::lowest(); -/// Lowest finite value of type `T`. +/// Lowest finite value of @ref UInt4. template <> inline constexpr UInt4 numeric_lowest{0}; -/// Lowest finite value of type `T`. +/// Lowest finite value of @ref Int4. template <> inline constexpr Int4 numeric_lowest{-8}; +/// Lowest finite value of @ref Float16 (raw bits 0xfbff, i.e. -65504). 
+template <> +inline constexpr Float16 numeric_lowest = Float16::from_binary(0xfbff); + +/// Lowest finite value of @ref BFloat16 (raw bits 0xff7f, about -3.39e38). +template <> +inline constexpr BFloat16 numeric_lowest = BFloat16::from_binary(0xff7f); + } // namespace kai::test diff --git a/test/reference/quantize.cpp b/test/reference/quantize.cpp index 7a2fb198f78f5da474ec419eccf5c09e239f7fd6..477a13c22c4ef46908316b1bc55a0be2b0697d8e 100644 --- a/test/reference/quantize.cpp +++ b/test/reference/quantize.cpp @@ -10,7 +10,6 @@ #include #include #include -#include #include #include @@ -204,8 +203,8 @@ std::tuple, std::vector> compute_asymmetric_per_bl for (size_t y = 0; y < height; ++y) { for (size_t x_quant = 0; x_quant < width; x_quant += quant_width) { // Computes the quantization scale and zero point. - auto min_value = std::numeric_limits::max(); - auto max_value = std::numeric_limits::lowest(); + auto min_value = numeric_highest; + auto max_value = numeric_lowest; for (size_t x_element = 0; x_element < quant_width; ++x_element) { const auto x = x_quant + x_element; diff --git a/test/tests/bfloat16_test.cpp b/test/tests/bfloat16_test.cpp index 996d2e6809fc4fd3414670bdaa0310be13f10d86..f94ce57ee112d3b721a290272a548d7cbd742aef 100644 --- a/test/tests/bfloat16_test.cpp +++ b/test/tests/bfloat16_test.cpp @@ -8,7 +8,7 @@ #include -#include "test/common/cpu_info.hpp" +#include "test/common/numeric_limits.hpp" namespace kai::test { @@ -28,4 +28,9 @@ TEST(BFloat16, SimpleTest) { ASSERT_TRUE(BFloat16(2.0F) != BFloat16(1.25F)); } +TEST(BFloat16, NumericLimitTest) { + ASSERT_EQ(static_cast(numeric_lowest), -338953138925153547590470800371487866880.0F); + ASSERT_EQ(static_cast(numeric_highest), 338953138925153547590470800371487866880.0F); +} + } // namespace kai::test diff --git a/test/tests/float16_test.cpp b/test/tests/float16_test.cpp index ea919fdd5a1a38d885ccac9dcdd3a55f2b7d4080..8941810ed68886c67b4b9859adfc0e288c5f3225 100644 --- a/test/tests/float16_test.cpp +++ b/test/tests/float16_test.cpp @@ -9,6 
+9,7 @@ #include #include "test/common/cpu_info.hpp" +#include "test/common/numeric_limits.hpp" namespace kai::test { @@ -62,4 +63,9 @@ TEST(Float16, SimpleTest) { ASSERT_EQ(a, Float16(1.25F)); } +TEST(Float16, NumericLimitTest) { + ASSERT_EQ(static_cast(numeric_lowest), -65504.0F); + ASSERT_EQ(static_cast(numeric_highest), 65504.0F); +} + } // namespace kai::test