From f3f36ff178043af942a49ce2f775ca8c0118dd3d Mon Sep 17 00:00:00 2001
From: Viet-Hoa Do <viet-hoa.do@arm.com>
Date: Wed, 7 May 2025 12:21:55 +0100
Subject: [PATCH 1/2] Fix clamping issue

* Numeric limits report the lowest and highest finite values
  of F16 and BF16 to be 0 which disables testing of all F16
  and BF16 kernels with clamping.
* Update numeric limits to have the correct limits.
* Update numeric limits so that using an unsupported type
  results in a compilation error.

Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
---
 test/common/bfloat16.hpp       | 11 +++++++++++
 test/common/float16.hpp        | 13 ++++++++++++-
 test/common/numeric_limits.hpp | 25 ++++++++++++++++++++++---
 3 files changed, 45 insertions(+), 4 deletions(-)
diff --git a/test/common/bfloat16.hpp b/test/common/bfloat16.hpp
index a8ba195b..f7b0e418 100644
--- a/test/common/bfloat16.hpp
+++ b/test/common/bfloat16.hpp
@@ -25,6 +25,17 @@ public:
     explicit BFloat16(float value) : m_data(float_to_bfloat16_round_towards_zero(value)) {
     }
 
+    /// Creates a `BFloat16` directly from its 16-bit binary representation, with no numeric conversion.
+    ///
+    /// @param[in] data The raw bits to store verbatim as the value's underlying data.
+    ///
+    /// @return The `BFloat16` value whose underlying data equals `data`.
+    static constexpr BFloat16 from_binary(uint16_t data) {
+        BFloat16 value{};
+        value.m_data = data;  // Stored as-is; unlike the float constructor, no rounding is applied.
+        return value;
+    }
+
     /// Assigns to the specified numeric value which will be converted to `bfloat16_t`.
     template <typename T, std::enable_if_t<is_arithmetic<T>, bool> = true>
     BFloat16& operator=(T value) {
diff --git a/test/common/float16.hpp b/test/common/float16.hpp
index 3dc77684..1b4eb0ba 100644
--- a/test/common/float16.hpp
+++ b/test/common/float16.hpp
@@ -1,5 +1,5 @@
 //
-// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
 //
 // SPDX-License-Identifier: Apache-2.0
 //
@@ -104,6 +104,17 @@ public:
     explicit Float16(float value) : m_data(kai_test_float16_from_float(value)) {
     }
 
+    /// Creates a `Float16` directly from its 16-bit binary representation, with no numeric conversion.
+    ///
+    /// @param[in] data The raw bits to store verbatim as the value's underlying data.
+    ///
+    /// @return The `Float16` value whose underlying data equals `data`.
+    static constexpr Float16 from_binary(uint16_t data) {
+        Float16 value{};
+        value.m_data = data;  // Stored as-is; unlike the float constructor, no conversion is applied.
+        return value;
+    }
+
     /// Assigns to the specified numeric value.
     template <typename T, std::enable_if_t<is_arithmetic<T>, bool> = true>
     Float16& operator=(T value) {
diff --git a/test/common/numeric_limits.hpp b/test/common/numeric_limits.hpp
index e5950810..a11fd18e 100644
--- a/test/common/numeric_limits.hpp
+++ b/test/common/numeric_limits.hpp
@@ -1,5 +1,5 @@
 //
-// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
+// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
 //
 // SPDX-License-Identifier: Apache-2.0
 //
@@ -7,14 +7,17 @@
 #pragma once
 
 #include <limits>
+#include <type_traits>
 
+#include "test/common/bfloat16.hpp"
+#include "test/common/float16.hpp"
 #include "test/common/int4.hpp"
 
 namespace kai::test {
 
 /// Highest finite value of type `T`.
 template <typename T>
-inline constexpr T numeric_highest = std::numeric_limits<T>::max();
+inline constexpr std::enable_if_t<std::is_arithmetic_v<T>, T> numeric_highest = std::numeric_limits<T>::max();
 
 /// Highest finite value of type `T`.
 template <>
@@ -24,9 +27,17 @@ inline constexpr UInt4 numeric_highest<UInt4>{15};
 template <>
 inline constexpr Int4 numeric_highest<Int4>{7};
 
+/// Highest finite value of type `T`.
+template <>
+inline constexpr Float16 numeric_highest<Float16> = Float16::from_binary(0x7bff);
+
+/// Highest finite value of type `T`.
+template <>
+inline constexpr BFloat16 numeric_highest<BFloat16> = BFloat16::from_binary(0x7f7f);
+
 /// Lowest finite value of type `T`.
 template <typename T>
-inline constexpr T numeric_lowest = std::numeric_limits<T>::lowest();
+inline constexpr std::enable_if_t<std::is_arithmetic_v<T>, T> numeric_lowest = std::numeric_limits<T>::lowest();
 
 /// Lowest finite value of type `T`.
 template <>
@@ -36,4 +47,12 @@ inline constexpr UInt4 numeric_lowest<UInt4>{0};
 template <>
 inline constexpr Int4 numeric_lowest<Int4>{-8};
 
+/// Highest finite value of type `T`.
+template <>
+inline constexpr Float16 numeric_lowest<Float16> = Float16::from_binary(0xfbff);
+
+/// Highest finite value of type `T`.
+template <>
+inline constexpr BFloat16 numeric_lowest<BFloat16> = BFloat16::from_binary(0xff7f);
+
 }  // namespace kai::test
-- 
GitLab


From 39a89d65d8c47a75f316ba5dbc6f51a5f3dd2b5d Mon Sep 17 00:00:00 2001
From: Viet-Hoa Do <viet-hoa.do@arm.com>
Date: Wed, 21 May 2025 15:27:44 +0100
Subject: [PATCH 2/2] Address review comments

Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
---
 test/common/numeric_limits.hpp | 16 ++++++++--------
 test/reference/quantize.cpp    |  5 ++---
 test/tests/bfloat16_test.cpp   |  7 ++++++-
 test/tests/float16_test.cpp    |  6 ++++++
 4 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/test/common/numeric_limits.hpp b/test/common/numeric_limits.hpp
index a11fd18e..04189a99 100644
--- a/test/common/numeric_limits.hpp
+++ b/test/common/numeric_limits.hpp
@@ -19,19 +19,19 @@ namespace kai::test {
 template <typename T>
 inline constexpr std::enable_if_t<std::is_arithmetic_v<T>, T> numeric_highest = std::numeric_limits<T>::max();
 
-/// Highest finite value of type `T`.
+/// Highest finite value of @ref UInt4.
 template <>
 inline constexpr UInt4 numeric_highest<UInt4>{15};
 
-/// Highest finite value of type `T`.
+/// Highest finite value of @ref Int4.
 template <>
 inline constexpr Int4 numeric_highest<Int4>{7};
 
-/// Highest finite value of type `T`.
+/// Highest finite value of @ref Float16 (65504, raw bits 0x7bff).
 template <>
 inline constexpr Float16 numeric_highest<Float16> = Float16::from_binary(0x7bff);
 
-/// Highest finite value of type `T`.
+/// Highest finite value of @ref BFloat16 (2^128 - 2^120, raw bits 0x7f7f).
 template <>
 inline constexpr BFloat16 numeric_highest<BFloat16> = BFloat16::from_binary(0x7f7f);
 
@@ -39,19 +39,19 @@ inline constexpr BFloat16 numeric_highest<BFloat16> = BFloat16::from_binary(0x7f
 template <typename T>
 inline constexpr std::enable_if_t<std::is_arithmetic_v<T>, T> numeric_lowest = std::numeric_limits<T>::lowest();
 
-/// Lowest finite value of type `T`.
+/// Lowest finite value of @ref UInt4.
 template <>
 inline constexpr UInt4 numeric_lowest<UInt4>{0};
 
-/// Lowest finite value of type `T`.
+/// Lowest finite value of @ref Int4.
 template <>
 inline constexpr Int4 numeric_lowest<Int4>{-8};
 
-/// Highest finite value of type `T`.
+/// Lowest finite value of @ref Float16 (-65504, raw bits 0xfbff).
 template <>
 inline constexpr Float16 numeric_lowest<Float16> = Float16::from_binary(0xfbff);
 
-/// Highest finite value of type `T`.
+/// Lowest finite value of @ref BFloat16 (-(2^128 - 2^120), raw bits 0xff7f).
 template <>
 inline constexpr BFloat16 numeric_lowest<BFloat16> = BFloat16::from_binary(0xff7f);
 
diff --git a/test/reference/quantize.cpp b/test/reference/quantize.cpp
index 7a2fb198..477a13c2 100644
--- a/test/reference/quantize.cpp
+++ b/test/reference/quantize.cpp
@@ -10,7 +10,6 @@
 #include <cmath>
 #include <cstddef>
 #include <cstdint>
-#include <limits>
 #include <tuple>
 #include <vector>
 
@@ -204,8 +203,8 @@ std::tuple<std::vector<uint8_t>, std::vector<uint8_t>> compute_asymmetric_per_bl
     for (size_t y = 0; y < height; ++y) {
         for (size_t x_quant = 0; x_quant < width; x_quant += quant_width) {
             // Computes the quantization scale and zero point.
-            auto min_value = std::numeric_limits<SrcType>::max();
-            auto max_value = std::numeric_limits<SrcType>::lowest();
+            auto min_value = numeric_highest<SrcType>;
+            auto max_value = numeric_lowest<SrcType>;
 
             for (size_t x_element = 0; x_element < quant_width; ++x_element) {
                 const auto x = x_quant + x_element;
diff --git a/test/tests/bfloat16_test.cpp b/test/tests/bfloat16_test.cpp
index 996d2e68..f94ce57e 100644
--- a/test/tests/bfloat16_test.cpp
+++ b/test/tests/bfloat16_test.cpp
@@ -8,7 +8,7 @@
 
 #include <gtest/gtest.h>
 
-#include "test/common/cpu_info.hpp"
+#include "test/common/numeric_limits.hpp"
 
 namespace kai::test {
 
@@ -28,4 +28,9 @@ TEST(BFloat16, SimpleTest) {
     ASSERT_TRUE(BFloat16(2.0F) != BFloat16(1.25F));
 }
 
+TEST(BFloat16, NumericLimitTest) {  // Limits must convert to float exactly; magnitude is 2^128 - 2^120.
+    ASSERT_EQ(static_cast<float>(numeric_lowest<BFloat16>), -338953138925153547590470800371487866880.0F);
+    ASSERT_EQ(static_cast<float>(numeric_highest<BFloat16>), 338953138925153547590470800371487866880.0F);
+}
+
 }  // namespace kai::test
diff --git a/test/tests/float16_test.cpp b/test/tests/float16_test.cpp
index ea919fdd..8941810e 100644
--- a/test/tests/float16_test.cpp
+++ b/test/tests/float16_test.cpp
@@ -9,6 +9,7 @@
 #include <gtest/gtest.h>
 
 #include "test/common/cpu_info.hpp"
+#include "test/common/numeric_limits.hpp"
 
 namespace kai::test {
 
@@ -62,4 +63,9 @@ TEST(Float16, SimpleTest) {
     ASSERT_EQ(a, Float16(1.25F));
 }
 
+TEST(Float16, NumericLimitTest) {  // Limits must convert to float exactly; magnitude is 2^16 - 2^5.
+    ASSERT_EQ(static_cast<float>(numeric_lowest<Float16>), -65504.0F);
+    ASSERT_EQ(static_cast<float>(numeric_highest<Float16>), 65504.0F);
+}
+
 }  // namespace kai::test
-- 
GitLab