From e8cf652decf3671c404ac19d3092ecba22812b24 Mon Sep 17 00:00:00 2001
From: Michael Kozlov <michael.kozlov@arm.com>
Date: Wed, 22 Jan 2025 10:31:36 +0000
Subject: [PATCH 1/5] Implement Matmul Portion Testing in Int8 unit tests

Run the matmul micro-kernel on sub-portions of the output matrix to test a
potential micro-kernel use case and the offset helper functions.

Resolves: #COMPMID-7887

Signed-off-by: Michael Kozlov <michael.kozlov@arm.com>
---
 .../matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp | 122 ++++++++++++++----
 1 file changed, 100 insertions(+), 22 deletions(-)
diff --git a/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp b/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp
index ccdc1085..003f6758 100644
--- a/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp
+++ b/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp
@@ -13,6 +13,7 @@
 #include <limits>
 #include <sstream>
 #include <string>
+#include <tuple>
 #include <vector>
 
 #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod.h"
@@ -24,13 +25,11 @@
 #include "kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi8cxp_qsi8cx_neon.h"
 #include "kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi8cxp_qsi8cx_neon.h"
 #include "test/common/cpu_info.hpp"
+#include "test/common/matrix_portion.hpp"
 #include "test/common/memory.hpp"
-#include "test/common/round.hpp"
 #include "test/common/test_suite.hpp"
-#include "test/reference/cast.hpp"
 #include "test/reference/fill.hpp"
 #include "test/reference/matmul.hpp"
-#include "test/reference/pad.hpp"
 #include "test/reference/quantize.hpp"
 #include "test/reference/transpose.hpp"
 
@@ -47,10 +46,14 @@ static const std::array<UkernelVariant<kai_matmul_clamp_f32_qai8dxp_qsi8cxp_uker
          {UKERNEL_MATMUL_VARIANT(clamp_f32_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm),
           "kai_matmul_clamp_f32_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm", cpu_has_i8mm}}};
 
-class MatMulTest_f32_qai8dxp_qsi8cxp : public UkernelVariantTest {};
+using MatMulTestParams_withPortions = std::tuple<size_t, MatMulShape, MatrixPortion>;
+
+class UkernelVariantTest_withPortions : public ::testing::TestWithParam<MatMulTestParams_withPortions> {};
+
+class MatMulTest_f32_qai8dxp_qsi8cxp : public UkernelVariantTest_withPortions {};
 
 TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, Offset_RHS) {
-    const auto& [variant_index, matmul_shape] = GetParam();
+    const auto& [variant_index, matmul_shape, portion] = GetParam();
     const auto& ukernel_variant = variants_kai_matmul_clamp_f32_qai8dxp_qsi8cxp.at(variant_index);
 
     if (ukernel_variant.fn_is_supported && !ukernel_variant.fn_is_supported()) {
@@ -74,7 +77,7 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, Offset_RHS) {
 }
 
 TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, Offset_LHS) {
-    const auto& [variant_index, matmul_shape] = GetParam();
+    const auto& [variant_index, matmul_shape, portion] = GetParam();
     const auto& ukernel_variant = variants_kai_matmul_clamp_f32_qai8dxp_qsi8cxp.at(variant_index);
 
     if (ukernel_variant.fn_is_supported && !ukernel_variant.fn_is_supported()) {
@@ -95,7 +98,7 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, Offset_LHS) {
 }
 
 TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_nxk_qsi8cx) {
-    auto& [variant_index, matmul_shape] = GetParam();
+    auto& [variant_index, matmul_shape, portion] = GetParam();
     const auto& ukernel_variant = variants_kai_matmul_clamp_f32_qai8dxp_qsi8cxp.at(variant_index);
 
     if (ukernel_variant.fn_is_supported && !ukernel_variant.fn_is_supported()) {
@@ -132,11 +135,30 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_nxk_qsi8cx) {
         ref_rhs_scales.data(), nullptr, K, ref_biases.data(), std::numeric_limits<float>::lowest(),
         std::numeric_limits<float>::max());
 
+    auto m_step = ukernel_variant.interface.get_m_step();
+    ASSERT_TRUE(m_step % mr == 0);
+
+    auto n_step = ukernel_variant.interface.get_n_step();
+    ASSERT_TRUE(n_step % nr == 0);
+
+    const auto rect = portion.compute_portion(M, N, m_step, n_step);
+    if (rect.height() == 0 || rect.width() == 0) {
+        GTEST_SKIP();
+    }
+
     // Runs the LHS packing micro-kernel.
     const auto imp_packed_lhs_size = kai_get_lhs_packed_size_lhs_quant_pack_qai8dxp_f32(M, K, mr, kr, sr);
     std::vector<uint8_t> imp_packed_lhs(imp_packed_lhs_size);
+
+    const auto lhs_start_row = rect.start_row();
+    size_t lhs_stride = K * sizeof(float);
+
+    auto lhs_offset = kai_get_lhs_offset_lhs_quant_pack_qai8dxp_f32(lhs_start_row, lhs_stride);
+    auto lhs_packed_offset = kai_get_lhs_packed_offset_lhs_quant_pack_qai8dxp_f32(lhs_start_row, K, mr, kr, sr);
+
     kai_run_lhs_quant_pack_qai8dxp_f32(
-        M, K, mr, kr, sr, 0, reinterpret_cast<const float*>(ref_lhs.data()), K * sizeof(float), imp_packed_lhs.data());
+        rect.height(), K, mr, kr, sr, 0, reinterpret_cast<const float*>(ref_lhs.data() + lhs_offset), lhs_stride,
+        imp_packed_lhs.data() + lhs_packed_offset);
 
     // Runs the RHS packing micro-kernel.
     //   * Generates the 8-bit signed symmetric quantized input for the micro-kernel.
@@ -150,19 +172,31 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_nxk_qsi8cx) {
         reinterpret_cast<const float*>(ref_biases.data()), reinterpret_cast<const float*>(ref_rhs_scales.data()),
         imp_packed_rhs.data(), 0, &params);
 
+    const auto packed_rhs_start_row = rect.start_col();
+    auto rhs_packed_offset =
+        kai_get_rhs_packed_offset_rhs_pack_nxk_qsi8cxp_qsi8cx_neon(packed_rhs_start_row, K, nr, kr, sr);
+
+    const auto dst_stride = N * sizeof(float);
+    const auto dst_offset = ukernel_variant.interface.get_dst_offset(rect.start_row(), rect.start_col(), dst_stride);
+    const auto ref_dst_offset = rect.start_row() * dst_stride + rect.start_col() * sizeof(float);
+    ASSERT_EQ(dst_offset, ref_dst_offset);
+
     // Runs the GEMM micro-kernel.
     const auto imp_dst_size = ukernel_variant.interface.get_dst_size(M, N);
     ASSERT_EQ(imp_dst_size, ref_dst.size());
     std::vector<uint8_t> imp_dst(imp_dst_size);
     ukernel_variant.interface.run_matmul(
-        M, N, K, imp_packed_lhs.data(), imp_packed_rhs.data(), reinterpret_cast<float*>(imp_dst.data()),
+        rect.height(), rect.width(), K, imp_packed_lhs.data() + lhs_packed_offset,
+        imp_packed_rhs.data() + rhs_packed_offset, reinterpret_cast<float*>(imp_dst.data() + dst_offset),
         N * sizeof(float), sizeof(float), std::numeric_limits<float>::lowest(), std::numeric_limits<float>::max());
 
     // Compares the output of the micro-kernels against the output of the reference implementation.
-    for (size_t y = 0; y < M; ++y) {
-        for (size_t x = 0; x < N; ++x) {
-            const auto imp_value = read_array<float>(imp_dst.data(), y * N + x);
-            const auto ref_value = read_array<float>(ref_dst.data(), y * N + x);
+    for (size_t y = 0; y < rect.height(); ++y) {
+        for (size_t x = 0; x < rect.width(); ++x) {
+            const auto imp_value =
+                read_array<float>(imp_dst.data(), (rect.start_row() + y) * N + (x + rect.start_col()));
+            const auto ref_value =
+                read_array<float>(ref_dst.data(), (rect.start_row() + y) * N + (x + rect.start_col()));
             const auto rel_error = ref_value != 0 ? std::abs((imp_value - ref_value) / ref_value) : std::abs(imp_value);
 
             if (rel_error > 0.0001F) {
@@ -173,7 +207,7 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_nxk_qsi8cx) {
 }
 
 TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_kxn_qsi8cx) {
-    auto& [variant_index, matmul_shape] = GetParam();
+    auto& [variant_index, matmul_shape, portion] = GetParam();
     const auto& ukernel_variant = variants_kai_matmul_clamp_f32_qai8dxp_qsi8cxp.at(variant_index);
 
     if (ukernel_variant.fn_is_supported && !ukernel_variant.fn_is_supported()) {
@@ -221,11 +255,29 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_kxn_qsi8cx) {
         ref_rhs_scales.data(), nullptr, K, ref_biases.data(), std::numeric_limits<float>::lowest(),
         std::numeric_limits<float>::max());
 
+    auto m_step = ukernel_variant.interface.get_m_step();
+    ASSERT_TRUE(m_step % mr == 0);
+
+    auto n_step = ukernel_variant.interface.get_n_step();
+    ASSERT_TRUE(n_step % nr == 0);
+
+    const auto rect = portion.compute_portion(M, N, m_step, n_step);
+    if (rect.height() == 0 || rect.width() == 0) {
+        GTEST_SKIP();
+    }
+
+    const auto lhs_start_row = rect.start_row();
+    size_t const lhs_stride = K * sizeof(float);
+
+    auto lhs_offset = kai_get_lhs_offset_lhs_quant_pack_qai8dxp_f32(lhs_start_row, lhs_stride);
+    auto lhs_packed_offset = kai_get_lhs_packed_offset_lhs_quant_pack_qai8dxp_f32(lhs_start_row, K, mr, kr, sr);
+
     // Runs the LHS packing micro-kernel.
     const auto imp_packed_lhs_size = kai_get_lhs_packed_size_lhs_quant_pack_qai8dxp_f32(M, K, mr, kr, sr);
     std::vector<uint8_t> imp_packed_lhs(imp_packed_lhs_size);
     kai_run_lhs_quant_pack_qai8dxp_f32(
-        M, K, mr, kr, sr, 0, reinterpret_cast<const float*>(ref_lhs.data()), K * sizeof(float), imp_packed_lhs.data());
+        rect.height(), K, mr, kr, sr, 0, reinterpret_cast<const float*>(ref_lhs.data() + lhs_offset), K * sizeof(float),
+        imp_packed_lhs.data() + lhs_packed_offset);
 
     // Runs the RHS packing micro-kernel.
     //   * Generates the 8-bit signed symmetric quantized input for the micro-kernel.
@@ -239,19 +291,31 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_kxn_qsi8cx) {
         reinterpret_cast<const float*>(ref_biases.data()), reinterpret_cast<const float*>(ref_rhs_scales.data()),
         imp_packed_rhs.data(), 0, &params);
 
+    const auto packed_rhs_start_row = rect.start_col();
+    auto rhs_packed_offset =
+        kai_get_rhs_packed_offset_rhs_pack_kxn_qsi8cxp_qsi8cx_neon(packed_rhs_start_row, K, nr, kr, sr);
+
+    const auto dst_stride = N * sizeof(float);
+    const auto dst_offset = ukernel_variant.interface.get_dst_offset(rect.start_row(), rect.start_col(), dst_stride);
+    const auto ref_dst_offset = rect.start_row() * dst_stride + rect.start_col() * sizeof(float);
+    ASSERT_EQ(dst_offset, ref_dst_offset);
+
     // Runs the GEMM micro-kernel.
     const auto imp_dst_size = ukernel_variant.interface.get_dst_size(M, N);
     ASSERT_EQ(imp_dst_size, ref_dst.size());
     std::vector<uint8_t> imp_dst(imp_dst_size);
     ukernel_variant.interface.run_matmul(
-        M, N, K, imp_packed_lhs.data(), imp_packed_rhs.data(), reinterpret_cast<float*>(imp_dst.data()),
+        rect.height(), rect.width(), K, imp_packed_lhs.data() + lhs_packed_offset,
+        imp_packed_rhs.data() + rhs_packed_offset, reinterpret_cast<float*>(imp_dst.data() + dst_offset),
         N * sizeof(float), sizeof(float), std::numeric_limits<float>::lowest(), std::numeric_limits<float>::max());
 
     // Compares the output of the micro-kernels against the output of the reference implementation.
-    for (size_t y = 0; y < M; ++y) {
-        for (size_t x = 0; x < N; ++x) {
-            const auto imp_value = read_array<float>(imp_dst.data(), y * N + x);
-            const auto ref_value = read_array<float>(ref_dst.data(), y * N + x);
+    for (size_t y = 0; y < rect.height(); ++y) {
+        for (size_t x = 0; x < rect.width(); ++x) {
+            const auto imp_value =
+                read_array<float>(imp_dst.data(), (rect.start_row() + y) * N + (x + rect.start_col()));
+            const auto ref_value =
+                read_array<float>(ref_dst.data(), (rect.start_row() + y) * N + (x + rect.start_col()));
             const auto rel_error = ref_value != 0 ? std::abs((imp_value - ref_value) / ref_value) : std::abs(imp_value);
 
             if (rel_error > 0.0001F) {
@@ -265,14 +329,28 @@ INSTANTIATE_TEST_SUITE_P(
     MatMul, MatMulTest_f32_qai8dxp_qsi8cxp,
     testing::Combine(
         testing::Range<size_t>(0, variants_kai_matmul_clamp_f32_qai8dxp_qsi8cxp.size()),
-        testing::Values(MatMulShape{17, 33, 67}, MatMulShape{19, 35, 63}, MatMulShape{1, 27, 31})),
+        testing::Values(
+            MatMulShape{17, 33, 67},  //
+            MatMulShape{19, 35, 63},  //
+            MatMulShape{1, 27, 31}),
+        testing::Values(
+            MatrixPortion(0, 0, 1, 1),     // Full matrix.
+            MatrixPortion(0, 0, 1, 0.25),  // Leftmost portion.
+            MatrixPortion(0, 0.75, 1, 1),  // Rightmost portion.
+            MatrixPortion(0, 0.5, 1, 0.8)  // Somewhere Middle
+            )),
     [](const auto& info) {
         const auto variant_idx = std::get<0>(info.param);
         const std::string name{variants_kai_matmul_clamp_f32_qai8dxp_qsi8cxp.at(variant_idx).name};
         const auto shape = std::get<MatMulShape>(info.param);
+        const auto portion = std::get<MatrixPortion>(info.param);
 
         std::stringstream sstream;
-        sstream << name << "__M_" << shape.m << "__N_" << shape.n << "__K_" << shape.k;
+        sstream << name << "__M_" << shape.m << "__N_" << shape.n << "__K_" << shape.k   //
+                << "__PortionStartRow_" << static_cast<int>(portion.start_row() * 1000)  //
+                << "__PortionStartCol_" << static_cast<int>(portion.start_col() * 1000)  //
+                << "__PortionHeight_" << static_cast<int>(portion.height() * 1000)       //
+                << "__PortionWidth_" << static_cast<int>(portion.width() * 1000);
         return sstream.str();
     });
 
-- 
GitLab


From 518b12e47df0e9980e85f443e2cfb4de4e91affe Mon Sep 17 00:00:00 2001
From: Michael Kozlov <michael.kozlov@arm.com>
Date: Fri, 7 Feb 2025 11:57:06 +0000
Subject: [PATCH 2/5] Use matmul packed offsets and assert equality with
 packing kernel packed offsets

Signed-off-by: Michael Kozlov <michael.kozlov@arm.com>
---
 .../matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp  | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp b/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp
index 003f6758..1ba4e224 100644
--- a/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp
+++ b/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp
@@ -181,13 +181,18 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_nxk_qsi8cx) {
     const auto ref_dst_offset = rect.start_row() * dst_stride + rect.start_col() * sizeof(float);
     ASSERT_EQ(dst_offset, ref_dst_offset);
 
+    const auto matmul_lhs_packed_offset = ukernel_variant.interface.get_lhs_packed_offset(rect.start_row(), K);
+    ASSERT_EQ(lhs_packed_offset, matmul_lhs_packed_offset);
+    const auto matmul_rhs_packed_offset = ukernel_variant.interface.get_rhs_packed_offset(rect.start_col(), K);
+    ASSERT_EQ(rhs_packed_offset, matmul_rhs_packed_offset);
+
     // Runs the GEMM micro-kernel.
     const auto imp_dst_size = ukernel_variant.interface.get_dst_size(M, N);
     ASSERT_EQ(imp_dst_size, ref_dst.size());
     std::vector<uint8_t> imp_dst(imp_dst_size);
     ukernel_variant.interface.run_matmul(
-        rect.height(), rect.width(), K, imp_packed_lhs.data() + lhs_packed_offset,
-        imp_packed_rhs.data() + rhs_packed_offset, reinterpret_cast<float*>(imp_dst.data() + dst_offset),
+        rect.height(), rect.width(), K, imp_packed_lhs.data() + matmul_lhs_packed_offset,
+        imp_packed_rhs.data() + matmul_rhs_packed_offset, reinterpret_cast<float*>(imp_dst.data() + dst_offset),
         N * sizeof(float), sizeof(float), std::numeric_limits<float>::lowest(), std::numeric_limits<float>::max());
 
     // Compares the output of the micro-kernels against the output of the reference implementation.
@@ -300,13 +305,18 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_kxn_qsi8cx) {
     const auto ref_dst_offset = rect.start_row() * dst_stride + rect.start_col() * sizeof(float);
     ASSERT_EQ(dst_offset, ref_dst_offset);
 
+    const auto matmul_lhs_packed_offset = ukernel_variant.interface.get_lhs_packed_offset(rect.start_row(), K);
+    ASSERT_EQ(lhs_packed_offset, matmul_lhs_packed_offset);
+    const auto matmul_rhs_packed_offset = ukernel_variant.interface.get_rhs_packed_offset(rect.start_col(), K);
+    ASSERT_EQ(rhs_packed_offset, matmul_rhs_packed_offset);
+
     // Runs the GEMM micro-kernel.
     const auto imp_dst_size = ukernel_variant.interface.get_dst_size(M, N);
     ASSERT_EQ(imp_dst_size, ref_dst.size());
     std::vector<uint8_t> imp_dst(imp_dst_size);
     ukernel_variant.interface.run_matmul(
-        rect.height(), rect.width(), K, imp_packed_lhs.data() + lhs_packed_offset,
-        imp_packed_rhs.data() + rhs_packed_offset, reinterpret_cast<float*>(imp_dst.data() + dst_offset),
+        rect.height(), rect.width(), K, imp_packed_lhs.data() + matmul_lhs_packed_offset,
+        imp_packed_rhs.data() + matmul_rhs_packed_offset, reinterpret_cast<float*>(imp_dst.data() + dst_offset),
         N * sizeof(float), sizeof(float), std::numeric_limits<float>::lowest(), std::numeric_limits<float>::max());
 
     // Compares the output of the micro-kernels against the output of the reference implementation.
-- 
GitLab


From 399d3711a9b0b6215dae1ebdba1f4b49a4c3338b Mon Sep 17 00:00:00 2001
From: Michael Kozlov <michael.kozlov@arm.com>
Date: Mon, 10 Feb 2025 11:27:13 +0000
Subject: [PATCH 3/5] Address review comments

Signed-off-by: Michael Kozlov <michael.kozlov@arm.com>
---
 test/common/test_suite.hpp                           | 4 +++-
 test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp | 8 +++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/test/common/test_suite.hpp b/test/common/test_suite.hpp
index 7c30fea8..cc791037 100644
--- a/test/common/test_suite.hpp
+++ b/test/common/test_suite.hpp
@@ -12,7 +12,8 @@
 #include <functional>
 #include <string_view>
 #include <tuple>
-#include <utility>
+
+#include "matrix_portion.hpp"
 
 // clang-format off
 #define UKERNEL_MATMUL_VARIANT(name)          \
@@ -79,6 +80,7 @@ struct MatMulShape {
 
 /// Matrix multiplication test information.
 using MatMulTestParams = std::tuple<size_t, MatMulShape>;
+using MatMulTestPortionedParams = std::tuple<size_t, MatMulShape, MatrixPortion>;
 
 class UkernelVariantTest : public ::testing::TestWithParam<MatMulTestParams> {};
 
diff --git a/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp b/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp
index 1ba4e224..eac62092 100644
--- a/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp
+++ b/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp
@@ -46,9 +46,7 @@ static const std::array<UkernelVariant<kai_matmul_clamp_f32_qai8dxp_qsi8cxp_uker
          {UKERNEL_MATMUL_VARIANT(clamp_f32_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm),
           "kai_matmul_clamp_f32_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm", cpu_has_i8mm}}};
 
-using MatMulTestParams_withPortions = std::tuple<size_t, MatMulShape, MatrixPortion>;
-
-class UkernelVariantTest_withPortions : public ::testing::TestWithParam<MatMulTestParams_withPortions> {};
+class UkernelVariantTest_withPortions : public ::testing::TestWithParam<MatMulTestPortionedParams> {};
 
 class MatMulTest_f32_qai8dxp_qsi8cxp : public UkernelVariantTest_withPortions {};
 
@@ -143,7 +141,7 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_nxk_qsi8cx) {
 
     const auto rect = portion.compute_portion(M, N, m_step, n_step);
     if (rect.height() == 0 || rect.width() == 0) {
-        GTEST_SKIP();
+        GTEST_SKIP() << "Skipping empty portion.";
     }
 
     // Runs the LHS packing micro-kernel.
@@ -268,7 +266,7 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_kxn_qsi8cx) {
 
     const auto rect = portion.compute_portion(M, N, m_step, n_step);
     if (rect.height() == 0 || rect.width() == 0) {
-        GTEST_SKIP();
+        GTEST_SKIP() << "Skipping empty portion.";
     }
 
     const auto lhs_start_row = rect.start_row();
-- 
GitLab


From 4b87d1660594494d771482c02aa0ec4566e41156 Mon Sep 17 00:00:00 2001
From: Michael Kozlov <michael.kozlov@arm.com>
Date: Mon, 10 Feb 2025 13:08:02 +0000
Subject: [PATCH 4/5] Add portion cases with non-zero start_row

Signed-off-by: Michael Kozlov <michael.kozlov@arm.com>
---
 test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp b/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp
index eac62092..0741a26e 100644
--- a/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp
+++ b/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp
@@ -342,10 +342,13 @@ INSTANTIATE_TEST_SUITE_P(
             MatMulShape{19, 35, 63},  //
             MatMulShape{1, 27, 31}),
         testing::Values(
-            MatrixPortion(0, 0, 1, 1),     // Full matrix.
-            MatrixPortion(0, 0, 1, 0.25),  // Leftmost portion.
-            MatrixPortion(0, 0.75, 1, 1),  // Rightmost portion.
-            MatrixPortion(0, 0.5, 1, 0.8)  // Somewhere Middle
+            MatrixPortion(0, 0, 1, 1),         // Full matrix.
+            MatrixPortion(0, 0, 1, 0.25),      // Leftmost portion.
+            MatrixPortion(0, 0.75, 1, 1),      // Rightmost portion.
+            MatrixPortion(0, 0.5, 1, 0.8),     // Somewhere Middle
+            MatrixPortion(0.75, 0.75, 1, 1),   // Bottom-right corner.
+            MatrixPortion(0.75, 0, 1, 1),      // Partial rows
+            MatrixPortion(0.4, 0.5, 0.6, 0.8)  // Somewhere Middle
             )),
     [](const auto& info) {
         const auto variant_idx = std::get<0>(info.param);
-- 
GitLab


From 6b17668797ac4e12d46e645ca68dbe15ecf0eb16 Mon Sep 17 00:00:00 2001
From: Michael Kozlov <michael.kozlov@arm.com>
Date: Mon, 10 Feb 2025 15:44:22 +0000
Subject: [PATCH 5/5] Refactor test class

Signed-off-by: Michael Kozlov <michael.kozlov@arm.com>
---
 test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp b/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp
index 0741a26e..4dd69e4b 100644
--- a/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp
+++ b/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp
@@ -46,9 +46,7 @@ static const std::array<UkernelVariant<kai_matmul_clamp_f32_qai8dxp_qsi8cxp_uker
          {UKERNEL_MATMUL_VARIANT(clamp_f32_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm),
           "kai_matmul_clamp_f32_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm", cpu_has_i8mm}}};
 
-class UkernelVariantTest_withPortions : public ::testing::TestWithParam<MatMulTestPortionedParams> {};
-
-class MatMulTest_f32_qai8dxp_qsi8cxp : public UkernelVariantTest_withPortions {};
+class MatMulTest_f32_qai8dxp_qsi8cxp : public ::testing::TestWithParam<MatMulTestPortionedParams> {};
 
 TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, Offset_RHS) {
     const auto& [variant_index, matmul_shape, portion] = GetParam();
-- 
GitLab