From e8cf652decf3671c404ac19d3092ecba22812b24 Mon Sep 17 00:00:00 2001 From: Michael Kozlov Date: Wed, 22 Jan 2025 10:31:36 +0000 Subject: [PATCH 1/5] Implement Matmul Portion Testing in Int8 unit tests To test a potential ukernel use case and to test offset helper functions. Resolves: #COMPMID-7887 Signed-off-by: Michael Kozlov --- .../matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp | 122 ++++++++++++++---- 1 file changed, 100 insertions(+), 22 deletions(-) diff --git a/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp b/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp index ccdc1085..003f6758 100644 --- a/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp +++ b/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod.h" @@ -24,13 +25,11 @@ #include "kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi8cxp_qsi8cx_neon.h" #include "kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi8cxp_qsi8cx_neon.h" #include "test/common/cpu_info.hpp" +#include "test/common/matrix_portion.hpp" #include "test/common/memory.hpp" -#include "test/common/round.hpp" #include "test/common/test_suite.hpp" -#include "test/reference/cast.hpp" #include "test/reference/fill.hpp" #include "test/reference/matmul.hpp" -#include "test/reference/pad.hpp" #include "test/reference/quantize.hpp" #include "test/reference/transpose.hpp" @@ -47,10 +46,14 @@ static const std::array; + +class UkernelVariantTest_withPortions : public ::testing::TestWithParam {}; + +class MatMulTest_f32_qai8dxp_qsi8cxp : public UkernelVariantTest_withPortions {}; TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, Offset_RHS) { - const auto& [variant_index, matmul_shape] = GetParam(); + const auto& [variant_index, matmul_shape, portion] = GetParam(); const auto& ukernel_variant = variants_kai_matmul_clamp_f32_qai8dxp_qsi8cxp.at(variant_index); if 
(ukernel_variant.fn_is_supported && !ukernel_variant.fn_is_supported()) { @@ -74,7 +77,7 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, Offset_RHS) { } TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, Offset_LHS) { - const auto& [variant_index, matmul_shape] = GetParam(); + const auto& [variant_index, matmul_shape, portion] = GetParam(); const auto& ukernel_variant = variants_kai_matmul_clamp_f32_qai8dxp_qsi8cxp.at(variant_index); if (ukernel_variant.fn_is_supported && !ukernel_variant.fn_is_supported()) { @@ -95,7 +98,7 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, Offset_LHS) { } TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_nxk_qsi8cx) { - auto& [variant_index, matmul_shape] = GetParam(); + auto& [variant_index, matmul_shape, portion] = GetParam(); const auto& ukernel_variant = variants_kai_matmul_clamp_f32_qai8dxp_qsi8cxp.at(variant_index); if (ukernel_variant.fn_is_supported && !ukernel_variant.fn_is_supported()) { @@ -132,11 +135,30 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_nxk_qsi8cx) { ref_rhs_scales.data(), nullptr, K, ref_biases.data(), std::numeric_limits::lowest(), std::numeric_limits::max()); + auto m_step = ukernel_variant.interface.get_m_step(); + ASSERT_TRUE(m_step % mr == 0); + + auto n_step = ukernel_variant.interface.get_n_step(); + ASSERT_TRUE(n_step % nr == 0); + + const auto rect = portion.compute_portion(M, N, m_step, n_step); + if (rect.height() == 0 || rect.width() == 0) { + GTEST_SKIP(); + } + // Runs the LHS packing micro-kernel. 
const auto imp_packed_lhs_size = kai_get_lhs_packed_size_lhs_quant_pack_qai8dxp_f32(M, K, mr, kr, sr); std::vector imp_packed_lhs(imp_packed_lhs_size); + + const auto lhs_start_row = rect.start_row(); + size_t lhs_stride = K * sizeof(float); + + auto lhs_offset = kai_get_lhs_offset_lhs_quant_pack_qai8dxp_f32(lhs_start_row, lhs_stride); + auto lhs_packed_offset = kai_get_lhs_packed_offset_lhs_quant_pack_qai8dxp_f32(lhs_start_row, K, mr, kr, sr); + kai_run_lhs_quant_pack_qai8dxp_f32( - M, K, mr, kr, sr, 0, reinterpret_cast(ref_lhs.data()), K * sizeof(float), imp_packed_lhs.data()); + rect.height(), K, mr, kr, sr, 0, reinterpret_cast(ref_lhs.data() + lhs_offset), lhs_stride, + imp_packed_lhs.data() + lhs_packed_offset); // Runs the RHS packing micro-kernel. // * Generates the 8-bit signed symmetric quantized input for the micro-kernel. @@ -150,19 +172,31 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_nxk_qsi8cx) { reinterpret_cast(ref_biases.data()), reinterpret_cast(ref_rhs_scales.data()), imp_packed_rhs.data(), 0, &params); + const auto packed_rhs_start_row = rect.start_col(); + auto rhs_packed_offset = + kai_get_rhs_packed_offset_rhs_pack_nxk_qsi8cxp_qsi8cx_neon(packed_rhs_start_row, K, nr, kr, sr); + + const auto dst_stride = N * sizeof(float); + const auto dst_offset = ukernel_variant.interface.get_dst_offset(rect.start_row(), rect.start_col(), dst_stride); + const auto ref_dst_offset = rect.start_row() * dst_stride + rect.start_col() * sizeof(float); + ASSERT_EQ(dst_offset, ref_dst_offset); + // Runs the GEMM micro-kernel. 
const auto imp_dst_size = ukernel_variant.interface.get_dst_size(M, N); ASSERT_EQ(imp_dst_size, ref_dst.size()); std::vector imp_dst(imp_dst_size); ukernel_variant.interface.run_matmul( - M, N, K, imp_packed_lhs.data(), imp_packed_rhs.data(), reinterpret_cast(imp_dst.data()), + rect.height(), rect.width(), K, imp_packed_lhs.data() + lhs_packed_offset, + imp_packed_rhs.data() + rhs_packed_offset, reinterpret_cast(imp_dst.data() + dst_offset), N * sizeof(float), sizeof(float), std::numeric_limits::lowest(), std::numeric_limits::max()); // Compares the output of the micro-kernels against the output of the reference implementation. - for (size_t y = 0; y < M; ++y) { - for (size_t x = 0; x < N; ++x) { - const auto imp_value = read_array(imp_dst.data(), y * N + x); - const auto ref_value = read_array(ref_dst.data(), y * N + x); + for (size_t y = 0; y < rect.height(); ++y) { + for (size_t x = 0; x < rect.width(); ++x) { + const auto imp_value = + read_array(imp_dst.data(), (rect.start_row() + y) * N + (x + rect.start_col())); + const auto ref_value = + read_array(ref_dst.data(), (rect.start_row() + y) * N + (x + rect.start_col())); const auto rel_error = ref_value != 0 ? 
std::abs((imp_value - ref_value) / ref_value) : std::abs(imp_value); if (rel_error > 0.0001F) { @@ -173,7 +207,7 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_nxk_qsi8cx) { } TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_kxn_qsi8cx) { - auto& [variant_index, matmul_shape] = GetParam(); + auto& [variant_index, matmul_shape, portion] = GetParam(); const auto& ukernel_variant = variants_kai_matmul_clamp_f32_qai8dxp_qsi8cxp.at(variant_index); if (ukernel_variant.fn_is_supported && !ukernel_variant.fn_is_supported()) { @@ -221,11 +255,29 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_kxn_qsi8cx) { ref_rhs_scales.data(), nullptr, K, ref_biases.data(), std::numeric_limits::lowest(), std::numeric_limits::max()); + auto m_step = ukernel_variant.interface.get_m_step(); + ASSERT_TRUE(m_step % mr == 0); + + auto n_step = ukernel_variant.interface.get_n_step(); + ASSERT_TRUE(n_step % nr == 0); + + const auto rect = portion.compute_portion(M, N, m_step, n_step); + if (rect.height() == 0 || rect.width() == 0) { + GTEST_SKIP(); + } + + const auto lhs_start_row = rect.start_row(); + size_t const lhs_stride = K * sizeof(float); + + auto lhs_offset = kai_get_lhs_offset_lhs_quant_pack_qai8dxp_f32(lhs_start_row, lhs_stride); + auto lhs_packed_offset = kai_get_lhs_packed_offset_lhs_quant_pack_qai8dxp_f32(lhs_start_row, K, mr, kr, sr); + // Runs the LHS packing micro-kernel. const auto imp_packed_lhs_size = kai_get_lhs_packed_size_lhs_quant_pack_qai8dxp_f32(M, K, mr, kr, sr); std::vector imp_packed_lhs(imp_packed_lhs_size); kai_run_lhs_quant_pack_qai8dxp_f32( - M, K, mr, kr, sr, 0, reinterpret_cast(ref_lhs.data()), K * sizeof(float), imp_packed_lhs.data()); + rect.height(), K, mr, kr, sr, 0, reinterpret_cast(ref_lhs.data() + lhs_offset), K * sizeof(float), + imp_packed_lhs.data() + lhs_packed_offset); // Runs the RHS packing micro-kernel. // * Generates the 8-bit signed symmetric quantized input for the micro-kernel. 
@@ -239,19 +291,31 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_kxn_qsi8cx) { reinterpret_cast(ref_biases.data()), reinterpret_cast(ref_rhs_scales.data()), imp_packed_rhs.data(), 0, &params); + const auto packed_rhs_start_row = rect.start_col(); + auto rhs_packed_offset = + kai_get_rhs_packed_offset_rhs_pack_kxn_qsi8cxp_qsi8cx_neon(packed_rhs_start_row, K, nr, kr, sr); + + const auto dst_stride = N * sizeof(float); + const auto dst_offset = ukernel_variant.interface.get_dst_offset(rect.start_row(), rect.start_col(), dst_stride); + const auto ref_dst_offset = rect.start_row() * dst_stride + rect.start_col() * sizeof(float); + ASSERT_EQ(dst_offset, ref_dst_offset); + // Runs the GEMM micro-kernel. const auto imp_dst_size = ukernel_variant.interface.get_dst_size(M, N); ASSERT_EQ(imp_dst_size, ref_dst.size()); std::vector imp_dst(imp_dst_size); ukernel_variant.interface.run_matmul( - M, N, K, imp_packed_lhs.data(), imp_packed_rhs.data(), reinterpret_cast(imp_dst.data()), + rect.height(), rect.width(), K, imp_packed_lhs.data() + lhs_packed_offset, + imp_packed_rhs.data() + rhs_packed_offset, reinterpret_cast(imp_dst.data() + dst_offset), N * sizeof(float), sizeof(float), std::numeric_limits::lowest(), std::numeric_limits::max()); // Compares the output of the micro-kernels against the output of the reference implementation. - for (size_t y = 0; y < M; ++y) { - for (size_t x = 0; x < N; ++x) { - const auto imp_value = read_array(imp_dst.data(), y * N + x); - const auto ref_value = read_array(ref_dst.data(), y * N + x); + for (size_t y = 0; y < rect.height(); ++y) { + for (size_t x = 0; x < rect.width(); ++x) { + const auto imp_value = + read_array(imp_dst.data(), (rect.start_row() + y) * N + (x + rect.start_col())); + const auto ref_value = + read_array(ref_dst.data(), (rect.start_row() + y) * N + (x + rect.start_col())); const auto rel_error = ref_value != 0 ? 
std::abs((imp_value - ref_value) / ref_value) : std::abs(imp_value); if (rel_error > 0.0001F) { @@ -265,14 +329,28 @@ INSTANTIATE_TEST_SUITE_P( MatMul, MatMulTest_f32_qai8dxp_qsi8cxp, testing::Combine( testing::Range(0, variants_kai_matmul_clamp_f32_qai8dxp_qsi8cxp.size()), - testing::Values(MatMulShape{17, 33, 67}, MatMulShape{19, 35, 63}, MatMulShape{1, 27, 31})), + testing::Values( + MatMulShape{17, 33, 67}, // + MatMulShape{19, 35, 63}, // + MatMulShape{1, 27, 31}), + testing::Values( + MatrixPortion(0, 0, 1, 1), // Full matrix. + MatrixPortion(0, 0, 1, 0.25), // Leftmost portion. + MatrixPortion(0, 0.75, 1, 1), // Rightmost portion. + MatrixPortion(0, 0.5, 1, 0.8) // Somewhere Middle + )), [](const auto& info) { const auto variant_idx = std::get<0>(info.param); const std::string name{variants_kai_matmul_clamp_f32_qai8dxp_qsi8cxp.at(variant_idx).name}; const auto shape = std::get(info.param); + const auto portion = std::get(info.param); std::stringstream sstream; - sstream << name << "__M_" << shape.m << "__N_" << shape.n << "__K_" << shape.k; + sstream << name << "__M_" << shape.m << "__N_" << shape.n << "__K_" << shape.k // + << "__PortionStartRow_" << static_cast(portion.start_row() * 1000) // + << "__PortionStartCol_" << static_cast(portion.start_col() * 1000) // + << "__PortionHeight_" << static_cast(portion.height() * 1000) // + << "__PortionWidth_" << static_cast(portion.width() * 1000); return sstream.str(); }); -- GitLab From 518b12e47df0e9980e85f443e2cfb4de4e91affe Mon Sep 17 00:00:00 2001 From: Michael Kozlov Date: Fri, 7 Feb 2025 11:57:06 +0000 Subject: [PATCH 2/5] Use matmul packed offsets and assert equality with packing kernel packed offsets Signed-off-by: Michael Kozlov --- .../matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp b/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp index 003f6758..1ba4e224 100644 --- 
a/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp +++ b/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp @@ -181,13 +181,18 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_nxk_qsi8cx) { const auto ref_dst_offset = rect.start_row() * dst_stride + rect.start_col() * sizeof(float); ASSERT_EQ(dst_offset, ref_dst_offset); + const auto matmul_lhs_packed_offset = ukernel_variant.interface.get_lhs_packed_offset(rect.start_row(), K); + ASSERT_EQ(lhs_packed_offset, matmul_lhs_packed_offset); + const auto matmul_rhs_packed_offset = ukernel_variant.interface.get_rhs_packed_offset(rect.start_col(), K); + ASSERT_EQ(rhs_packed_offset, matmul_rhs_packed_offset); + // Runs the GEMM micro-kernel. const auto imp_dst_size = ukernel_variant.interface.get_dst_size(M, N); ASSERT_EQ(imp_dst_size, ref_dst.size()); std::vector imp_dst(imp_dst_size); ukernel_variant.interface.run_matmul( - rect.height(), rect.width(), K, imp_packed_lhs.data() + lhs_packed_offset, - imp_packed_rhs.data() + rhs_packed_offset, reinterpret_cast(imp_dst.data() + dst_offset), + rect.height(), rect.width(), K, imp_packed_lhs.data() + matmul_lhs_packed_offset, + imp_packed_rhs.data() + matmul_rhs_packed_offset, reinterpret_cast(imp_dst.data() + dst_offset), N * sizeof(float), sizeof(float), std::numeric_limits::lowest(), std::numeric_limits::max()); // Compares the output of the micro-kernels against the output of the reference implementation. 
@@ -300,13 +305,18 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_kxn_qsi8cx) { const auto ref_dst_offset = rect.start_row() * dst_stride + rect.start_col() * sizeof(float); ASSERT_EQ(dst_offset, ref_dst_offset); + const auto matmul_lhs_packed_offset = ukernel_variant.interface.get_lhs_packed_offset(rect.start_row(), K); + ASSERT_EQ(lhs_packed_offset, matmul_lhs_packed_offset); + const auto matmul_rhs_packed_offset = ukernel_variant.interface.get_rhs_packed_offset(rect.start_col(), K); + ASSERT_EQ(rhs_packed_offset, matmul_rhs_packed_offset); + // Runs the GEMM micro-kernel. const auto imp_dst_size = ukernel_variant.interface.get_dst_size(M, N); ASSERT_EQ(imp_dst_size, ref_dst.size()); std::vector imp_dst(imp_dst_size); ukernel_variant.interface.run_matmul( - rect.height(), rect.width(), K, imp_packed_lhs.data() + lhs_packed_offset, - imp_packed_rhs.data() + rhs_packed_offset, reinterpret_cast(imp_dst.data() + dst_offset), + rect.height(), rect.width(), K, imp_packed_lhs.data() + matmul_lhs_packed_offset, + imp_packed_rhs.data() + matmul_rhs_packed_offset, reinterpret_cast(imp_dst.data() + dst_offset), N * sizeof(float), sizeof(float), std::numeric_limits::lowest(), std::numeric_limits::max()); // Compares the output of the micro-kernels against the output of the reference implementation. 
-- GitLab From 399d3711a9b0b6215dae1ebdba1f4b49a4c3338b Mon Sep 17 00:00:00 2001 From: Michael Kozlov Date: Mon, 10 Feb 2025 11:27:13 +0000 Subject: [PATCH 3/5] Address review comments Signed-off-by: Michael Kozlov --- test/common/test_suite.hpp | 4 +++- test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp | 8 +++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/test/common/test_suite.hpp b/test/common/test_suite.hpp index 7c30fea8..cc791037 100644 --- a/test/common/test_suite.hpp +++ b/test/common/test_suite.hpp @@ -12,7 +12,8 @@ #include #include #include -#include + +#include "matrix_portion.hpp" // clang-format off #define UKERNEL_MATMUL_VARIANT(name) \ @@ -79,6 +80,7 @@ struct MatMulShape { /// Matrix multiplication test information. using MatMulTestParams = std::tuple; +using MatMulTestPortionedParams = std::tuple; class UkernelVariantTest : public ::testing::TestWithParam {}; diff --git a/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp b/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp index 1ba4e224..eac62092 100644 --- a/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp +++ b/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp @@ -46,9 +46,7 @@ static const std::array; - -class UkernelVariantTest_withPortions : public ::testing::TestWithParam {}; +class UkernelVariantTest_withPortions : public ::testing::TestWithParam {}; class MatMulTest_f32_qai8dxp_qsi8cxp : public UkernelVariantTest_withPortions {}; @@ -143,7 +141,7 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_nxk_qsi8cx) { const auto rect = portion.compute_portion(M, N, m_step, n_step); if (rect.height() == 0 || rect.width() == 0) { - GTEST_SKIP(); + GTEST_SKIP() << "Skipping empty portion."; } // Runs the LHS packing micro-kernel. 
@@ -268,7 +266,7 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_kxn_qsi8cx) { const auto rect = portion.compute_portion(M, N, m_step, n_step); if (rect.height() == 0 || rect.width() == 0) { - GTEST_SKIP(); + GTEST_SKIP() << "Skipping empty portion."; } const auto lhs_start_row = rect.start_row(); -- GitLab From 4b87d1660594494d771482c02aa0ec4566e41156 Mon Sep 17 00:00:00 2001 From: Michael Kozlov Date: Mon, 10 Feb 2025 13:08:02 +0000 Subject: [PATCH 4/5] Add portion cases with non-zero start_row Signed-off-by: Michael Kozlov --- test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp b/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp index eac62092..0741a26e 100644 --- a/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp +++ b/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp @@ -342,10 +342,13 @@ INSTANTIATE_TEST_SUITE_P( MatMulShape{19, 35, 63}, // MatMulShape{1, 27, 31}), testing::Values( - MatrixPortion(0, 0, 1, 1), // Full matrix. - MatrixPortion(0, 0, 1, 0.25), // Leftmost portion. - MatrixPortion(0, 0.75, 1, 1), // Rightmost portion. - MatrixPortion(0, 0.5, 1, 0.8) // Somewhere Middle + MatrixPortion(0, 0, 1, 1), // Full matrix. + MatrixPortion(0, 0, 1, 0.25), // Leftmost portion. + MatrixPortion(0, 0.75, 1, 1), // Rightmost portion. + MatrixPortion(0, 0.5, 1, 0.8), // Somewhere Middle + MatrixPortion(0.75, 0.75, 1, 1), // Bottom-right corner. 
+ MatrixPortion(0.75, 0, 1, 1), // Partial rows + MatrixPortion(0.4, 0.5, 0.6, 0.8) // Somewhere Middle )), [](const auto& info) { const auto variant_idx = std::get<0>(info.param); -- GitLab From 6b17668797ac4e12d46e645ca68dbe15ecf0eb16 Mon Sep 17 00:00:00 2001 From: Michael Kozlov Date: Mon, 10 Feb 2025 15:44:22 +0000 Subject: [PATCH 5/5] Refactor test class Signed-off-by: Michael Kozlov --- test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp b/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp index 0741a26e..4dd69e4b 100644 --- a/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp +++ b/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp @@ -46,9 +46,7 @@ static const std::array {}; - -class MatMulTest_f32_qai8dxp_qsi8cxp : public UkernelVariantTest_withPortions {}; +class MatMulTest_f32_qai8dxp_qsi8cxp : public ::testing::TestWithParam {}; TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, Offset_RHS) { const auto& [variant_index, matmul_shape, portion] = GetParam(); -- GitLab