From 674b0056052382ed1511118c0984b0d1eb8bb97b Mon Sep 17 00:00:00 2001 From: Evie Wright Date: Tue, 25 Feb 2025 09:48:52 +0000 Subject: [PATCH 1/8] add KAI_ASSUME(m==1) to all gemv ukernels Signed-off-by: Evie Wright --- ...ai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod.c | 1 + ...ai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p8x4_1x8_neon_dotprod.c | 1 + ...matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c | 1 + ...ai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8_neon_dotprod.c | 1 + ...matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod.c | 1 + ...ai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4vlx4_1x4vl_sme2_sdot.c | 1 + ...kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod.c | 1 + ..._matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod.c | 1 + ..._matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod.c | 1 + ...kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod.c | 1 + ...kai_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod.c | 1 + 11 files changed, 11 insertions(+) diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod.c index c7fac421..843b5c82 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod.c @@ -159,6 +159,7 @@ void kai_run_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod( KAI_ASSUME(dst_stride_col == sizeof(float)); KAI_ASSUME((k % bl) == 0); KAI_ASSUME((bl % kai_bl) == 0); + KAI_ASSUME(m == 1); if (m == 0) { return; diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p8x4_1x8_neon_dotprod.c 
b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p8x4_1x8_neon_dotprod.c index ee62e401..3974bccf 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p8x4_1x8_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p8x4_1x8_neon_dotprod.c @@ -159,6 +159,7 @@ void kai_run_matmul_clamp_f32_qai8dxp1x4_qsi4c32p8x4_1x8_neon_dotprod( KAI_ASSUME(dst_stride_col == sizeof(float)); KAI_ASSUME((k % bl) == 0); KAI_ASSUME((bl % kai_bl) == 0); + KAI_ASSUME(m == 1); if (m == 0) { return; diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c index 3fc3cfc6..67e2bd5f 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c @@ -159,6 +159,7 @@ void kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod( KAI_ASSUME(dst_stride_col == sizeof(float)); KAI_ASSUME((k % bl) == 0); KAI_ASSUME((bl % kai_bl) == 0); + KAI_ASSUME(m == 1); if (m == 0) { return; diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8_neon_dotprod.c index ad9327f4..b71cd955 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8_neon_dotprod.c @@ -159,6 +159,7 @@ void 
kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8_neon_dotprod( KAI_ASSUME(dst_stride_col == sizeof(float)); KAI_ASSUME((k % bl) == 0); KAI_ASSUME((bl % kai_bl) == 0); + KAI_ASSUME(m == 1); if (m == 0) { return; diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod.c index 4738d9aa..06318517 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod.c @@ -159,6 +159,7 @@ void kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod( KAI_ASSUME(dst_stride_col == sizeof(float)); KAI_ASSUME((k % bl) == 0); KAI_ASSUME((bl % kai_bl) == 0); + KAI_ASSUME(m == 1); if (m == 0) { return; diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4vlx4_1x4vl_sme2_sdot.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4vlx4_1x4vl_sme2_sdot.c index b8e2f832..45b54cdf 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4vlx4_1x4vl_sme2_sdot.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4vlx4_1x4vl_sme2_sdot.c @@ -115,6 +115,7 @@ void kai_run_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4vlx4_1x4vl_sme2_sdot( float* dst, // NOLINT(readability-non-const-parameter) size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT(dst_stride_col == sizeof(float)); + KAI_ASSUME(m == 1); if (m == 0 || n == 0 || k == 0) { return; diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod.c 
b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod.c index ab136e1a..d56a7205 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod.c @@ -126,6 +126,7 @@ void kai_run_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod( float scalar_min, // float scalar_max) { KAI_ASSUME(dst_stride_col == sizeof(float)); + KAI_ASSUME(m == 1); if (m == 0) { return; diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod.c index 03cf6abb..20b178f5 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod.c @@ -103,6 +103,7 @@ void kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod( float* restrict dst, // NOLINT(readability-non-const-parameter) size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT(dst_stride_col == sizeof(float)); + KAI_ASSUME(m == 1); if (m == 0) { return; diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod.c index c9122f5a..4bd47c15 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod.c +++ 
b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod.c @@ -103,6 +103,7 @@ void kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod( float* restrict dst, // NOLINT(readability-non-const-parameter) size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT(dst_stride_col == sizeof(float)); + KAI_ASSUME(m == 1); if (m == 0) { return; diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod.c index bd5246fa..1d159b4f 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod.c @@ -124,6 +124,7 @@ void kai_run_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod( float scalar_min, // float scalar_max) { KAI_ASSUME(dst_stride_col == sizeof(float)); + KAI_ASSUME(m == 1); if (m == 0) { return; diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod.c index 784a1165..4367df9f 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod.c @@ -124,6 +124,7 @@ void kai_run_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod( float scalar_min, // float scalar_max) { KAI_ASSUME(dst_stride_col == sizeof(float)); + KAI_ASSUME(m == 1); if (m == 0) { return; -- GitLab From 405cc8387d7666d2d28b79d14ca34720bc4ecc5d Mon Sep 17 
00:00:00 2001 From: Evie Wright Date: Fri, 28 Feb 2025 13:08:02 +0000 Subject: [PATCH 2/8] add check for extra ukernel Signed-off-by: Evie Wright --- ...atmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c index 69096ff7..e4f2bcfd 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c @@ -115,6 +115,7 @@ void kai_run_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod( size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSUME(bl == kai_bl); KAI_ASSUME(k % kai_bl == 0); + KAI_ASSUME(m == 1); KAI_ASSUME(dst_stride_col == sizeof(float)); if (m == 0) { -- GitLab From c9b54a6a3ee49daa9d67027b689aa6549df81f20 Mon Sep 17 00:00:00 2001 From: Evie Wright Date: Fri, 28 Feb 2025 15:43:21 +0000 Subject: [PATCH 3/8] modify example tests to skip gemv kernels for non-gemv shapes Signed-off-by: Evie Wright --- .../matmul_clamp_f32_qai8dxp_qsi4c32p.cpp | 13 ++++++++++--- .../matmul_clamp_f32_qai8dxp_qsi4cxp.cpp | 7 ++++++- .../matmul_clamp_f32_qsi8d32p_qsi4c32p.cpp | 7 ++++++- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/examples/matmul_clamp_f32_qai8dxp_qsi4c32p/matmul_clamp_f32_qai8dxp_qsi4c32p.cpp b/examples/matmul_clamp_f32_qai8dxp_qsi4c32p/matmul_clamp_f32_qai8dxp_qsi4c32p.cpp index 4ce0e7b4..64e6d34b 100644 --- a/examples/matmul_clamp_f32_qai8dxp_qsi4c32p/matmul_clamp_f32_qai8dxp_qsi4c32p.cpp +++ b/examples/matmul_clamp_f32_qai8dxp_qsi4c32p/matmul_clamp_f32_qai8dxp_qsi4c32p.cpp @@ -1,5 +1,5 @@ // -// 
SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -628,6 +628,15 @@ int main() { //------------------------------------ //------------------------------------ for (size_t idx_variant = 0; idx_variant < num_ukernel_variants; ++idx_variant) { + std::cout << "TEST[" << idx_variant << "]: Dynamic quantization + matmul" << std::endl; + std::cout << "- ukernel: " << ukernel_variants[idx_variant].name << std::endl; + // Skip gemv kernels for non-gemv shapes. Gemv kernels are optimized for m=1 only + if ((m > 1) && (ukernel_variants[idx_variant].ukernel.get_m_step() == 1)) { + std::cout << "Status: SKIPPED" << std::endl; + std::cout << "------------" << std::endl; + continue; + } + // Get the packing parameters const size_t mr = ukernel_variants[idx_variant].ukernel.get_mr(); const size_t nr = ukernel_variants[idx_variant].ukernel.get_nr(); @@ -735,8 +744,6 @@ int main() { const bool is_valid = is_output_correct(m, n, 0.0001f, (const float*)dst_ref_mtx_f32, (const float*)dst_act_mtx_f32); - std::cout << "TEST[" << idx_variant << "]: Dynamic quantization + matmul" << std::endl; - std::cout << "- ukernel: " << ukernel_variants[idx_variant].name << std::endl; if (is_valid) { std::cout << "- Status: PASSED" << std::endl; std::cout << "- Performance: " << elap.count() << " us" << std::endl; diff --git a/examples/matmul_clamp_f32_qai8dxp_qsi4cxp/matmul_clamp_f32_qai8dxp_qsi4cxp.cpp b/examples/matmul_clamp_f32_qai8dxp_qsi4cxp/matmul_clamp_f32_qai8dxp_qsi4cxp.cpp index 53e64664..ef529954 100644 --- a/examples/matmul_clamp_f32_qai8dxp_qsi4cxp/matmul_clamp_f32_qai8dxp_qsi4cxp.cpp +++ b/examples/matmul_clamp_f32_qai8dxp_qsi4cxp/matmul_clamp_f32_qai8dxp_qsi4cxp.cpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its 
affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -606,6 +606,11 @@ int main(int argc, char** argv) { //------------------------------------ for (size_t idx_variant = 0; idx_variant < num_ukernel_variants; ++idx_variant) { std::cout << "Testing " << ukernel_variants[idx_variant].name << std::endl; + // Skip gemv kernels for non-gemv shapes. Gemv kernels are optimized for m=1 only + if ((m > 1) && (ukernel_variants[idx_variant].ukernel.get_m_step() == 1)) { + printf("TEST[%ld] = SKIPPED\n", idx_variant); + continue; + } // Get the packing parameters const size_t mr = ukernel_variants[idx_variant].ukernel.get_mr(); diff --git a/examples/matmul_clamp_f32_qsi8d32p_qsi4c32p/matmul_clamp_f32_qsi8d32p_qsi4c32p.cpp b/examples/matmul_clamp_f32_qsi8d32p_qsi4c32p/matmul_clamp_f32_qsi8d32p_qsi4c32p.cpp index 6d992b62..118f14ca 100644 --- a/examples/matmul_clamp_f32_qsi8d32p_qsi4c32p/matmul_clamp_f32_qsi8d32p_qsi4c32p.cpp +++ b/examples/matmul_clamp_f32_qsi8d32p_qsi4c32p/matmul_clamp_f32_qsi8d32p_qsi4c32p.cpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -341,6 +341,11 @@ int main(int argc, char** argv) { //------------------------------------ for (size_t idx_variant = 0; idx_variant < num_ukernel_variants; ++idx_variant) { std::cout << "Testing " << ukernel_variants[idx_variant].name << std::endl; + // Skip gemv kernels for non-gemv shapes. 
Gemv kernels are optimized for m=1 only + if ((m > 1) && (ukernel_variants[idx_variant].ukernel.get_m_step() == 1)) { + printf("TEST[%ld] = SKIPPED\n", idx_variant); + continue; + } // Get the packing parameters const size_t mr = ukernel_variants[idx_variant].ukernel.get_mr(); -- GitLab From 79ef006b4a23794bd9c0227e59d2e6ee3e07ecd8 Mon Sep 17 00:00:00 2001 From: Evie Wright Date: Fri, 28 Feb 2025 17:17:04 +0000 Subject: [PATCH 4/8] skip gemv unit tests for unsupported shapes Signed-off-by: Evie Wright --- .../matmul_clamp_f32_qai8dxp_qsi4c32p_test.cpp | 8 ++++++++ .../matmul_clamp_f32_qai8dxp_qsi4cxp_test.cpp | 16 ++++++++++++++++ .../matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp | 8 ++++++++ 3 files changed, 32 insertions(+) diff --git a/test/tests/matmul_clamp_f32_qai8dxp_qsi4c32p_test.cpp b/test/tests/matmul_clamp_f32_qai8dxp_qsi4c32p_test.cpp index 90000e97..a6db73c5 100644 --- a/test/tests/matmul_clamp_f32_qai8dxp_qsi4c32p_test.cpp +++ b/test/tests/matmul_clamp_f32_qai8dxp_qsi4c32p_test.cpp @@ -173,6 +173,10 @@ TEST_P(MatMulTest_f32_qmatmul_clamp_f32_qai8dxp_qsi4c32p, EndToEnd_RHS_nxk) { const auto kr = ukernel_variant.interface.get_kr(); const auto sr = ukernel_variant.interface.get_sr(); + if (mr == 1 && M > 1) { + GTEST_SKIP() << "Kernel does not support M != 1"; + } + // Generates input data. const auto ref_lhs = fill_random(M * K, seed + 0); const auto ref_rhs = fill_random(N * K, seed + 1); @@ -307,6 +311,10 @@ TEST_P(MatMulTest_f32_qmatmul_clamp_f32_qai8dxp_qsi4c32p, EndToEnd_RHS_kxn) { const auto kr = ukernel_variant.interface.get_kr(); const auto sr = ukernel_variant.interface.get_sr(); + if (mr == 1 && M > 1) { + GTEST_SKIP() << "Kernel does not support M != 1"; + } + // Generates input data. 
const auto ref_lhs = fill_random<float>(M * K, seed + 0); const auto ref_rhs_transposed = fill_random<float>(N * K, seed + 1); diff --git a/test/tests/matmul_clamp_f32_qai8dxp_qsi4cxp_test.cpp b/test/tests/matmul_clamp_f32_qai8dxp_qsi4cxp_test.cpp index c0018dd6..5c84b44b 100644 --- a/test/tests/matmul_clamp_f32_qai8dxp_qsi4cxp_test.cpp +++ b/test/tests/matmul_clamp_f32_qai8dxp_qsi4cxp_test.cpp @@ -291,6 +291,10 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi4cxp, EndToEnd_RHS_nxk_qsi4cx) { const auto kr = ukernel_variant.interface.get_kr(); const auto sr = ukernel_variant.interface.get_sr(); + if (mr == 1 && M > 1) { + GTEST_SKIP() << "Kernel does not support M != 1"; + } + // Generates input data. const auto ref_lhs = fill_random<float>(M * K, seed + 0); const auto ref_rhs = fill_random<float>(N * K, seed + 1); @@ -415,6 +419,10 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi4cxp, EndToEnd_RHS_nxk_qsu4cx) { const auto kr = ukernel_variant.interface.get_kr(); const auto sr = ukernel_variant.interface.get_sr(); + if (mr == 1 && M > 1) { + GTEST_SKIP() << "Kernel does not support M != 1"; + } + // Generates input data. const auto ref_lhs = fill_random<float>(M * K, seed + 0); const auto ref_rhs = fill_random<float>(N * K, seed + 1); @@ -542,6 +550,10 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi4cxp, EndToEnd_RHS_kxn_qsi4cx) { const auto kr = ukernel_variant.interface.get_kr(); const auto sr = ukernel_variant.interface.get_sr(); + if (mr == 1 && M > 1) { + GTEST_SKIP() << "Kernel does not support M != 1"; + } + // Generates input data. const auto ref_lhs = fill_random<float>(M * K, seed + 0); const auto ref_rhs = fill_random<float>(N * K, seed + 1); @@ -670,6 +682,10 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi4cxp, EndToEnd_RHS_kxn_qsu4cx) { const auto kr = ukernel_variant.interface.get_kr(); const auto sr = ukernel_variant.interface.get_sr(); + if (mr == 1 && M > 1) { + GTEST_SKIP() << "Kernel does not support M != 1"; + } + // Generates input data. 
const auto ref_lhs = fill_random<float>(M * K, seed + 0); const auto ref_rhs = fill_random<float>(N * K, seed + 1); diff --git a/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp b/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp index 4dd69e4b..8f0fa8b9 100644 --- a/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp +++ b/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp @@ -112,6 +112,10 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_nxk_qsi8cx) { const auto kr = ukernel_variant.interface.get_kr(); const auto sr = ukernel_variant.interface.get_sr(); + if (mr == 1 && M > 1) { + GTEST_SKIP() << "Kernel does not support M != 1"; + } + // Generates input data. const auto ref_lhs = fill_random<float>(M * K, seed + 0); const auto ref_rhs = fill_random<float>(N * K, seed + 1); @@ -226,6 +230,10 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_kxn_qsi8cx) { const auto kr = ukernel_variant.interface.get_kr(); const auto sr = ukernel_variant.interface.get_sr(); + if (mr == 1 && M > 1) { + GTEST_SKIP() << "Kernel does not support M != 1"; + } + // Generates input data. 
const auto ref_lhs = fill_random<float>(M * K, seed + 0); const auto ref_rhs = fill_random<float>(N * K, seed + 1); -- GitLab From cfa05472e1149f616b0c760c946dd7bdee7ef377 Mon Sep 17 00:00:00 2001 From: Evie Wright Date: Mon, 3 Mar 2025 11:21:55 +0000 Subject: [PATCH 5/8] add clarity to output of example tests when skipped Signed-off-by: Evie Wright --- .../matmul_clamp_f32_qai8dxp_qsi4c32p.cpp | 4 ++-- .../matmul_clamp_f32_qai8dxp_qsi4cxp.cpp | 4 ++-- .../matmul_clamp_f32_qsi8d32p_qsi4c32p.cpp | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/matmul_clamp_f32_qai8dxp_qsi4c32p/matmul_clamp_f32_qai8dxp_qsi4c32p.cpp b/examples/matmul_clamp_f32_qai8dxp_qsi4c32p/matmul_clamp_f32_qai8dxp_qsi4c32p.cpp index 64e6d34b..16b95b7c 100644 --- a/examples/matmul_clamp_f32_qai8dxp_qsi4c32p/matmul_clamp_f32_qai8dxp_qsi4c32p.cpp +++ b/examples/matmul_clamp_f32_qai8dxp_qsi4c32p/matmul_clamp_f32_qai8dxp_qsi4c32p.cpp @@ -630,9 +630,9 @@ int main() { for (size_t idx_variant = 0; idx_variant < num_ukernel_variants; ++idx_variant) { std::cout << "TEST[" << idx_variant << "]: Dynamic quantization + matmul" << std::endl; std::cout << "- ukernel: " << ukernel_variants[idx_variant].name << std::endl; - // Skip gemv kernels for non-gemv shapes. 
Gemv kernels are optimized for m=1 only + // Skip gemv kernels for non-gemv shapes if ((m > 1) && (ukernel_variants[idx_variant].ukernel.get_m_step() == 1)) { - std::cout << "Status: SKIPPED" << std::endl; + std::cout << "Status: SKIPPED - GEMV kernels are optimized for m=1 only" << std::endl; std::cout << "------------" << std::endl; continue; } diff --git a/examples/matmul_clamp_f32_qai8dxp_qsi4cxp/matmul_clamp_f32_qai8dxp_qsi4cxp.cpp b/examples/matmul_clamp_f32_qai8dxp_qsi4cxp/matmul_clamp_f32_qai8dxp_qsi4cxp.cpp index ef529954..be609b28 100644 --- a/examples/matmul_clamp_f32_qai8dxp_qsi4cxp/matmul_clamp_f32_qai8dxp_qsi4cxp.cpp +++ b/examples/matmul_clamp_f32_qai8dxp_qsi4cxp/matmul_clamp_f32_qai8dxp_qsi4cxp.cpp @@ -606,9 +606,9 @@ int main(int argc, char** argv) { //------------------------------------ for (size_t idx_variant = 0; idx_variant < num_ukernel_variants; ++idx_variant) { std::cout << "Testing " << ukernel_variants[idx_variant].name << std::endl; - // Skip gemv kernels for non-gemv shapes. 
Gemv kernels are optimized for m=1 only + // Skip gemv kernels for non-gemv shapes if ((m > 1) && (ukernel_variants[idx_variant].ukernel.get_m_step() == 1)) { - printf("TEST[%ld] = SKIPPED\n", idx_variant); + printf("TEST[%ld] = SKIPPED (GEMV kernels optimized for m=1 only)\n", idx_variant); continue; } diff --git a/examples/matmul_clamp_f32_qsi8d32p_qsi4c32p/matmul_clamp_f32_qsi8d32p_qsi4c32p.cpp b/examples/matmul_clamp_f32_qsi8d32p_qsi4c32p/matmul_clamp_f32_qsi8d32p_qsi4c32p.cpp index 118f14ca..60185ee1 100644 --- a/examples/matmul_clamp_f32_qsi8d32p_qsi4c32p/matmul_clamp_f32_qsi8d32p_qsi4c32p.cpp +++ b/examples/matmul_clamp_f32_qsi8d32p_qsi4c32p/matmul_clamp_f32_qsi8d32p_qsi4c32p.cpp @@ -341,9 +341,9 @@ int main(int argc, char** argv) { //------------------------------------ for (size_t idx_variant = 0; idx_variant < num_ukernel_variants; ++idx_variant) { std::cout << "Testing " << ukernel_variants[idx_variant].name << std::endl; - // Skip gemv kernels for non-gemv shapes. Gemv kernels are optimized for m=1 only + // Skip gemv kernels for non-gemv shapes if ((m > 1) && (ukernel_variants[idx_variant].ukernel.get_m_step() == 1)) { - printf("TEST[%ld] = SKIPPED\n", idx_variant); + printf("TEST[%ld] = SKIPPED (GEMV kernels optimized for m=1 only)\n", idx_variant); continue; } -- GitLab From c4b01701884c286ad498d656f52808b15a3c15d1 Mon Sep 17 00:00:00 2001 From: Evie Wright Date: Mon, 3 Mar 2025 14:18:51 +0000 Subject: [PATCH 6/8] update print statements in examples to C++ style Signed-off-by: Evie Wright --- .../matmul_clamp_f32_qai8dxp_qsi4cxp.cpp | 7 ++++--- .../matmul_clamp_f32_qsi8d32p_qsi4c32p.cpp | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/examples/matmul_clamp_f32_qai8dxp_qsi4cxp/matmul_clamp_f32_qai8dxp_qsi4cxp.cpp b/examples/matmul_clamp_f32_qai8dxp_qsi4cxp/matmul_clamp_f32_qai8dxp_qsi4cxp.cpp index be609b28..ed262324 100644 --- a/examples/matmul_clamp_f32_qai8dxp_qsi4cxp/matmul_clamp_f32_qai8dxp_qsi4cxp.cpp +++ 
b/examples/matmul_clamp_f32_qai8dxp_qsi4cxp/matmul_clamp_f32_qai8dxp_qsi4cxp.cpp @@ -608,7 +608,8 @@ int main(int argc, char** argv) { std::cout << "Testing " << ukernel_variants[idx_variant].name << std::endl; // Skip gemv kernels for non-gemv shapes if ((m > 1) && (ukernel_variants[idx_variant].ukernel.get_m_step() == 1)) { - printf("TEST[%ld] = SKIPPED (GEMV kernels optimized for m=1 only)\n", idx_variant); + std::cout << "TEST[" << idx_variant << "] = SKIPPED" << std::endl; + std::cout << "- GEMV kernels are optimized for m=1 only, but here m=" << m << std::endl; continue; } @@ -697,9 +698,9 @@ int main(int argc, char** argv) { is_output_correct(m, n, 0.0001f, (const float*)dst_ref_mtx_f32, (const float*)dst_act_mtx_f32); if (is_valid) { - printf("TEST[%ld] = PASSED\n", idx_variant); + std::cout << "TEST[" << idx_variant << "] = PASSED" << std::endl; } else { - printf("TEST[%ld] = FAILED\n", idx_variant); + std::cout << "TEST[" << idx_variant << "] = FAILED" << std::endl; } delete[] lhs_packed_mtx_qa8dx; diff --git a/examples/matmul_clamp_f32_qsi8d32p_qsi4c32p/matmul_clamp_f32_qsi8d32p_qsi4c32p.cpp b/examples/matmul_clamp_f32_qsi8d32p_qsi4c32p/matmul_clamp_f32_qsi8d32p_qsi4c32p.cpp index 60185ee1..9d233f33 100644 --- a/examples/matmul_clamp_f32_qsi8d32p_qsi4c32p/matmul_clamp_f32_qsi8d32p_qsi4c32p.cpp +++ b/examples/matmul_clamp_f32_qsi8d32p_qsi4c32p/matmul_clamp_f32_qsi8d32p_qsi4c32p.cpp @@ -343,7 +343,8 @@ int main(int argc, char** argv) { std::cout << "Testing " << ukernel_variants[idx_variant].name << std::endl; // Skip gemv kernels for non-gemv shapes if ((m > 1) && (ukernel_variants[idx_variant].ukernel.get_m_step() == 1)) { - printf("TEST[%ld] = SKIPPED (GEMV kernels optimized for m=1 only)\n", idx_variant); + std::cout << "TEST[" << idx_variant << "] = SKIPPED" << std::endl; + std::cout << "- GEMV kernels are optimized for m=1 only, but here m=" << m << std::endl; continue; } @@ -420,10 +421,10 @@ int main(int argc, char** argv) { is_output_correct(m, n, 
0.0001f, (const float*)dst_ref_mtx_f32, (const float*)dst_act_mtx_f32); if (is_valid) { - printf("TEST[%ld] = PASSED\n", idx_variant); + std::cout << "TEST[" << idx_variant << "] = PASSED" << std::endl; std::cout << "- Performance: " << elap.count() << " us" << std::endl; } else { - printf("TEST[%ld] = FAILED\n", idx_variant); + std::cout << "TEST[" << idx_variant << "] = FAILED" << std::endl; } delete[] lhs_packed_mtx_qs8d32; delete[] rhs_packed_mtx_qs4c32; -- GitLab From f492b865b5803ab1abcead726c65d78db6e02131 Mon Sep 17 00:00:00 2001 From: Evie Wright Date: Mon, 3 Mar 2025 14:38:53 +0000 Subject: [PATCH 7/8] print current value of M when skipping unit tests because M > 1 Signed-off-by: Evie Wright --- test/tests/matmul_clamp_f32_qai8dxp_qsi4c32p_test.cpp | 4 ++-- test/tests/matmul_clamp_f32_qai8dxp_qsi4cxp_test.cpp | 8 ++++---- test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp | 4 ++-- test/tests/matmul_clamp_f32_qsi8d32p_qsi4c32p_test.cpp | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/test/tests/matmul_clamp_f32_qai8dxp_qsi4c32p_test.cpp b/test/tests/matmul_clamp_f32_qai8dxp_qsi4c32p_test.cpp index a6db73c5..5925a18f 100644 --- a/test/tests/matmul_clamp_f32_qai8dxp_qsi4c32p_test.cpp +++ b/test/tests/matmul_clamp_f32_qai8dxp_qsi4c32p_test.cpp @@ -174,7 +174,7 @@ TEST_P(MatMulTest_f32_qmatmul_clamp_f32_qai8dxp_qsi4c32p, EndToEnd_RHS_nxk) { const auto sr = ukernel_variant.interface.get_sr(); if (mr == 1 && M > 1) { - GTEST_SKIP() << "Kernel does not support M != 1"; + GTEST_SKIP() << "Kernel does not support M != 1, but here M = " << M; } // Generates input data. @@ -312,7 +312,7 @@ TEST_P(MatMulTest_f32_qmatmul_clamp_f32_qai8dxp_qsi4c32p, EndToEnd_RHS_kxn) { const auto sr = ukernel_variant.interface.get_sr(); if (mr == 1 && M > 1) { - GTEST_SKIP() << "Kernel does not support M != 1"; + GTEST_SKIP() << "Kernel does not support M != 1, but here M = " << M; } // Generates input data. 
diff --git a/test/tests/matmul_clamp_f32_qai8dxp_qsi4cxp_test.cpp b/test/tests/matmul_clamp_f32_qai8dxp_qsi4cxp_test.cpp index 5c84b44b..8c418a92 100644 --- a/test/tests/matmul_clamp_f32_qai8dxp_qsi4cxp_test.cpp +++ b/test/tests/matmul_clamp_f32_qai8dxp_qsi4cxp_test.cpp @@ -292,7 +292,7 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi4cxp, EndToEnd_RHS_nxk_qsi4cx) { const auto sr = ukernel_variant.interface.get_sr(); if (mr == 1 && M > 1) { - GTEST_SKIP() << "Kernel does not support M != 1"; + GTEST_SKIP() << "Kernel does not support M != 1, but here M = " << M; } // Generates input data. @@ -420,7 +420,7 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi4cxp, EndToEnd_RHS_nxk_qsu4cx) { const auto sr = ukernel_variant.interface.get_sr(); if (mr == 1 && M > 1) { - GTEST_SKIP() << "Kernel does not support M != 1"; + GTEST_SKIP() << "Kernel does not support M != 1, but here M = " << M; } // Generates input data. @@ -551,7 +551,7 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi4cxp, EndToEnd_RHS_kxn_qsi4cx) { const auto sr = ukernel_variant.interface.get_sr(); if (mr == 1 && M > 1) { - GTEST_SKIP() << "Kernel does not support M != 1"; + GTEST_SKIP() << "Kernel does not support M != 1, but here M = " << M; } // Generates input data. @@ -683,7 +683,7 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi4cxp, EndToEnd_RHS_kxn_qsu4cx) { const auto sr = ukernel_variant.interface.get_sr(); if (mr == 1 && M > 1) { - GTEST_SKIP() << "Kernel does not support M != 1"; + GTEST_SKIP() << "Kernel does not support M != 1, but here M = " << M; } // Generates input data. 
diff --git a/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp b/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp index 8f0fa8b9..365da8e3 100644 --- a/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp +++ b/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp @@ -113,7 +113,7 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_nxk_qsi8cx) { const auto sr = ukernel_variant.interface.get_sr(); if (mr == 1 && M > 1) { - GTEST_SKIP() << "Kernel does not support M != 1"; + GTEST_SKIP() << "Kernel does not support M != 1, but here M = " << M; } // Generates input data. @@ -231,7 +231,7 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_kxn_qsi8cx) { const auto sr = ukernel_variant.interface.get_sr(); if (mr == 1 && M > 1) { - GTEST_SKIP() << "Kernel does not support M != 1"; + GTEST_SKIP() << "Kernel does not support M != 1, but here M = " << M; } // Generates input data. diff --git a/test/tests/matmul_clamp_f32_qsi8d32p_qsi4c32p_test.cpp b/test/tests/matmul_clamp_f32_qsi8d32p_qsi4c32p_test.cpp index ce5c6ebc..1236677f 100644 --- a/test/tests/matmul_clamp_f32_qsi8d32p_qsi4c32p_test.cpp +++ b/test/tests/matmul_clamp_f32_qsi8d32p_qsi4c32p_test.cpp @@ -180,7 +180,7 @@ TEST_P(MatMulTest_f32_qsi8d32p_qsi4c32p, EndToEnd) { const auto sr = ukernel_variant.ukernel.interface.get_sr(); if (mr == 1 && M > 1) { - GTEST_SKIP() << "Kernel does not support M != 1"; + GTEST_SKIP() << "Kernel does not support M != 1, but here M = " << M; } auto m_step = ukernel_variant.ukernel.interface.get_m_step(); -- GitLab From 2678c9fb996ce7b0f9723156e08ea8ebb6de4b5f Mon Sep 17 00:00:00 2001 From: Evie Wright Date: Tue, 4 Mar 2025 11:39:56 +0000 Subject: [PATCH 8/8] switch check for gemv kernel to use m_step in all cases Signed-off-by: Evie Wright --- ...matmul_clamp_f32_qai8dxp_qsi4c32p_test.cpp | 16 +++++----- .../matmul_clamp_f32_qai8dxp_qsi4cxp_test.cpp | 32 +++++++++---------- .../matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp | 16 +++++----- 
...atmul_clamp_f32_qsi8d32p_qsi4c32p_test.cpp | 8 ++--- 4 files changed, 36 insertions(+), 36 deletions(-) diff --git a/test/tests/matmul_clamp_f32_qai8dxp_qsi4c32p_test.cpp b/test/tests/matmul_clamp_f32_qai8dxp_qsi4c32p_test.cpp index 5925a18f..96f67ffe 100644 --- a/test/tests/matmul_clamp_f32_qai8dxp_qsi4c32p_test.cpp +++ b/test/tests/matmul_clamp_f32_qai8dxp_qsi4c32p_test.cpp @@ -173,7 +173,10 @@ TEST_P(MatMulTest_f32_qmatmul_clamp_f32_qai8dxp_qsi4c32p, EndToEnd_RHS_nxk) { const auto kr = ukernel_variant.interface.get_kr(); const auto sr = ukernel_variant.interface.get_sr(); - if (mr == 1 && M > 1) { + const auto m_step = ukernel_variant.interface.get_m_step(); + ASSERT_TRUE(m_step % mr == 0); + + if (m_step == 1 && M > 1) { GTEST_SKIP() << "Kernel does not support M != 1, but here M = " << M; } @@ -197,9 +200,6 @@ TEST_P(MatMulTest_f32_qmatmul_clamp_f32_qai8dxp_qsi4c32p, EndToEnd_RHS_nxk) { ref_rhs_scales.data(), nullptr, bl, ref_biases.data(), std::numeric_limits<float>::lowest(), std::numeric_limits<float>::max()); - auto m_step = ukernel_variant.interface.get_m_step(); - ASSERT_TRUE(m_step % mr == 0); - auto n_step = ukernel_variant.interface.get_n_step(); ASSERT_TRUE(n_step % nr == 0); @@ -311,7 +311,10 @@ TEST_P(MatMulTest_f32_qmatmul_clamp_f32_qai8dxp_qsi4c32p, EndToEnd_RHS_kxn) { const auto kr = ukernel_variant.interface.get_kr(); const auto sr = ukernel_variant.interface.get_sr(); - if (mr == 1 && M > 1) { + const auto m_step = ukernel_variant.interface.get_m_step(); + ASSERT_TRUE(m_step % mr == 0); + + if (m_step == 1 && M > 1) { GTEST_SKIP() << "Kernel does not support M != 1, but here M = " << M; } @@ -347,9 +350,6 @@ TEST_P(MatMulTest_f32_qmatmul_clamp_f32_qai8dxp_qsi4c32p, EndToEnd_RHS_kxn) { ref_rhs_scales.data(), nullptr, bl, ref_biases.data(), std::numeric_limits<float>::lowest(), std::numeric_limits<float>::max()); - auto m_step = ukernel_variant.interface.get_m_step(); - ASSERT_TRUE(m_step % mr == 0); - auto n_step = ukernel_variant.interface.get_n_step(); 
ASSERT_TRUE(n_step % nr == 0); diff --git a/test/tests/matmul_clamp_f32_qai8dxp_qsi4cxp_test.cpp b/test/tests/matmul_clamp_f32_qai8dxp_qsi4cxp_test.cpp index 8c418a92..ea8eb715 100644 --- a/test/tests/matmul_clamp_f32_qai8dxp_qsi4cxp_test.cpp +++ b/test/tests/matmul_clamp_f32_qai8dxp_qsi4cxp_test.cpp @@ -291,7 +291,10 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi4cxp, EndToEnd_RHS_nxk_qsi4cx) { const auto kr = ukernel_variant.interface.get_kr(); const auto sr = ukernel_variant.interface.get_sr(); - if (mr == 1 && M > 1) { + const auto m_step = ukernel_variant.interface.get_m_step(); + ASSERT_TRUE(m_step % mr == 0); + + if (m_step == 1 && M > 1) { GTEST_SKIP() << "Kernel does not support M != 1, but here M = " << M; } @@ -314,9 +317,6 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi4cxp, EndToEnd_RHS_nxk_qsi4cx) { ref_rhs_scales.data(), nullptr, K, ref_biases.data(), std::numeric_limits::lowest(), std::numeric_limits::max()); - auto m_step = ukernel_variant.interface.get_m_step(); - ASSERT_TRUE(m_step % mr == 0); - auto n_step = ukernel_variant.interface.get_n_step(); ASSERT_TRUE(n_step % nr == 0); @@ -419,7 +419,10 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi4cxp, EndToEnd_RHS_nxk_qsu4cx) { const auto kr = ukernel_variant.interface.get_kr(); const auto sr = ukernel_variant.interface.get_sr(); - if (mr == 1 && M > 1) { + const auto m_step = ukernel_variant.interface.get_m_step(); + ASSERT_TRUE(m_step % mr == 0); + + if (m_step == 1 && M > 1) { GTEST_SKIP() << "Kernel does not support M != 1, but here M = " << M; } @@ -442,9 +445,6 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi4cxp, EndToEnd_RHS_nxk_qsu4cx) { ref_rhs_scales.data(), nullptr, K, ref_biases.data(), std::numeric_limits::lowest(), std::numeric_limits::max()); - auto m_step = ukernel_variant.interface.get_m_step(); - ASSERT_TRUE(m_step % mr == 0); - auto n_step = ukernel_variant.interface.get_n_step(); ASSERT_TRUE(n_step % nr == 0); @@ -550,7 +550,10 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi4cxp, EndToEnd_RHS_kxn_qsi4cx) { const auto kr = 
ukernel_variant.interface.get_kr(); const auto sr = ukernel_variant.interface.get_sr(); - if (mr == 1 && M > 1) { + const auto m_step = ukernel_variant.interface.get_m_step(); + ASSERT_TRUE(m_step % mr == 0); + + if (m_step == 1 && M > 1) { GTEST_SKIP() << "Kernel does not support M != 1, but here M = " << M; } @@ -584,9 +587,6 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi4cxp, EndToEnd_RHS_kxn_qsi4cx) { ref_rhs_scales.data(), nullptr, K, ref_biases.data(), std::numeric_limits::lowest(), std::numeric_limits::max()); - auto m_step = ukernel_variant.interface.get_m_step(); - ASSERT_TRUE(m_step % mr == 0); - auto n_step = ukernel_variant.interface.get_n_step(); ASSERT_TRUE(n_step % nr == 0); @@ -682,7 +682,10 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi4cxp, EndToEnd_RHS_kxn_qsu4cx) { const auto kr = ukernel_variant.interface.get_kr(); const auto sr = ukernel_variant.interface.get_sr(); - if (mr == 1 && M > 1) { + const auto m_step = ukernel_variant.interface.get_m_step(); + ASSERT_TRUE(m_step % mr == 0); + + if (m_step == 1 && M > 1) { GTEST_SKIP() << "Kernel does not support M != 1, but here M = " << M; } @@ -717,9 +720,6 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi4cxp, EndToEnd_RHS_kxn_qsu4cx) { ref_rhs_scales.data(), nullptr, K, ref_biases.data(), std::numeric_limits::lowest(), std::numeric_limits::max()); - auto m_step = ukernel_variant.interface.get_m_step(); - ASSERT_TRUE(m_step % mr == 0); - auto n_step = ukernel_variant.interface.get_n_step(); ASSERT_TRUE(n_step % nr == 0); diff --git a/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp b/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp index 365da8e3..208c9a5c 100644 --- a/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp +++ b/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp @@ -112,7 +112,10 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_nxk_qsi8cx) { const auto kr = ukernel_variant.interface.get_kr(); const auto sr = ukernel_variant.interface.get_sr(); - if (mr == 1 && M > 1) { + const auto m_step = 
ukernel_variant.interface.get_m_step(); + ASSERT_TRUE(m_step % mr == 0); + + if (m_step == 1 && M > 1) { GTEST_SKIP() << "Kernel does not support M != 1, but here M = " << M; } @@ -135,9 +138,6 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_nxk_qsi8cx) { ref_rhs_scales.data(), nullptr, K, ref_biases.data(), std::numeric_limits::lowest(), std::numeric_limits::max()); - auto m_step = ukernel_variant.interface.get_m_step(); - ASSERT_TRUE(m_step % mr == 0); - auto n_step = ukernel_variant.interface.get_n_step(); ASSERT_TRUE(n_step % nr == 0); @@ -230,7 +230,10 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_kxn_qsi8cx) { const auto kr = ukernel_variant.interface.get_kr(); const auto sr = ukernel_variant.interface.get_sr(); - if (mr == 1 && M > 1) { + const auto m_step = ukernel_variant.interface.get_m_step(); + ASSERT_TRUE(m_step % mr == 0); + + if (m_step == 1 && M > 1) { GTEST_SKIP() << "Kernel does not support M != 1, but here M = " << M; } @@ -264,9 +267,6 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_kxn_qsi8cx) { ref_rhs_scales.data(), nullptr, K, ref_biases.data(), std::numeric_limits::lowest(), std::numeric_limits::max()); - auto m_step = ukernel_variant.interface.get_m_step(); - ASSERT_TRUE(m_step % mr == 0); - auto n_step = ukernel_variant.interface.get_n_step(); ASSERT_TRUE(n_step % nr == 0); diff --git a/test/tests/matmul_clamp_f32_qsi8d32p_qsi4c32p_test.cpp b/test/tests/matmul_clamp_f32_qsi8d32p_qsi4c32p_test.cpp index 1236677f..17e4ba44 100644 --- a/test/tests/matmul_clamp_f32_qsi8d32p_qsi4c32p_test.cpp +++ b/test/tests/matmul_clamp_f32_qsi8d32p_qsi4c32p_test.cpp @@ -179,13 +179,13 @@ TEST_P(MatMulTest_f32_qsi8d32p_qsi4c32p, EndToEnd) { const auto kr = ukernel_variant.ukernel.interface.get_kr(); const auto sr = ukernel_variant.ukernel.interface.get_sr(); - if (mr == 1 && M > 1) { + const auto m_step = ukernel_variant.ukernel.interface.get_m_step(); + ASSERT_TRUE(m_step % mr == 0); + + if (m_step == 1 && M > 1) { GTEST_SKIP() << 
"Kernel does not support M != 1, but here M = " << M; } - auto m_step = ukernel_variant.ukernel.interface.get_m_step(); - ASSERT_TRUE(m_step % mr == 0); - auto n_step = ukernel_variant.ukernel.interface.get_n_step(); ASSERT_TRUE(n_step % nr == 0); -- GitLab