diff --git a/examples/matmul_clamp_f32_qai8dxp_qsi4c32p/matmul_clamp_f32_qai8dxp_qsi4c32p.cpp b/examples/matmul_clamp_f32_qai8dxp_qsi4c32p/matmul_clamp_f32_qai8dxp_qsi4c32p.cpp index 4ce0e7b4d7392f3f4efb49a78f20b653f6efe908..16b95b7c9f304fb4b6367566c98852238a5f0b62 100644 --- a/examples/matmul_clamp_f32_qai8dxp_qsi4c32p/matmul_clamp_f32_qai8dxp_qsi4c32p.cpp +++ b/examples/matmul_clamp_f32_qai8dxp_qsi4c32p/matmul_clamp_f32_qai8dxp_qsi4c32p.cpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -628,6 +628,15 @@ int main() { //------------------------------------ //------------------------------------ for (size_t idx_variant = 0; idx_variant < num_ukernel_variants; ++idx_variant) { + std::cout << "TEST[" << idx_variant << "]: Dynamic quantization + matmul" << std::endl; + std::cout << "- ukernel: " << ukernel_variants[idx_variant].name << std::endl; + // Skip gemv kernels for non-gemv shapes + if ((m > 1) && (ukernel_variants[idx_variant].ukernel.get_m_step() == 1)) { + std::cout << "Status: SKIPPED - GEMV kernels are optimized for m=1 only" << std::endl; + std::cout << "------------" << std::endl; + continue; + } + // Get the packing parameters const size_t mr = ukernel_variants[idx_variant].ukernel.get_mr(); const size_t nr = ukernel_variants[idx_variant].ukernel.get_nr(); @@ -735,8 +744,6 @@ int main() { const bool is_valid = is_output_correct(m, n, 0.0001f, (const float*)dst_ref_mtx_f32, (const float*)dst_act_mtx_f32); - std::cout << "TEST[" << idx_variant << "]: Dynamic quantization + matmul" << std::endl; - std::cout << "- ukernel: " << ukernel_variants[idx_variant].name << std::endl; if (is_valid) { std::cout << "- Status: PASSED" << std::endl; std::cout << "- Performance: " << elap.count() << " us" << std::endl; diff --git a/examples/matmul_clamp_f32_qai8dxp_qsi4cxp/matmul_clamp_f32_qai8dxp_qsi4cxp.cpp b/examples/matmul_clamp_f32_qai8dxp_qsi4cxp/matmul_clamp_f32_qai8dxp_qsi4cxp.cpp index 53e64664a602565caa8d10643043a74f72d24416..ed2623243ebc5c8b6d33f8fa3e72bc56aa890d05 100644 --- a/examples/matmul_clamp_f32_qai8dxp_qsi4cxp/matmul_clamp_f32_qai8dxp_qsi4cxp.cpp +++ b/examples/matmul_clamp_f32_qai8dxp_qsi4cxp/matmul_clamp_f32_qai8dxp_qsi4cxp.cpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -606,6 +606,12 @@ int main(int argc, char** argv) { //------------------------------------ for (size_t idx_variant = 0; idx_variant < num_ukernel_variants; ++idx_variant) { std::cout << "Testing " << ukernel_variants[idx_variant].name << std::endl; + // Skip gemv kernels for non-gemv shapes + if ((m > 1) && (ukernel_variants[idx_variant].ukernel.get_m_step() == 1)) { + std::cout << "TEST[" << idx_variant << "] = SKIPPED" << std::endl; + std::cout << "- GEMV kernels are optimized for m=1 only, but here m=" << m << std::endl; + continue; + } // Get the packing parameters const size_t mr = ukernel_variants[idx_variant].ukernel.get_mr(); @@ -692,9 +698,9 @@ int main(int argc, char** argv) { is_output_correct(m, n, 0.0001f, (const float*)dst_ref_mtx_f32, (const float*)dst_act_mtx_f32); if (is_valid) { - printf("TEST[%ld] = PASSED\n", idx_variant); + std::cout << "TEST[" << idx_variant << "] = PASSED" << std::endl; } else { - printf("TEST[%ld] = FAILED\n", idx_variant); + std::cout << "TEST[" << idx_variant << "] = FAILED" << std::endl; } delete[] lhs_packed_mtx_qa8dx; diff --git a/examples/matmul_clamp_f32_qsi8d32p_qsi4c32p/matmul_clamp_f32_qsi8d32p_qsi4c32p.cpp b/examples/matmul_clamp_f32_qsi8d32p_qsi4c32p/matmul_clamp_f32_qsi8d32p_qsi4c32p.cpp index 6d992b625e4387f7d5e7230261764ce11119e545..9d233f33dc4a89b7bb0f44ca6ab4e07e0f814411 100644 --- a/examples/matmul_clamp_f32_qsi8d32p_qsi4c32p/matmul_clamp_f32_qsi8d32p_qsi4c32p.cpp +++ b/examples/matmul_clamp_f32_qsi8d32p_qsi4c32p/matmul_clamp_f32_qsi8d32p_qsi4c32p.cpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -341,6 +341,12 @@ int main(int argc, char** argv) { //------------------------------------ for (size_t idx_variant = 0; idx_variant < num_ukernel_variants; ++idx_variant) { std::cout << "Testing " << ukernel_variants[idx_variant].name << std::endl; + // Skip gemv kernels for non-gemv shapes + if ((m > 1) && (ukernel_variants[idx_variant].ukernel.get_m_step() == 1)) { + std::cout << "TEST[" << idx_variant << "] = SKIPPED" << std::endl; + std::cout << "- GEMV kernels are optimized for m=1 only, but here m=" << m << std::endl; + continue; + } // Get the packing parameters const size_t mr = ukernel_variants[idx_variant].ukernel.get_mr(); @@ -415,10 +421,10 @@ int main(int argc, char** argv) { is_output_correct(m, n, 0.0001f, (const float*)dst_ref_mtx_f32, (const float*)dst_act_mtx_f32); if (is_valid) { - printf("TEST[%ld] = PASSED\n", idx_variant); + std::cout << "TEST[" << idx_variant << "] = PASSED" << std::endl; std::cout << "- Performance: " << elap.count() << " us" << std::endl; } else { - printf("TEST[%ld] = FAILED\n", idx_variant); + std::cout << "TEST[" << idx_variant << "] = FAILED" << std::endl; } delete[] lhs_packed_mtx_qs8d32; delete[] rhs_packed_mtx_qs4c32; diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod.c index c7fac42187d8d0a66b25afc022dcf92cadf77317..843b5c826c4f125b78d11763b61a1d18d2a7022a 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod.c @@ -159,6 +159,7 @@ void kai_run_matmul_clamp_f32_qai8dxp1x4_qsi4c32p4x4_1x4_neon_dotprod( KAI_ASSUME(dst_stride_col == sizeof(float)); KAI_ASSUME((k % bl) == 0); KAI_ASSUME((bl % kai_bl) == 0); + KAI_ASSUME(m == 1); if (m == 0) { return; diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p8x4_1x8_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p8x4_1x8_neon_dotprod.c index ee62e401168447ef1060c7149ae393fcc14439af..3974bccfc21be0d7239214e01afc447f54851379 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p8x4_1x8_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x4_qsi4c32p8x4_1x8_neon_dotprod.c @@ -159,6 +159,7 @@ void kai_run_matmul_clamp_f32_qai8dxp1x4_qsi4c32p8x4_1x8_neon_dotprod( KAI_ASSUME(dst_stride_col == sizeof(float)); KAI_ASSUME((k % bl) == 0); KAI_ASSUME((bl % kai_bl) == 0); + KAI_ASSUME(m == 1); if (m == 0) { return; diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c index 3fc3cfc6573ec1afea035b659c2568cb71a15c69..67e2bd5fa3850384786722dba2f48710626e995a 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c @@ -159,6 +159,7 @@ void kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod( KAI_ASSUME(dst_stride_col == sizeof(float)); KAI_ASSUME((k % bl) == 0); KAI_ASSUME((bl % kai_bl) == 0); + KAI_ASSUME(m == 1); if (m == 0) { return; diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8_neon_dotprod.c index ad9327f4a1b1e0df7508a572ba7efae7aa0e7982..b71cd9558e1194f851ec93a9475d45427d5939f6 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8_neon_dotprod.c @@ -159,6 +159,7 @@ void kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8_neon_dotprod( KAI_ASSUME(dst_stride_col == sizeof(float)); KAI_ASSUME((k % bl) == 0); KAI_ASSUME((bl % kai_bl) == 0); + KAI_ASSUME(m == 1); if (m == 0) { return; diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod.c index 4738d9aa11a0ffbffc797f778bd35627f383bd84..06318517956d153ddfd14eae33c871a1d0d79879 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod.c @@ -159,6 +159,7 @@ void kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod( KAI_ASSUME(dst_stride_col == sizeof(float)); KAI_ASSUME((k % bl) == 0); KAI_ASSUME((bl % kai_bl) == 0); + KAI_ASSUME(m == 1); if (m == 0) { return; diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4vlx4_1x4vl_sme2_sdot.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4vlx4_1x4vl_sme2_sdot.c index b8e2f832a6e19f99910fd6996d6c9365a7e37b98..45b54cdf311add31ead21bcd4aa70823ae1837d7 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4vlx4_1x4vl_sme2_sdot.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4vlx4_1x4vl_sme2_sdot.c @@ -115,6 +115,7 @@ void kai_run_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4vlx4_1x4vl_sme2_sdot( float* dst, // NOLINT(readability-non-const-parameter) size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT(dst_stride_col == sizeof(float)); + KAI_ASSUME(m == 1); if (m == 0 || n == 0 || k == 0) { return; diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod.c index ab136e1a6033aa418b2d155051caf7f834bbc58a..d56a7205048f9190ed1033605f4d597deb21eacb 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod.c @@ -126,6 +126,7 @@ void kai_run_matmul_clamp_f32_qai8dxp1x4_qsi4cxp4x4_1x4_neon_dotprod( float scalar_min, // float scalar_max) { KAI_ASSUME(dst_stride_col == sizeof(float)); + KAI_ASSUME(m == 1); if (m == 0) { return; diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod.c index 03cf6abb0b7a18ec8cb9b44170580e91a3aefaa7..20b178f53652480e01d536ad7d37ad90db74429e 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod.c @@ -103,6 +103,7 @@ void kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod( float* restrict dst, // NOLINT(readability-non-const-parameter) size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT(dst_stride_col == sizeof(float)); + KAI_ASSUME(m == 1); if (m == 0) { return; diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod.c index c9122f5a5d467765c8426f899388e7c607597289..4bd47c15047ce31619e1a46ec40e8245e3660d4e 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod.c @@ -103,6 +103,7 @@ void kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod( float* restrict dst, // NOLINT(readability-non-const-parameter) size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT(dst_stride_col == sizeof(float)); + KAI_ASSUME(m == 1); if (m == 0) { return; diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod.c index bd5246fa692d7bf88fd9fc038fb34c99edab431a..1d159b4f846fb500835e0af40ff3c65ab6364448 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod.c @@ -124,6 +124,7 @@ void kai_run_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod( float scalar_min, // float scalar_max) { KAI_ASSUME(dst_stride_col == sizeof(float)); + KAI_ASSUME(m == 1); if (m == 0) { return; diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod.c index 784a1165aae26d228161c56b07d656a0db096b96..4367df9f487a997ee75e7fb0bc2cc4444616d3a0 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod.c @@ -124,6 +124,7 @@ void kai_run_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod( float scalar_min, // float scalar_max) { KAI_ASSUME(dst_stride_col == sizeof(float)); + KAI_ASSUME(m == 1); if (m == 0) { return; diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c index 69096ff732778cd4898d91568cda70d008d86b93..e4f2bcfdeade71e988f1365dcbb666e64e440e7d 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c @@ -115,6 +115,7 @@ void kai_run_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod( size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSUME(bl == kai_bl); KAI_ASSUME(k % kai_bl == 0); + KAI_ASSUME(m == 1); KAI_ASSUME(dst_stride_col == sizeof(float)); if (m == 0) { diff --git a/test/tests/matmul_clamp_f32_qai8dxp_qsi4c32p_test.cpp b/test/tests/matmul_clamp_f32_qai8dxp_qsi4c32p_test.cpp index 90000e97cbff905c002827ed40d4dac1a5c13f88..96f67ffea7ace315a60764114b5266d0a40dd16f 100644 --- a/test/tests/matmul_clamp_f32_qai8dxp_qsi4c32p_test.cpp +++ b/test/tests/matmul_clamp_f32_qai8dxp_qsi4c32p_test.cpp @@ -173,6 +173,13 @@ TEST_P(MatMulTest_f32_qmatmul_clamp_f32_qai8dxp_qsi4c32p, EndToEnd_RHS_nxk) { const auto kr = ukernel_variant.interface.get_kr(); const auto sr = ukernel_variant.interface.get_sr(); + const auto m_step = ukernel_variant.interface.get_m_step(); + ASSERT_TRUE(m_step % mr == 0); + + if (m_step == 1 && M > 1) { + GTEST_SKIP() << "Kernel does not support M != 1, but here M = " << M; + } + // Generates input data. const auto ref_lhs = fill_random(M * K, seed + 0); const auto ref_rhs = fill_random(N * K, seed + 1); @@ -193,9 +200,6 @@ TEST_P(MatMulTest_f32_qmatmul_clamp_f32_qai8dxp_qsi4c32p, EndToEnd_RHS_nxk) { ref_rhs_scales.data(), nullptr, bl, ref_biases.data(), std::numeric_limits::lowest(), std::numeric_limits::max()); - auto m_step = ukernel_variant.interface.get_m_step(); - ASSERT_TRUE(m_step % mr == 0); - auto n_step = ukernel_variant.interface.get_n_step(); ASSERT_TRUE(n_step % nr == 0); @@ -307,6 +311,13 @@ TEST_P(MatMulTest_f32_qmatmul_clamp_f32_qai8dxp_qsi4c32p, EndToEnd_RHS_kxn) { const auto kr = ukernel_variant.interface.get_kr(); const auto sr = ukernel_variant.interface.get_sr(); + const auto m_step = ukernel_variant.interface.get_m_step(); + ASSERT_TRUE(m_step % mr == 0); + + if (m_step == 1 && M > 1) { + GTEST_SKIP() << "Kernel does not support M != 1, but here M = " << M; + } + // Generates input data. const auto ref_lhs = fill_random(M * K, seed + 0); const auto ref_rhs_transposed = fill_random(N * K, seed + 1); @@ -339,9 +350,6 @@ TEST_P(MatMulTest_f32_qmatmul_clamp_f32_qai8dxp_qsi4c32p, EndToEnd_RHS_kxn) { ref_rhs_scales.data(), nullptr, bl, ref_biases.data(), std::numeric_limits::lowest(), std::numeric_limits::max()); - auto m_step = ukernel_variant.interface.get_m_step(); - ASSERT_TRUE(m_step % mr == 0); - auto n_step = ukernel_variant.interface.get_n_step(); ASSERT_TRUE(n_step % nr == 0); diff --git a/test/tests/matmul_clamp_f32_qai8dxp_qsi4cxp_test.cpp b/test/tests/matmul_clamp_f32_qai8dxp_qsi4cxp_test.cpp index c0018dd600342fc7f25cac1a42ca72e99d19d96c..ea8eb71539cbc2781973c4085fb6427426bcb2ae 100644 --- a/test/tests/matmul_clamp_f32_qai8dxp_qsi4cxp_test.cpp +++ b/test/tests/matmul_clamp_f32_qai8dxp_qsi4cxp_test.cpp @@ -291,6 +291,13 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi4cxp, EndToEnd_RHS_nxk_qsi4cx) { const auto kr = ukernel_variant.interface.get_kr(); const auto sr = ukernel_variant.interface.get_sr(); + const auto m_step = ukernel_variant.interface.get_m_step(); + ASSERT_TRUE(m_step % mr == 0); + + if (m_step == 1 && M > 1) { + GTEST_SKIP() << "Kernel does not support M != 1, but here M = " << M; + } + // Generates input data. const auto ref_lhs = fill_random(M * K, seed + 0); const auto ref_rhs = fill_random(N * K, seed + 1); @@ -310,9 +317,6 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi4cxp, EndToEnd_RHS_nxk_qsi4cx) { ref_rhs_scales.data(), nullptr, K, ref_biases.data(), std::numeric_limits::lowest(), std::numeric_limits::max()); - auto m_step = ukernel_variant.interface.get_m_step(); - ASSERT_TRUE(m_step % mr == 0); - auto n_step = ukernel_variant.interface.get_n_step(); ASSERT_TRUE(n_step % nr == 0); @@ -415,6 +419,13 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi4cxp, EndToEnd_RHS_nxk_qsu4cx) { const auto kr = ukernel_variant.interface.get_kr(); const auto sr = ukernel_variant.interface.get_sr(); + const auto m_step = ukernel_variant.interface.get_m_step(); + ASSERT_TRUE(m_step % mr == 0); + + if (m_step == 1 && M > 1) { + GTEST_SKIP() << "Kernel does not support M != 1, but here M = " << M; + } + // Generates input data. const auto ref_lhs = fill_random(M * K, seed + 0); const auto ref_rhs = fill_random(N * K, seed + 1); @@ -434,9 +445,6 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi4cxp, EndToEnd_RHS_nxk_qsu4cx) { ref_rhs_scales.data(), nullptr, K, ref_biases.data(), std::numeric_limits::lowest(), std::numeric_limits::max()); - auto m_step = ukernel_variant.interface.get_m_step(); - ASSERT_TRUE(m_step % mr == 0); - auto n_step = ukernel_variant.interface.get_n_step(); ASSERT_TRUE(n_step % nr == 0); @@ -542,6 +550,13 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi4cxp, EndToEnd_RHS_kxn_qsi4cx) { const auto kr = ukernel_variant.interface.get_kr(); const auto sr = ukernel_variant.interface.get_sr(); + const auto m_step = ukernel_variant.interface.get_m_step(); + ASSERT_TRUE(m_step % mr == 0); + + if (m_step == 1 && M > 1) { + GTEST_SKIP() << "Kernel does not support M != 1, but here M = " << M; + } + // Generates input data. const auto ref_lhs = fill_random(M * K, seed + 0); const auto ref_rhs = fill_random(N * K, seed + 1); @@ -572,9 +587,6 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi4cxp, EndToEnd_RHS_kxn_qsi4cx) { ref_rhs_scales.data(), nullptr, K, ref_biases.data(), std::numeric_limits::lowest(), std::numeric_limits::max()); - auto m_step = ukernel_variant.interface.get_m_step(); - ASSERT_TRUE(m_step % mr == 0); - auto n_step = ukernel_variant.interface.get_n_step(); ASSERT_TRUE(n_step % nr == 0); @@ -670,6 +682,13 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi4cxp, EndToEnd_RHS_kxn_qsu4cx) { const auto kr = ukernel_variant.interface.get_kr(); const auto sr = ukernel_variant.interface.get_sr(); + const auto m_step = ukernel_variant.interface.get_m_step(); + ASSERT_TRUE(m_step % mr == 0); + + if (m_step == 1 && M > 1) { + GTEST_SKIP() << "Kernel does not support M != 1, but here M = " << M; + } + // Generates input data. const auto ref_lhs = fill_random(M * K, seed + 0); const auto ref_rhs = fill_random(N * K, seed + 1); @@ -701,9 +720,6 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi4cxp, EndToEnd_RHS_kxn_qsu4cx) { ref_rhs_scales.data(), nullptr, K, ref_biases.data(), std::numeric_limits::lowest(), std::numeric_limits::max()); - auto m_step = ukernel_variant.interface.get_m_step(); - ASSERT_TRUE(m_step % mr == 0); - auto n_step = ukernel_variant.interface.get_n_step(); ASSERT_TRUE(n_step % nr == 0); diff --git a/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp b/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp index 4dd69e4ba9179f547595932e5ea708d43b096950..208c9a5c69fd28789eeb38cac9e0a28756c4b983 100644 --- a/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp +++ b/test/tests/matmul_clamp_f32_qai8dxp_qsi8cxp_test.cpp @@ -112,6 +112,13 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_nxk_qsi8cx) { const auto kr = ukernel_variant.interface.get_kr(); const auto sr = ukernel_variant.interface.get_sr(); + const auto m_step = ukernel_variant.interface.get_m_step(); + ASSERT_TRUE(m_step % mr == 0); + + if (m_step == 1 && M > 1) { + GTEST_SKIP() << "Kernel does not support M != 1, but here M = " << M; + } + // Generates input data. const auto ref_lhs = fill_random(M * K, seed + 0); const auto ref_rhs = fill_random(N * K, seed + 1); @@ -131,9 +138,6 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_nxk_qsi8cx) { ref_rhs_scales.data(), nullptr, K, ref_biases.data(), std::numeric_limits::lowest(), std::numeric_limits::max()); - auto m_step = ukernel_variant.interface.get_m_step(); - ASSERT_TRUE(m_step % mr == 0); - auto n_step = ukernel_variant.interface.get_n_step(); ASSERT_TRUE(n_step % nr == 0); @@ -226,6 +230,13 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_kxn_qsi8cx) { const auto kr = ukernel_variant.interface.get_kr(); const auto sr = ukernel_variant.interface.get_sr(); + const auto m_step = ukernel_variant.interface.get_m_step(); + ASSERT_TRUE(m_step % mr == 0); + + if (m_step == 1 && M > 1) { + GTEST_SKIP() << "Kernel does not support M != 1, but here M = " << M; + } + // Generates input data. const auto ref_lhs = fill_random(M * K, seed + 0); const auto ref_rhs = fill_random(N * K, seed + 1); @@ -256,9 +267,6 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi8cxp, EndToEnd_RHS_kxn_qsi8cx) { ref_rhs_scales.data(), nullptr, K, ref_biases.data(), std::numeric_limits::lowest(), std::numeric_limits::max()); - auto m_step = ukernel_variant.interface.get_m_step(); - ASSERT_TRUE(m_step % mr == 0); - auto n_step = ukernel_variant.interface.get_n_step(); ASSERT_TRUE(n_step % nr == 0); diff --git a/test/tests/matmul_clamp_f32_qsi8d32p_qsi4c32p_test.cpp b/test/tests/matmul_clamp_f32_qsi8d32p_qsi4c32p_test.cpp index ce5c6ebce2e3a8087d452500465c9c0a14ee5334..17e4ba448314b03cc155ee4f57c4fd23bebe96c1 100644 --- a/test/tests/matmul_clamp_f32_qsi8d32p_qsi4c32p_test.cpp +++ b/test/tests/matmul_clamp_f32_qsi8d32p_qsi4c32p_test.cpp @@ -179,13 +179,13 @@ TEST_P(MatMulTest_f32_qsi8d32p_qsi4c32p, EndToEnd) { const auto kr = ukernel_variant.ukernel.interface.get_kr(); const auto sr = ukernel_variant.ukernel.interface.get_sr(); - if (mr == 1 && M > 1) { - GTEST_SKIP() << "Kernel does not support M != 1"; - } - - auto m_step = ukernel_variant.ukernel.interface.get_m_step(); + const auto m_step = ukernel_variant.ukernel.interface.get_m_step(); ASSERT_TRUE(m_step % mr == 0); + if (m_step == 1 && M > 1) { + GTEST_SKIP() << "Kernel does not support M != 1, but here M = " << M; + } + auto n_step = ukernel_variant.ukernel.interface.get_n_step(); ASSERT_TRUE(n_step % nr == 0);