From a14e018b49a4acb8eca7c4fe1491c29c752c7759 Mon Sep 17 00:00:00 2001
From: Jakub Sujak <jakub.sujak@arm.com>
Date: Mon, 19 Aug 2024 11:14:25 +0100
Subject: [PATCH] Add end-to-end test for int4 GEMV

* Add test coverage for the matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod micro-kernel.

Signed-off-by: Jakub Sujak <jakub.sujak@arm.com>
---
 ...atmul_clamp_f32_qsi8d32p_qsi4c32p_test.cpp | 73 ++++++++++++++++++-
 1 file changed, 71 insertions(+), 2 deletions(-)

diff --git a/test/tests/matmul_clamp_f32_qsi8d32p_qsi4c32p_test.cpp b/test/tests/matmul_clamp_f32_qsi8d32p_qsi4c32p_test.cpp
index ad3e761c..8dba1ac3 100644
--- a/test/tests/matmul_clamp_f32_qsi8d32p_qsi4c32p_test.cpp
+++ b/test/tests/matmul_clamp_f32_qsi8d32p_qsi4c32p_test.cpp
@@ -10,6 +10,7 @@
 #include <cstddef>
 #include <cstdint>
 
+#include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.h"
 #include "kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm.h"
 #include "kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32.h"
 #include "kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.h"
@@ -24,7 +25,7 @@
 
 namespace kai::test {
 
-TEST(matmul_clamp_f32_qsi8d32p_qsi4c32p, EndToEnd) {
+TEST(matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm, EndToEnd) {
     const std::uint64_t seed = 0;
 
     const size_t M = 32;
@@ -70,7 +71,7 @@ TEST(matmul_clamp_f32_qsi8d32p_qsi4c32p, EndToEnd) {
     kai_run_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0(
         1, N, K, nr, kr, sr, bl, ref_rhs_qsu4_scale_f16.data(), nullptr, imp_packed_rhs.data(), 0, &params);
 
-    // Runs the GEMM micr-kernel.
+    // Runs the GEMM micro-kernel.
     const auto imp_dst_size = kai_get_dst_size_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm(M, N);
     ASSERT_EQ(imp_dst_size, ref_dst.size());
     std::vector<uint8_t> imp_dst(imp_dst_size);
@@ -92,4 +93,72 @@ TEST(matmul_clamp_f32_qsi8d32p_qsi4c32p, EndToEnd) {
     }
 }
 
+TEST(matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod, EndToEnd) {
+    const std::uint64_t seed = 0;
+
+    const size_t M = 32;
+    const size_t N = 64;
+    const size_t K = 64;
+
+    const auto mr = kai_get_mr_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod();
+    const auto nr = kai_get_nr_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod();
+    const auto kr = kai_get_kr_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod();
+    const auto sr = kai_get_sr_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod();
+    const size_t bl = 32;
+
+    // Generates input data.
+    const auto ref_lhs = fill_random<float>(M * K, seed + 0);
+    const auto ref_rhs = fill_random<float>(N * K, seed + 1);
+
+    // Runs the reference implementation.
+    const auto [ref_lhs_qvalues, ref_lhs_scales] =
+        quantize_symmetric_per_block<float, int8_t, Float16>(ref_lhs.data(), M, K, bl);
+    const auto [ref_rhs_qsi4, ref_rhs_scales] =
+        quantize_symmetric_per_block<float, Int4, Float16>(ref_rhs.data(), N, K, bl);
+
+    const auto ref_dst = matmul_clamp_nt_t<int8_t, Float16, int32_t, Int4, Float16, int32_t, float, int32_t, float>(
+        M, N, K, ref_lhs_qvalues.data(), ref_lhs_scales.data(), nullptr, bl, ref_rhs_qsi4.data(), ref_rhs_scales.data(),
+        nullptr, bl, nullptr, std::numeric_limits<float>::lowest(), std::numeric_limits<float>::max());
+
+    // Runs the LHS packing micro-kernel.
+    const auto imp_packed_lhs_size = kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32(M, K, bl, mr, kr, sr);
+    std::vector<uint8_t> imp_packed_lhs(imp_packed_lhs_size);
+    kai_run_lhs_quant_pack_qsi8d32p_f32(
+        M, K, bl, mr, kr, sr, 0, reinterpret_cast<const float*>(ref_lhs.data()), K * sizeof(float),
+        imp_packed_lhs.data());
+
+    // Runs the RHS packing micro-kernel.
+    const auto ref_rhs_qsu4 = cast_qsu4_qsi4(ref_rhs_qsi4.data(), N * K);
+    const auto ref_rhs_qsu4_scale_f16 =
+        pack_data_scales_interleave_block<UInt4, Float16>(ref_rhs_qsu4.data(), ref_rhs_scales.data(), N, K, bl);
+
+    const auto imp_packed_rhs_size =
+        kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0(N, K, nr, kr, bl);
+    std::vector<uint8_t> imp_packed_rhs(imp_packed_rhs_size);
+    const kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0_params params{.lhs_zero_point = 1, .rhs_zero_point = 8};
+    kai_run_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0(
+        1, N, K, nr, kr, sr, bl, ref_rhs_qsu4_scale_f16.data(), nullptr, imp_packed_rhs.data(), 0, &params);
+
+    // Runs the GEMM micro-kernel.
+    const auto imp_dst_size = kai_get_dst_size_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod(M, N);
+    ASSERT_EQ(imp_dst_size, ref_dst.size());
+    std::vector<uint8_t> imp_dst(imp_dst_size);
+    kai_run_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod(
+        M, N, K, bl, imp_packed_lhs.data(), imp_packed_rhs.data(), reinterpret_cast<float*>(imp_dst.data()),
+        N * sizeof(float), sizeof(float), std::numeric_limits<float>::lowest(), std::numeric_limits<float>::max());
+
+    // Compares the output of the micro-kernels against the output of the reference implementation.
+    for (size_t y = 0; y < M; ++y) {
+        for (size_t x = 0; x < N; ++x) {
+            const auto imp_value = read_array<float>(imp_dst.data(), y * N + x);
+            const auto ref_value = read_array<float>(ref_dst.data(), y * N + x);
+            const auto rel_error = ref_value != 0 ? std::abs((imp_value - ref_value) / ref_value) : std::abs(imp_value);
+
+            if (rel_error > 0.0001F) {
+                ASSERT_EQ(imp_value, ref_value);
+            }
+        }
+    }
+}
+
 } // namespace kai::test
-- 
GitLab