diff --git a/test/common/test_suite.hpp b/test/common/test_suite.hpp
index cc79103785148c5da7493334c21fe17d1a7c6c8f..1bc851774da8c695ae3574f71fe63d0054ce4c24 100644
--- a/test/common/test_suite.hpp
+++ b/test/common/test_suite.hpp
@@ -76,6 +76,22 @@ struct MatMulShape {
     size_t m{};  ///< LHS height.
     size_t n{};  ///< RHS width.
     size_t k{};  ///< LHS width and RHS height.
+private:
+    friend bool operator==(const MatMulShape& lhs, const MatMulShape& rhs) {
+        return                 //
+            lhs.m == rhs.m &&  //
+            lhs.n == rhs.n &&  //
+            lhs.k == rhs.k;
+    }
+};
+
+struct HashMatMulShape {
+    size_t operator()(const kai::test::MatMulShape& shape) const {
+        return                                     //
+            (std::hash<size_t>{}(shape.m) << 0) ^  //
+            (std::hash<size_t>{}(shape.n) << 1) ^  //
+            (std::hash<size_t>{}(shape.k) << 2);
+    }
 };
 
 /// Matrix multiplication test information.
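For orientation: the `operator==` / `HashMatMulShape` pair added above is exactly what a standard hash container needs to key on `MatMulShape`. A minimal, self-contained sketch of the pattern (illustrative only; `shape_cache` and the literal dimensions are hypothetical, not part of the patch):

```cpp
#include <cstddef>
#include <functional>
#include <unordered_map>

struct MatMulShape {
    std::size_t m{}, n{}, k{};

    friend bool operator==(const MatMulShape& lhs, const MatMulShape& rhs) {
        return lhs.m == rhs.m && lhs.n == rhs.n && lhs.k == rhs.k;
    }
};

struct HashMatMulShape {
    std::size_t operator()(const MatMulShape& shape) const {
        // XOR of shifted per-field hashes; the shifts generally keep
        // permutations of the same three values from hashing identically.
        return (std::hash<std::size_t>{}(shape.m) << 0) ^
               (std::hash<std::size_t>{}(shape.n) << 1) ^
               (std::hash<std::size_t>{}(shape.k) << 2);
    }
};

int main() {
    std::unordered_map<MatMulShape, int, HashMatMulShape> shape_cache;
    shape_cache[MatMulShape{32, 64, 128}] = 1;  // keyed lookup now works
    return shape_cache.count(MatMulShape{32, 64, 128}) == 1 ? 0 : 1;
}
```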
diff --git a/test/reference/matmul.cpp b/test/reference/matmul.cpp
index f7888e291ae6cf1c83111c5e77a25bad76b2eee0..e735b23f553730a61f30e02d86399a3d3b5b5e02 100644
--- a/test/reference/matmul.cpp
+++ b/test/reference/matmul.cpp
@@ -189,7 +189,8 @@ template <
     typename RhsZeroPoint, typename BiasData, typename BiasScale, typename BiasZeroPoint, typename DstData>
 std::vector<uint8_t> indirect_matmul_nt_t_quantized(
     size_t m, size_t n, size_t k_chunk_count, size_t k_chunk_length,  //
-    const void* const* lhs_ptrs, const void* lhs_scales, const void* lhs_zero_points, size_t lhs_quant_height,
+    const void* const* lhs_ptrs, uintptr_t lhs_offset, const void* lhs_padding, const void* lhs_scales,
+    const void* lhs_zero_points, size_t lhs_quant_height,
     size_t lhs_quant_width,  //
     const void* rhs_data, const void* rhs_scales, const void* rhs_zero_points, size_t rhs_quant_height,
     size_t rhs_quant_width,  //
@@ -199,27 +200,32 @@ std::vector<uint8_t> indirect_matmul_nt_t_quantized(
 
     std::vector<uint8_t> dst(m * n * sizeof(DstData));
 
-    for (size_t y = 0; y < m; ++y) {
-        for (size_t x = 0; x < n; ++x) {
+    for (size_t i_m = 0; i_m < m; ++i_m) {
+        for (size_t i_n = 0; i_n < n; ++i_n) {
             DstData acc = 0;
 
             for (size_t i_k_chunk = 0; i_k_chunk < k_chunk_count; ++i_k_chunk) {
-                const void* lhs_data = *(lhs_ptrs + (y * k_chunk_count + i_k_chunk));
+                // Calculate the K chunk pointer. Apply the offset if this is not padding.
+                const size_t k_chunk_idx = i_m * k_chunk_count + i_k_chunk;
+                const void* k_chunk_ptr = lhs_ptrs[k_chunk_idx];
+                if (k_chunk_ptr != lhs_padding) {
+                    k_chunk_ptr = reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(k_chunk_ptr) + lhs_offset);
+                }
 
                 for (size_t i_k_chunk_len = 0; i_k_chunk_len < k_chunk_length; ++i_k_chunk_len) {
                     const size_t i = i_k_chunk * k_chunk_length + i_k_chunk_len;
                     const auto lhs_data_index = i_k_chunk_len;
-                    const auto lhs_quant_index = (y / lhs_quant_height) * lhs_num_quant_per_row + i / lhs_quant_width;
-                    const auto lhs_value = read_array<LhsData>(lhs_data, lhs_data_index);
+                    const auto lhs_quant_index = (i_m / lhs_quant_height) * lhs_num_quant_per_row + i / lhs_quant_width;
+                    const auto lhs_value = read_array<LhsData>(k_chunk_ptr, lhs_data_index);
                     const auto lhs_scale = lhs_scales != nullptr
                         ? read_array<LhsScale>(lhs_scales, lhs_quant_index)
                         : static_cast<LhsScale>(1);
                     const auto lhs_zero_point = lhs_zero_points != nullptr
                         ? read_array<LhsZeroPoint>(lhs_zero_points, lhs_quant_index)
                         : static_cast<LhsZeroPoint>(0);
-                    const auto rhs_data_index = x * (k_chunk_count * k_chunk_length) + i;
-                    const auto rhs_quant_index = (x / rhs_quant_height) * rhs_num_quant_per_row + i / rhs_quant_width;
+                    const auto rhs_data_index = i_n * (k_chunk_count * k_chunk_length) + i;
+                    const auto rhs_quant_index = (i_n / rhs_quant_height) * rhs_num_quant_per_row + i / rhs_quant_width;
                     const auto rhs_value = read_array<RhsData>(rhs_data, rhs_data_index);
                     const auto rhs_scale = rhs_scales != nullptr
                         ? read_array<RhsScale>(rhs_scales, rhs_quant_index)
                         : static_cast<RhsScale>(1);
@@ -235,19 +241,19 @@ std::vector<uint8_t> indirect_matmul_nt_t_quantized(
             }
 
             if (bias_data != nullptr) {
-                const auto bias_value = read_array<BiasData>(bias_data, x);
+                const auto bias_value = read_array<BiasData>(bias_data, i_n);
                 const auto bias_scale = bias_scales != nullptr
-                    ? read_array<BiasScale>(bias_scales, x / bias_quant_width)
+                    ? read_array<BiasScale>(bias_scales, i_n / bias_quant_width)
                     : static_cast<BiasScale>(1);
                 const auto bias_zero_point = bias_zero_points != nullptr
-                    ? read_array<BiasZeroPoint>(bias_zero_points, x / bias_quant_width)
+                    ? read_array<BiasZeroPoint>(bias_zero_points, i_n / bias_quant_width)
                     : static_cast<BiasZeroPoint>(0);
 
                 acc += (static_cast<DstData>(bias_value) - static_cast<DstData>(bias_zero_point)) *
                     static_cast<DstData>(bias_scale);
             }
 
-            write_array<DstData>(dst.data(), y * n + x, acc);
+            write_array<DstData>(dst.data(), i_m * n + i_n, acc);
         }
     }
@@ -331,7 +337,8 @@ matmul_nt_t_quantized
 indirect_matmul_nt_t_quantized(
     size_t m, size_t n, size_t k_chunk_count, size_t k_chunk_length,  //
-    const void* const* lhs_ptrs, const void* lhs_scales, const void* lhs_zero_points, size_t lhs_quant_height,
+    const void* const* lhs_ptrs, uintptr_t lhs_offset, const void* lhs_padding, const void* lhs_scales,
+    const void* lhs_zero_points, size_t lhs_quant_height,
     size_t lhs_quant_width,  //
     const void* rhs_data, const void* rhs_scales, const void* rhs_zero_points, size_t rhs_quant_height,
     size_t rhs_quant_width,  //
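A note on the convention introduced here: an entry of `lhs_ptrs` is either the shared padding chunk, a real pointer used verbatim, or a buffer offset disguised as a pointer, which only becomes dereferenceable once `lhs_offset` (the base address) is added. A self-contained sketch of that resolution rule (`resolve_chunk` is a hypothetical helper, not part of the patch):

```cpp
#include <cstdint>

// Padding entries are returned as-is; every other entry is rebased by
// lhs_offset, mirroring the loop body in the hunk above.
inline const void* resolve_chunk(const void* k_chunk_ptr, std::uintptr_t lhs_offset, const void* lhs_padding) {
    if (k_chunk_ptr == lhs_padding) {
        return k_chunk_ptr;  // shared zero chunk: already a valid pointer
    }
    return reinterpret_cast<const void*>(reinterpret_cast<std::uintptr_t>(k_chunk_ptr) + lhs_offset);
}
```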
diff --git a/test/reference/matmul.hpp b/test/reference/matmul.hpp
index 913de1f84ba8d1196afae424f3873135903ca6e5..8d83e98c2455cb8e7c972d097586e1eefd55b5aa 100644
--- a/test/reference/matmul.hpp
+++ b/test/reference/matmul.hpp
@@ -122,7 +122,12 @@ std::vector<uint8_t> matmul_clamp_nt_t(
 /// @param[in] m The LHS and output height.
 /// @param[in] n The RHS height and output width.
 /// @param[in] k The LHS and RHS width.
+/// @param[in] k_chunk_count Number of K chunk pointers per row in the lhs_idata matrix.
+/// @param[in] k_chunk_length Length of each K chunk pointed to in the lhs_idata matrix.
 /// @param[in] lhs_data The LHS data matrix.
+/// @param[in] lhs_idata The indirect LHS data matrix.
+/// @param[in] lhs_offset The indirection LHS data matrix offset, applied to non-padding pointers.
+/// @param[in] lhs_padding The indirection LHS padding chunk pointer.
 /// @param[in] lhs_scales The LHS quantization scales matrix.
 /// @param[in] lhs_zero_points The LHS quantization zero points matrix.
 /// @param[in] lhs_quant_width The LHS quantization block width.
@@ -161,7 +166,8 @@ template <
     typename RhsZeroPoint, typename BiasData, typename BiasScale, typename BiasZeroPoint, typename DstData>
 std::vector<uint8_t> indirect_matmul_nt_t_quantized(
     size_t m, size_t n, size_t k_chunk_count, size_t k_chunk_length,  //
-    const void* const* lhs_ptrs, const void* lhs_scales, const void* lhs_zero_points, size_t lhs_quant_height,
+    const void* const* lhs_idata, uintptr_t lhs_offset, const void* lhs_padding, const void* lhs_scales,
+    const void* lhs_zero_points, size_t lhs_quant_height,
     size_t lhs_quant_width,  //
     const void* rhs_data, const void* rhs_scales, const void* rhs_zero_points, size_t rhs_quant_height,
     size_t rhs_quant_width,  //
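The new parameters describe a producer/consumer split: the caller stores offsets in `lhs_idata` and hands the buffer's base address over separately as `lhs_offset`, so the same indirection table stays valid wherever the LHS buffer ends up in memory. A sketch of the producer side under those assumptions (`build_indirection` and `row_is_padded` are hypothetical; element offsets equal byte offsets here because the data is 8-bit):

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

using IndirectionBuffer = std::vector<const void*>;

// Hypothetical builder: padded rows get the shared padding chunk, all other
// entries encode an offset into the LHS buffer as a pointer-sized integer.
IndirectionBuffer build_indirection(
    std::size_t m, std::size_t k, std::size_t k_chunk_count, std::size_t k_chunk_len, const void* padding_chunk,
    const std::vector<bool>& row_is_padded) {
    IndirectionBuffer ptrs(m * k_chunk_count);
    for (std::size_t row = 0; row < m; ++row) {
        for (std::size_t chunk = 0; chunk < k_chunk_count; ++chunk) {
            const std::size_t idx = row * k_chunk_count + chunk;
            if (row_is_padded[row]) {
                ptrs[idx] = padding_chunk;  // used verbatim by the consumer
            } else {
                const std::uintptr_t offset = row * k + chunk * k_chunk_len;
                ptrs[idx] = reinterpret_cast<const void*>(offset);  // rebased later
            }
        }
    }
    return ptrs;
}
```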
diff --git a/test/tests/matmul_clamp_qai8_qai8p_qsi8cxp_test.cpp b/test/tests/matmul_clamp_qai8_qai8p_qsi8cxp_test.cpp
index d71193a913a79164157516ddd9c0267abfb7a4f5..e7ded92ce75bb01f50de2e1abf6fd2940b7f722b 100644
--- a/test/tests/matmul_clamp_qai8_qai8p_qsi8cxp_test.cpp
+++ b/test/tests/matmul_clamp_qai8_qai8p_qsi8cxp_test.cpp
@@ -16,6 +16,7 @@
 #include
 #include
 #include
+#include <unordered_map>
 #include
 
 #include "kai/kai_common.h"
@@ -336,6 +337,8 @@ struct TestReference {
     Buffer lhs_qai8_zero_points;
     IndirectionBuffer lhs_qai8_indirect;
     Buffer lhs_qai8_indirect_packed;
+    Buffer lhs_qai8_indirect_padding;
+    size_t lhs_qai8_indirect_offset;
 
     Buffer rhs_qsi8;
     Buffer rhs_scales;
@@ -380,30 +383,59 @@ static const kai_imatmul_clamp_qai8_qai8p_qsi8cxp_ukernel
     .run_matmul = kai_run_imatmul_clamp_qai8_qai8p2vlx4_qsi8cxpsb2vlx4_2vlx2vl_sme2_mopa,
 };
 
-// M, N, K, k_chunk_length, pack.m, pack.n, pack.k
-using TestDataId = std::tuple<size_t, size_t, size_t, size_t, size_t, size_t, size_t>;
+static constexpr int8_t padding_value = 0;
+
+// Functionality for hashing generated test data.
+// This is particularly useful for portion testing,
+// which reuses the exact same data for all portions.
+struct TestDataId {
+    MatMulShape shape;
+    MatMulShape shape_pack;
+    size_t chunk_len;
+    bool pad_testing;
+
+private:
+    friend bool operator==(const TestDataId& lhs, const TestDataId& rhs) {
+        return                                   //
+            lhs.shape == rhs.shape &&            //
+            lhs.shape_pack == rhs.shape_pack &&  //
+            lhs.chunk_len == rhs.chunk_len &&    //
+            lhs.pad_testing == rhs.pad_testing;
+    }
+};
+
+struct HashTestDataId {
+    size_t operator()(const TestDataId& id) const {
+        return                                          //
+            (HashMatMulShape{}(id.shape) << 0) ^        //
+            (HashMatMulShape{}(id.shape_pack) << 1) ^   //
+            (std::hash<size_t>{}(id.chunk_len) << 2) ^  //
+            (std::hash<bool>{}(id.pad_testing) << 3);
+    }
+};
+
 // NOLINTBEGIN(cppcoreguidelines-avoid-non-const-global-variables)
-static std::map<TestDataId, TestReference> g_data;
+static std::unordered_map<TestDataId, TestReference, HashTestDataId> g_data;
 // NOLINTEND(cppcoreguidelines-avoid-non-const-global-variables)
 
 /// Generate test reference data
-static const TestReference& get_test_reference(
-    const MatMulShape& shape, const MatMulShape& pack_shape, size_t k_chunk_len) {
+static const TestReference& get_test_reference(const TestDataId& test_data_id) {
     // ============================================================
     // Generates input and reference output data
     // ============================================================
 
     // Attempt to find test data in cache
-    const TestDataId data_id{shape.m, shape.n, shape.k, k_chunk_len, pack_shape.m, pack_shape.n, pack_shape.k};
-    const auto data_it = g_data.find(data_id);
+    const auto data_it = g_data.find(test_data_id);
     if (data_it != g_data.end()) {
         return data_it->second;
     }
 
+    const auto& [shape, pack_shape, k_chunk_len, pad_testing] = test_data_id;
+
     // Generates the input data in floating-point.
-    const auto lhs_f32 = fill_random<float>(shape.m * shape.k, seed);
-    const auto rhs_f32 = fill_random<float>(shape.k * shape.n, seed);
-    const auto bias_f32 = fill_random<float>(shape.n, seed);
+    Buffer lhs_f32 = fill_random<float>(shape.m * shape.k, seed);
+    const Buffer rhs_f32 = fill_random<float>(shape.k * shape.n, seed);
+    const Buffer bias_f32 = fill_random<float>(shape.n, seed);
 
     // Quantizes the input data.
     // * LHS: 8-bit asymmetric per-matrix quantization.
@@ -417,18 +449,26 @@ static const TestReference& get_test_reference(
     const auto lhs_scale = read_array<float>(lhs_qai8_scales.data(), 0);
     const auto lhs_zero_point = read_array<int32_t>(lhs_qai8_zero_points.data(), 0);
 
-    IndirectionBuffer lhs_qai8_indirect;
-
     const size_t k_chunk_count = shape.k / k_chunk_len;
     assert(k_chunk_count * k_chunk_len == shape.k);
 
     // Setup an indirection buffer, where each "row" contains `k_chunk_count`
     // pointers to chunks of length `k_chunk_len` in the input_buffer
+    IndirectionBuffer lhs_qai8_indirect(shape.m * k_chunk_count);
+    Buffer lhs_padding(k_chunk_len, padding_value);
     for (size_t m_i = 0; m_i < shape.m; ++m_i) {
         for (size_t k_chunk_idx = 0; k_chunk_idx < k_chunk_count; ++k_chunk_idx) {
-            lhs_qai8_indirect.push_back(&lhs_qai8.at(m_i * shape.k + k_chunk_idx * k_chunk_len));
+            const size_t idx = m_i * k_chunk_count + k_chunk_idx;
+            if (pad_testing && m_i == 0) {
+                // Push padding pointers for the first row
+                lhs_qai8_indirect[idx] = lhs_padding.data();
+            } else {
+                uintptr_t offset = m_i * shape.k + k_chunk_idx * k_chunk_len;
+                lhs_qai8_indirect[idx] = reinterpret_cast<const void*>(offset);
+            }
         }
     }
+    const auto indirection_base = reinterpret_cast<uintptr_t>(lhs_qai8.data());
 
     // Reorder indirection pointers to layout the packing kernel expectes
     Buffer lhs_qai8_indirect_packed = reorder_block(
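`get_test_reference()` above is a get-or-compute cache: expensive reference data is generated once per `TestDataId` and then reused by every output portion of the same test case. The pattern in isolation (a sketch; `Reference` and `generate` are hypothetical stand-ins for the test's types):

```cpp
#include <unordered_map>

struct Reference {
    int payload;
};

// Hypothetical stand-in for the expensive reference-data generation.
Reference generate(int id) {
    return Reference{id * 2};
}

const Reference& get_or_compute(int id) {
    static std::unordered_map<int, Reference> cache;
    if (const auto it = cache.find(id); it != cache.end()) {
        return it->second;  // hit: every portion of a test case lands here
    }
    return cache[id] = generate(id);  // miss: generate once, cache for reuse
}
```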
@@ -449,11 +489,12 @@ static const TestReference& get_test_reference(
         quantize_symmetric_per_block(bias_f32.data(), bias_scales.data(), shape.n, 1, 1);
 
     // Runs the reference implementation of matmul to produce floating-point result.
+    const void* const* lhs_iptr = reinterpret_cast<const void* const*>(lhs_qai8_indirect.data());
     const auto ref_dst_f32 = indirect_matmul_nt_t_quantized(
-        shape.m, shape.n, k_chunk_count, k_chunk_len,  // matmul shape
-        reinterpret_cast<const void* const*>(lhs_qai8_indirect.data()), &lhs_scale,
-        &lhs_zero_point,  // LHS, scaling factor and zero point
+        shape.m, shape.n, k_chunk_count, k_chunk_len,    // matmul shape
+        lhs_iptr, indirection_base, lhs_padding.data(),  // LHS indirection, offset and padding
+        &lhs_scale, &lhs_zero_point,                     // LHS, scaling factor and zero point
         shape.m, shape.k,                                // LHS quantization window shape
         rhs_qsi8_t.data(), rhs_scales.data(), nullptr,   // RHS scaling factors
         1, shape.k,                                      // RHS quantization window shape
@@ -505,7 +546,7 @@ static const TestReference& get_test_reference(
         rhs_qsi8_t.data(), rhs_scales.data(), lhs_scale, dst_scale, bias_qsi32.data(), lhs_zero_point, shape.n,
         shape.k, pack_shape.n, pack_shape.k);
 
-    const TestReference& reference = g_data[data_id] = {
+    const TestReference& reference = g_data[test_data_id] = {
         .clamp = {.min = dst_qai8_clamp_min, .max = dst_qai8_clamp_max},
 
         .qa_lhs = {.scale = lhs_scale, .zero_point = lhs_zero_point},
@@ -516,6 +557,8 @@ static const TestReference& get_test_reference(
         .lhs_qai8_zero_points = std::move(lhs_qai8_zero_points),
         .lhs_qai8_indirect = std::move(lhs_qai8_indirect),
         .lhs_qai8_indirect_packed = std::move(lhs_qai8_indirect_packed),
+        .lhs_qai8_indirect_padding = std::move(lhs_padding),
+        .lhs_qai8_indirect_offset = indirection_base,
 
         .rhs_qsi8 = std::move(rhs_qsi8),
         .rhs_scales = std::move(rhs_scales),
@@ -715,7 +758,8 @@ TEST_P(MatMulQuantizedTest, EndToEnd) {
         GTEST_SKIP() << "CPU features are not supported by current CPU";
     }
 
-    TestReference reference = get_test_reference(shape, variant.acc_pack, 1);
+    TestDataId test_data_id{shape, variant.acc_pack, shape.k, false};
+    const TestReference& reference = get_test_reference(test_data_id);
 
     // Check scheduling parameters
     const auto imp_mr = variant.matmul.get_mr();
@@ -762,13 +806,11 @@ static Buffer lhs_pack(
     const size_t input_offset = portion.start_row() * k_chunk.count;
     const size_t dst_offset = variant.get_packed_lhs_offset(portion.start_row(), k_chunk.count, k_chunk.length);
 
-    // TODO: `lhs_offset` is currently not being excercized!
-    // TODO: Ensure that `zero` pointers are tested
     variant.pack(
         portion.height(), k_chunk.count, k_chunk.length,  // Dimensions
         indirection_pointer + input_offset,               // Indirection input
-        0,                                                // chunk offset
-        nullptr,                                          // padding pointer
+        reference.lhs_qai8_indirect_offset,               // chunk offset
+        reference.lhs_qai8_indirect_padding.data(),       // padding pointer
         packed.data() + dst_offset);
 
     return packed;
@@ -854,7 +896,9 @@ TEST_P(IndirectMatMulQuantizedTest, EndToEnd) {
         GTEST_SKIP() << "CPU features are not supported by current CPU";
     }
 
-    const TestReference& reference = get_test_reference(shape, variant.acc_pack, k_chunk.length);
+    // Toggle padding tests when LHS has more than one row
+    TestDataId test_data_id{shape, variant.acc_pack, k_chunk.length, shape.m > 1};
+    const TestReference& reference = get_test_reference(test_data_id);
 
     const Rect portion = output_portion.compute_portion(shape.m, shape.n, variant.acc_step.m, variant.acc_step.n);
 
     Buffer packed_lhs = imatmul::lhs_pack(variant.lhs_pack, portion, reference, shape.m, k_chunk);
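One detail worth calling out from the last hunk: padding is exercised by replacing every chunk pointer of row 0 with the shared zero chunk, so a single-row LHS would be left with no real data to multiply. A compile-time sketch of that reasoning (the helper name is hypothetical, not part of the patch):

```cpp
#include <cstddef>

// Padding testing swaps row 0's chunk pointers for the shared zero chunk,
// so it only makes sense when another row still carries real data.
constexpr bool should_test_padding(std::size_t m) {
    return m > 1;
}

static_assert(!should_test_padding(1));  // one row: padding would erase all real data
static_assert(should_test_padding(4));   // rows 1..3 keep real data while row 0 is padded
```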