diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon.c b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon.c index 36c20fd127c6130dd84c6060d755c1c784b6e50b..f264c5c05d74743bc9bffa69562254d4fcb0a0f2 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon.c @@ -142,17 +142,18 @@ void kai_run_rhs_pack_nxk_qsi4cxps1s0_qsu4cxs1s0_neon( // Load the 2 u4 values from source const uint8_t dst_byte = src_row[(col_idx + kr_block_idx) / 2]; + // NOLINTBEGIN(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) // extract i8 values from the 2 u4 values - const uint8_t first_value = (dst_byte & 0xF) - rhs_zero_point; - const uint8_t second_value = + const int8_t first_value = (dst_byte & 0xF) - rhs_zero_point; + const int8_t second_value = col_idx + kr_block_idx + 1 >= k ? 0 : (dst_byte >> 4) - rhs_zero_point; // Add the i4 value to the row sum sum += (int32_t)first_value + (int32_t)second_value; // Truncate i8 to i4 and write to dst - // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) dst_kr_block[kr_block_idx / 2] = (second_value << 4) | (first_value & 0xF); + // NOLINTEND(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) } // Go to the next kr block for this row in the nr rows diff --git a/test/tests/matmul_clamp_f32_qai8dxp_qsi4cxp_test.cpp b/test/tests/matmul_clamp_f32_qai8dxp_qsi4cxp_test.cpp index d9850f3d516cd052a5f2dbbde58db7c3ca76e059..37c37c39c601c8196fc7c8433b7cb5970dd65bc0 100644 --- a/test/tests/matmul_clamp_f32_qai8dxp_qsi4cxp_test.cpp +++ b/test/tests/matmul_clamp_f32_qai8dxp_qsi4cxp_test.cpp @@ -290,10 +290,9 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi4cxp, EndToEnd_RHS_nxk_qsi4cx) { // Generates input data. const auto ref_lhs = fill_random(M * K, seed + 0); const auto ref_biases = fill_random(N, seed + 2); - std::uniform_real_distribution dist(-10.0, 1.0); + std::uniform_real_distribution dist(-10.0, 1.0); std::mt19937 rnd(seed + 1); - const auto ref_rhs = fill_matrix_raw(1, N * K, [&dist, &rnd](size_t, size_t) { return dist(rnd); }); // Runs the reference implementation. @@ -421,7 +420,11 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi4cxp, EndToEnd_RHS_nxk_qsu4cx) { // Generates input data. const auto ref_lhs = fill_random(M * K, seed + 0); - const auto ref_rhs = fill_random(N * K, seed + 1); + + std::uniform_real_distribution dist(-10.0, 1.0); + std::mt19937 rnd(seed + 1); + const auto ref_rhs = fill_matrix_raw(1, N * K, [&dist, &rnd](size_t, size_t) { return dist(rnd); }); + const auto ref_biases = fill_random(N, seed + 2); // Runs the reference implementation. @@ -548,7 +551,11 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi4cxp, EndToEnd_RHS_kxn_qsi4cx) { // Generates input data. const auto ref_lhs = fill_random(M * K, seed + 0); - const auto ref_rhs = fill_random(N * K, seed + 1); + + std::uniform_real_distribution dist(-10.0, 1.0); + std::mt19937 rnd(seed + 1); + const auto ref_rhs = fill_matrix_raw(1, N * K, [&dist, &rnd](size_t, size_t) { return dist(rnd); }); + const auto ref_biases = fill_random(N, seed + 2); // Transposed(nxk) RHS dimensions @@ -679,7 +686,11 @@ TEST_P(MatMulTest_f32_qai8dxp_qsi4cxp, EndToEnd_RHS_kxn_qsu4cx) { // Generates input data. const auto ref_lhs = fill_random(M * K, seed + 0); - const auto ref_rhs = fill_random(N * K, seed + 1); + + std::uniform_real_distribution dist(-10.0, 1.0); + std::mt19937 rnd(seed + 1); + const auto ref_rhs = fill_matrix_raw(1, N * K, [&dist, &rnd](size_t, size_t) { return dist(rnd); }); + const auto ref_biases = fill_random(N, seed + 2); // Transposed(nxk) RHS dimensions