From 8860266608893be98bf785de995a906a76fe1c50 Mon Sep 17 00:00:00 2001 From: Viet-Hoa Do Date: Mon, 16 Sep 2024 14:33:39 +0100 Subject: [PATCH 1/2] Fix clang-tidy warnings * This patch also makes the clang-tidy job fail when there is any warning in the library code. Signed-off-by: Viet-Hoa Do Signed-off-by: Anton Bondarenko --- .clang-tidy | 50 ++++++++----------- .editorconfig | 4 ++ .gitlab-ci.yml | 10 ++-- kai/kai_common.h | 4 +- ..._f16_f16_f16p16x1biasf16_6x16x8_neon_mla.c | 1 - ...mp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla.c | 1 - ...f32_f32p2vlx1_f32p2vlx1biasf32_sme2_mopa.c | 4 +- ...i8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c | 3 +- ...i8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod.c | 3 +- ..._qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm.c | 3 +- ..._qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm.c | 3 +- ...2_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm.c | 5 +- ...2_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm.c | 5 +- ...2_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm.c | 3 +- ...2_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm.c | 5 +- ...8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c | 2 +- ...qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm.c | 3 +- .../pack/kai_lhs_quant_pack_qsi8d32p_f32.c | 8 +-- ...hs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme.c | 12 ++--- .../kai_rhs_pack_kxn_qsi4c32p_qsu4c32s1s0.c | 11 ++-- .../pack/kai_rhs_pack_kxn_qsi4cxp_qs4cxs1s0.c | 1 - .../kai_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0.c | 7 ++- ...s_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.c | 3 +- .../pack/kai_rhs_pack_nxk_qsi4cxp_qs4cxs1s0.c | 1 - 24 files changed, 75 insertions(+), 77 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 1da1dcfd..b2f304c5 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -4,33 +4,23 @@ # SPDX-License-Identifier: Apache-2.0 # --- -Checks: ' --*, -bugprone-*, -cert-*, -clang-analyzer-*, -clang-diagnostic-*, -cppcoreguidelines-*, --cppcoreguidelines-owning-memory, -google-*, --google-readability-todo, -llvm-*, --llvm-include-order, -misc-*, -modernize-*, --modernize-use-trailing-return-type, -performance-*, 
-readability-*, --readability-identifier-length, --readability-magic-numbers, --readability-function-cognitive-complexity, --cppcoreguidelines-pro-type-reinterpret-cast, --cppcoreguidelines-avoid-magic-numbers, --readability-simplify-boolean-expr, --bugprone-easily-swappable-parameters, --cppcoreguidelines-pro-bounds-pointer-arithmetic, --performance-enum-size, --llvm-else-after-return, --readability-else-after-return, -' -... +Checks: > + -*, + bugprone-*, + cert-*, + clang-analyzer-*, + clang-diagnostic-*, + cppcoreguidelines-*, + google-*, + llvm-*, + misc-*, + modernize-*, + performance-*, + readability-*, + + -bugprone-easily-swappable-parameters, + -cppcoreguidelines-avoid-magic-numbers, + -llvm-header-guard, + -readability-function-cognitive-complexity, + -readability-identifier-length, + -readability-magic-numbers, diff --git a/.editorconfig b/.editorconfig index 8b3a3b25..7e95757f 100644 --- a/.editorconfig +++ b/.editorconfig @@ -24,6 +24,10 @@ indent_size = unset [*.{json,yml,yaml}] indent_size = 2 +# YAML files as well. +[.clang-{tidy,format}] +indent_size = 2 + # Override settings. [LICENSES/*] indent_size = unset diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e8b0b2f3..9f8fff72 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -142,13 +142,15 @@ test-examples: paths: - ${EXAMPLE}.log -clang-tidy-checks: +test-clang-tidy: extends: - .standard-rules - stage: build + stage: test + needs: [] script: - - cmake -G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release -DKLEIDIAI_BUILD_TESTS=ON -DKLEIDIAI_ENABLE_CLANG_TIDY=ON -S . -B ${CI_JOB_NAME_SLUG} - - cmake --build ${CI_JOB_NAME_SLUG} + - cmake -G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release -DKLEIDIAI_BUILD_TESTS=ON -DKLEIDIAI_BUILD_BENCHMARK=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -B build/${CI_JOB_NAME_SLUG} + # Only test the main library. 
+ - run-clang-tidy -header-filter ".*" -warnings-as-errors "*" -p build/${CI_JOB_NAME_SLUG} kai pre-commit-hooks: variables: diff --git a/kai/kai_common.h b/kai/kai_common.h index 27034185..b8f64f5d 100644 --- a/kai/kai_common.h +++ b/kai/kai_common.h @@ -92,7 +92,7 @@ inline static float kai_cast_f32_f16(uint16_t f16) { /// @return the f32 value inline static float kai_cast_f32_bf16(uint16_t bf16) { const uint32_t i32 = (bf16 << 16); - float f32; + float f32 = 0; memcpy(&f32, &i32, sizeof(i32)); return f32; } @@ -119,7 +119,7 @@ inline static uint16_t kai_cast_bf16_f32(float f32) { #if defined(__ARM_NEON) inline static uint16_t kai_cast_f16_f32(float f32) { uint16_t f16 = 0; - __fp16 tmp = f32; + __fp16 tmp = (__fp16)f32; memcpy(&f16, &tmp, sizeof(uint16_t)); return f16; } diff --git a/kai/ukernels/matmul/matmul_clamp_f16_f16_f16p/kai_matmul_clamp_f16_f16_f16p16x1biasf16_6x16x8_neon_mla.c b/kai/ukernels/matmul/matmul_clamp_f16_f16_f16p/kai_matmul_clamp_f16_f16_f16p16x1biasf16_6x16x8_neon_mla.c index 9f9d5a75..90335b31 100644 --- a/kai/ukernels/matmul/matmul_clamp_f16_f16_f16p/kai_matmul_clamp_f16_f16_f16p16x1biasf16_6x16x8_neon_mla.c +++ b/kai/ukernels/matmul/matmul_clamp_f16_f16_f16p/kai_matmul_clamp_f16_f16_f16p16x1biasf16_6x16x8_neon_mla.c @@ -11,7 +11,6 @@ #include "kai_matmul_clamp_f16_f16_f16p16x1biasf16_6x16x8_neon_mla.h" -#include #include #include diff --git a/kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla.c b/kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla.c index 62723018..fb380100 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla.c @@ -10,7 +10,6 @@ #include "kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla.h" -#include #include #include diff --git 
a/kai/ukernels/matmul/matmul_clamp_f32_f32p_f32p/kai_matmul_clamp_f32_f32p2vlx1_f32p2vlx1biasf32_sme2_mopa.c b/kai/ukernels/matmul/matmul_clamp_f32_f32p_f32p/kai_matmul_clamp_f32_f32p2vlx1_f32p2vlx1biasf32_sme2_mopa.c index 5611fad1..2836121f 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_f32p_f32p/kai_matmul_clamp_f32_f32p2vlx1_f32p2vlx1biasf32_sme2_mopa.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_f32p_f32p/kai_matmul_clamp_f32_f32p2vlx1_f32p2vlx1biasf32_sme2_mopa.c @@ -76,8 +76,8 @@ void kai_run_matmul_clamp_f32_f32p2vlx1_f32p2vlx1biasf32_sme2_mopa( const void* B; void* C; - long ldcb; - long M, N, K; + uint64_t ldcb; + uint64_t M, N, K; float min; float max; diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c index d0a3f7de..532eb1ec 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c @@ -108,7 +108,8 @@ size_t kai_get_dst_size_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotp void kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod( size_t m, size_t n, size_t k, size_t bl, const void* restrict lhs_packed, const void* restrict rhs_packed, - float* restrict dst, size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { + float* restrict dst, // NOLINT(readability-non-const-parameter) + size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT((bl % kai_kr) == 0); KAI_ASSERT((bl % kai_bl_multiple_of) == 0); KAI_ASSERT(dst_stride_col == sizeof(float)); diff --git 
a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod.c index ea48678e..7f6f9e86 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod.c @@ -108,7 +108,8 @@ size_t kai_get_dst_size_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotp void kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod( size_t m, size_t n, size_t k, size_t bl, const void* restrict lhs_packed, const void* restrict rhs_packed, - float* restrict dst, size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { + float* restrict dst, // NOLINT(readability-non-const-parameter) + size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT((bl % kai_kr) == 0); KAI_ASSERT((bl % kai_bl_multiple_of) == 0); KAI_ASSERT(dst_stride_col == sizeof(float)); diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm.c index 5e84e870..33ad7c36 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm.c @@ -107,7 +107,8 @@ size_t kai_get_dst_size_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm } void kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm( - size_t m, size_t n, size_t k, size_t bl, const void* lhs_packed, const void* rhs_packed, float* dst, + 
size_t m, size_t n, size_t k, size_t bl, const void* lhs_packed, const void* rhs_packed, + float* dst, // NOLINT(readability-non-const-parameter) size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT((bl % kai_kr) == 0); KAI_ASSERT((bl % kai_bl_multiple_of) == 0); diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm.c index f64249f1..62ffeb87 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm.c @@ -107,7 +107,8 @@ size_t kai_get_dst_size_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm } void kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm( - size_t m, size_t n, size_t k, size_t bl, const void* lhs_packed, const void* rhs_packed, float* dst, + size_t m, size_t n, size_t k, size_t bl, const void* lhs_packed, const void* rhs_packed, + float* dst, // NOLINT(readability-non-const-parameter) size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT((bl % kai_kr) == 0); KAI_ASSERT((bl % kai_bl_multiple_of) == 0); diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm.c index 4f8edd1e..9b27c742 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm.c @@ -99,8 +99,9 @@ size_t 
kai_get_dst_size_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm( } void kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm( - size_t m, size_t n, size_t k, const void* lhs_packed, const void* rhs_packed, float* dst, size_t dst_stride_row, - size_t dst_stride_col, float scalar_min, float scalar_max) { + size_t m, size_t n, size_t k, const void* lhs_packed, const void* rhs_packed, + float* dst, // NOLINT(readability-non-const-parameter) + size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT(dst_stride_col == sizeof(float)); if (m == 0) { diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm.c index 7e40839e..78531631 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm.c @@ -98,8 +98,9 @@ size_t kai_get_dst_size_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm( } void kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm( - size_t m, size_t n, size_t k, const void* lhs_packed, const void* rhs_packed, float* dst, size_t dst_stride_row, - size_t dst_stride_col, float scalar_min, float scalar_max) { + size_t m, size_t n, size_t k, const void* lhs_packed, const void* rhs_packed, + float* dst, // NOLINT(readability-non-const-parameter) + size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT(dst_stride_col == sizeof(float)); if (m == 0) { diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm.c 
b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm.c index fff2d5aa..51585fe0 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm.c @@ -98,7 +98,8 @@ size_t kai_get_dst_size_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm( } void kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm( - size_t m, size_t n, size_t k, const void* restrict lhs_packed, const void* restrict rhs_packed, float* restrict dst, + size_t m, size_t n, size_t k, const void* restrict lhs_packed, const void* restrict rhs_packed, + float* restrict dst, // NOLINT(readability-non-const-parameter) size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT(dst_stride_col == sizeof(float)); diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm.c index e9f96a3a..b310bf74 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm.c @@ -98,8 +98,9 @@ size_t kai_get_dst_size_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm( } void kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm( - size_t m, size_t n, size_t k, const void* lhs_packed, const void* rhs_packed, float* dst, size_t dst_stride_row, - size_t dst_stride_col, float scalar_min, float scalar_max) { + size_t m, size_t n, size_t k, const void* lhs_packed, const void* rhs_packed, + float* dst, // NOLINT(readability-non-const-parameter) + size_t 
dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT(dst_stride_col == sizeof(float)); if (m == 0) { diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c index e41de7f6..37ee526c 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c @@ -125,7 +125,7 @@ void kai_run_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod( const size_t num_rows = m; const size_t lhs_packed_stride = kai_lhs_packed_stride(k, bl); - const int8x16_t nibble_mask = vdupq_n_s8(0xF0); + const int8x16_t nibble_mask = vreinterpretq_s8_u8(vdupq_n_u8(0xF0)); const uint8_t* lhs_ptr_start = lhs_packed; diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm.c b/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm.c index 78c9ec0b..c3ce0882 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm.c @@ -10,6 +10,7 @@ #include "kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm.h" #include +#include #include #include "kai/kai_common.h" @@ -125,7 +126,7 @@ void kai_run_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm( const size_t num_cols = n; const size_t num_rows = m; - const int8x16_t nibble_mask = vdupq_n_s8(0xF0); + const int8x16_t nibble_mask = vreinterpretq_s8_u8(vdupq_n_u8(0xF0)); const uint8_t* 
lhs_ptr_start = lhs_packed; diff --git a/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32.c b/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32.c index a391335d..e6c32004 100644 --- a/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32.c +++ b/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32.c @@ -91,20 +91,20 @@ void kai_run_lhs_quant_pack_qsi8d32p_f32( // Calculate scale and reciprocal const float scale = abs_max / ((1 << 7) - 1); - const float rep_scale = scale ? 1.0f / scale : 0.0f; + const float rep_scale = scale ? 1.0F / scale : 0.0F; // Quantize and pack the block for (size_t k_idx = 0; k_idx < bl; k_idx += k_block_len) { - for (size_t k_block_idx = 0; k_block_idx < (size_t)k_block_len; ++k_block_idx) { + for (size_t k_block_idx = 0; k_block_idx < k_block_len; ++k_block_idx) { // Clamp at the last valid k-index - const size_t k_idx_start = KAI_MIN((size_t)k_idx + k_block_idx, k - 1); + const size_t k_idx_start = KAI_MIN(k_idx + k_block_idx, k - 1); const float src0_0 = *(src_ptr + k_idx_start); // Scale the values int32_t v0_s32 = (int32_t)(roundf(src0_0 * rep_scale)); - *((int8_t*)(dst_ptr)) = (int8_t)v0_s32; + *dst_ptr = (int8_t)v0_s32; dst_ptr += sizeof(int8_t); } dst_ptr += (mr - 1) * k_block_len * sizeof(int8_t); diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme.c b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme.c index 015e70ba..5ce709fa 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme.c @@ -15,8 +15,8 @@ static const size_t kai_nr = 2; static const size_t kai_kr = 1; -static const size_t kai_num_bytes_data = sizeof(uint32_t); -static const size_t kai_num_bytes_bias = sizeof(uint32_t); +static const size_t kai_data_size_in_bytes = sizeof(uint32_t); +static const size_t kai_bias_size_in_bytes = sizeof(uint32_t); size_t 
kai_get_n_step_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme(void) { return kai_nr * kai_get_sme_vector_length_u32(); @@ -25,21 +25,21 @@ size_t kai_get_n_step_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme(void) { size_t kai_get_rhs_offset_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme(size_t n_idx) { KAI_ASSUME(n_idx % (kai_nr * kai_get_sme_vector_length_u32()) == 0); - return n_idx * kai_num_bytes_data; + return n_idx * kai_data_size_in_bytes; } size_t kai_get_bias_offset_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme(size_t n_idx) { - return n_idx * kai_num_bytes_bias; + return n_idx * kai_bias_size_in_bytes; } size_t kai_get_rhs_packed_stride_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme(size_t k) { - return kai_nr * kai_get_sme_vector_length_u32() * (kai_num_bytes_bias + k * kai_num_bytes_data); + return kai_nr * kai_get_sme_vector_length_u32() * (kai_bias_size_in_bytes + k * kai_data_size_in_bytes); } size_t kai_get_rhs_packed_offset_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme(size_t n_idx, size_t k) { KAI_ASSUME(n_idx % (kai_nr * kai_get_sme_vector_length_u32()) == 0); - return n_idx * (kai_num_bytes_bias + k * kai_num_bytes_data); + return n_idx * (kai_bias_size_in_bytes + k * kai_data_size_in_bytes); } size_t kai_get_rhs_packed_size_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme(size_t n, size_t k) { diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4c32p_qsu4c32s1s0.c b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4c32p_qsu4c32s1s0.c index ece342eb..e556d7ab 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4c32p_qsu4c32s1s0.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4c32p_qsu4c32s1s0.c @@ -5,7 +5,6 @@ // #include "kai_rhs_pack_kxn_qsi4c32p_qsu4c32s1s0.h" -#include #include #include #include @@ -233,8 +232,8 @@ void kai_run_rhs_pack_kxn_qsi4c32p_qsu4c32s1s0( const uint8_t src_x0_lo = (byte0 & 0x0F); const uint8_t src_x0_hi = (byte1 & 0x0F); - sums[nr_idx] += ((int32_t)src_x0_lo - rhs_zero_point) * d; - sums[nr_idx] += ((int32_t)src_x0_hi - rhs_zero_point) * 
d; + sums[nr_idx] += (float)((int32_t)src_x0_lo - rhs_zero_point) * d; + sums[nr_idx] += (float)((int32_t)src_x0_hi - rhs_zero_point) * d; const uint8_t dst_qs0 = src_x0_lo | (src_x0_hi << 4); @@ -243,8 +242,8 @@ void kai_run_rhs_pack_kxn_qsi4c32p_qsu4c32s1s0( const uint8_t src_x1_lo = (byte0 >> 4); const uint8_t src_x1_hi = (byte1 >> 4); - sums[nr_idx] += ((int32_t)src_x1_lo - rhs_zero_point) * d; - sums[nr_idx] += ((int32_t)src_x1_hi - rhs_zero_point) * d; + sums[nr_idx] += (float)((int32_t)src_x1_lo - rhs_zero_point) * d; + sums[nr_idx] += (float)((int32_t)src_x1_hi - rhs_zero_point) * d; const uint8_t dst_qs1 = src_x1_lo | (src_x1_hi << 4); @@ -270,6 +269,6 @@ void kai_run_rhs_pack_kxn_qsi4c32p_qsu4c32s1s0( } // Move the pointer after the biases - dst_row += kai_num_bytes_bias * nr; + // dst_row += kai_num_bytes_bias * nr; } } diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4cxp_qs4cxs1s0.c b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4cxp_qs4cxs1s0.c index 491a7a34..596f2372 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4cxp_qs4cxs1s0.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4cxp_qs4cxs1s0.c @@ -5,7 +5,6 @@ // #include "kai_rhs_pack_kxn_qsi4cxp_qs4cxs1s0.h" -#include #include #include #include diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0.c b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0.c index f304324a..61df10b5 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0.c @@ -5,7 +5,6 @@ // #include "kai_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0.h" -#include #include #include #include @@ -251,8 +250,8 @@ void kai_run_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0( break; } - sums[nr_idx] += ((int32_t)src_x0_lo - rhs_zero_point) * d; - sums[nr_idx] += ((int32_t)src_x0_hi - rhs_zero_point) * d; + sums[nr_idx] += (float)((int32_t)src_x0_lo - rhs_zero_point) * d; + sums[nr_idx] += (float)((int32_t)src_x0_hi - 
rhs_zero_point) * d; const uint8_t dst_qs0 = src_x0_lo | (src_x0_hi << 4); @@ -277,6 +276,6 @@ void kai_run_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0( } // Move the pointer after the biases - dst_row += kai_num_bytes_bias * nr; + // dst_row += kai_num_bytes_bias * nr; } } diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.c b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.c index 475d781d..5d2f8454 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.c @@ -5,7 +5,6 @@ // #include "kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.h" -#include #include #include #include @@ -106,7 +105,7 @@ void kai_run_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0( const size_t num_bytes_per_segment = kr / 2; for (size_t y = 0; y < n; y += nr) { - const uint8_t* src_row = (const uint8_t*)rhs + y * rhs_stride; + const uint8_t* src_row = rhs + y * rhs_stride; uint8_t* dst_row = (uint8_t*)rhs_packed + (y / nr) * rhs_packed_stride; for (size_t x = 0; x < num_blocks_per_row; ++x) { diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxp_qs4cxs1s0.c b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxp_qs4cxs1s0.c index 5a128f2e..104e6a71 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxp_qs4cxs1s0.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxp_qs4cxs1s0.c @@ -5,7 +5,6 @@ // #include "kai_rhs_pack_nxk_qsi4cxp_qs4cxs1s0.h" -#include #include #include #include -- GitLab From 4b65390e3b038d6fc1b7740cbbec6a2e2eda48c3 Mon Sep 17 00:00:00 2001 From: Anton Bondarenko Date: Fri, 4 Oct 2024 13:42:08 +0200 Subject: [PATCH 2/2] fix/suppress new lint warning after rebase Signed-off-by: Anton Bondarenko --- kai/kai_common.h | 5 ++++- ...tmul_clamp_f32_bf16p_bf16p12x4b_8x12x4_neon_mmla.c | 1 + ...amp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm.c | 3 ++- ...mp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod.c | 3 ++- 
...mp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod.c | 3 ++- ...p_f32_qai8dxp4x8_qsi4cxp4x4_16x4x32_neon_dotprod.c | 3 ++- ...mp_f32_qai8dxp4x8_qsi4cxp8x4_8x8x32_neon_dotprod.c | 3 ++- .../pack/kai_rhs_pack_kxn_f32pb_f32_f32_16vlx1_sme.c | 2 +- .../pack/kai_rhs_pack_kxn_qsi4c32p_qsu4c32s1s0.c | 3 --- .../matmul/pack/kai_rhs_pack_kxn_qsi4cxp_qs4cxs1s0.c | 7 ++++--- .../pack/kai_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0.c | 3 --- .../matmul/pack/kai_rhs_pack_nxk_qsi4cxp_qs4cxs1s0.c | 11 +++++------ 12 files changed, 25 insertions(+), 22 deletions(-) diff --git a/kai/kai_common.h b/kai/kai_common.h index b8f64f5d..944f4d06 100644 --- a/kai/kai_common.h +++ b/kai/kai_common.h @@ -158,7 +158,10 @@ inline static uint64_t kai_get_sme_vector_length_u32(void) { /// /// @return the int8_t value with sign extended inline static int8_t kai_ext_sign_i8_i4(int8_t value) { - return (value ^ 0x8) - 8; + // Make sure value holds correct int4 value + KAI_ASSERT(value <= 0xF); + + return (value ^ 0x8) - 8; // NOLINT(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) } #ifdef __cplusplus diff --git a/kai/ukernels/matmul/matmul_clamp_f32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p_bf16p12x4b_8x12x4_neon_mmla.c b/kai/ukernels/matmul/matmul_clamp_f32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p_bf16p12x4b_8x12x4_neon_mmla.c index 929e3753..4934496f 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p_bf16p12x4b_8x12x4_neon_mmla.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p_bf16p12x4b_8x12x4_neon_mmla.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "kai/kai_common.h" diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm.c index 92f8a56d..7b400442 100644 --- 
a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm.c @@ -107,7 +107,8 @@ size_t kai_get_dst_size_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8m } void kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm( - size_t m, size_t n, size_t k, size_t bl, const void* lhs_packed, const void* rhs_packed, float* dst, + size_t m, size_t n, size_t k, size_t bl, const void* lhs_packed, const void* rhs_packed, + float* dst, // NOLINT(readability-non-const-parameter) size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT((bl % kai_kr) == 0); KAI_ASSERT((bl % kai_bl_multiple_of) == 0); diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod.c index e7cd60a7..e44a5fd9 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod.c @@ -99,7 +99,8 @@ size_t kai_get_dst_size_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotpr } void kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod( - size_t m, size_t n, size_t k, const void* restrict lhs_packed, const void* restrict rhs_packed, float* restrict dst, + size_t m, size_t n, size_t k, const void* restrict lhs_packed, const void* restrict rhs_packed, + float* restrict dst, // NOLINT(readability-non-const-parameter) size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT(dst_stride_col == sizeof(float)); diff --git 
a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod.c index 0bde1f8b..54d63ae7 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod.c @@ -99,7 +99,8 @@ size_t kai_get_dst_size_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotpr } void kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod( - size_t m, size_t n, size_t k, const void* restrict lhs_packed, const void* restrict rhs_packed, float* restrict dst, + size_t m, size_t n, size_t k, const void* restrict lhs_packed, const void* restrict rhs_packed, + float* restrict dst, // NOLINT(readability-non-const-parameter) size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT(dst_stride_col == sizeof(float)); diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x4_16x4x32_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x4_16x4x32_neon_dotprod.c index 2ede94d6..1f24fb46 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x4_16x4x32_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x4_16x4x32_neon_dotprod.c @@ -98,7 +98,8 @@ size_t kai_get_dst_size_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x4_16x4x32_neon_dotp } void kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x4_16x4x32_neon_dotprod( - size_t m, size_t n, size_t k, const void* restrict lhs_packed, const void* restrict rhs_packed, float* restrict dst, + size_t m, size_t n, size_t k, const void* restrict lhs_packed, 
const void* restrict rhs_packed, + float* restrict dst, // NOLINT(readability-non-const-parameter) size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT(dst_stride_col == sizeof(float)); diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x4_8x8x32_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x4_8x8x32_neon_dotprod.c index abe6f19f..99edd2c1 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x4_8x8x32_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x4_8x8x32_neon_dotprod.c @@ -98,7 +98,8 @@ size_t kai_get_dst_size_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x4_8x8x32_neon_dotpr } void kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x4_8x8x32_neon_dotprod( - size_t m, size_t n, size_t k, const void* restrict lhs_packed, const void* restrict rhs_packed, float* restrict dst, + size_t m, size_t n, size_t k, const void* restrict lhs_packed, const void* restrict rhs_packed, + float* restrict dst, // NOLINT(readability-non-const-parameter) size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT(dst_stride_col == sizeof(float)); diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32pb_f32_f32_16vlx1_sme.c b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32pb_f32_f32_16vlx1_sme.c index 2e9685b8..d03359ca 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32pb_f32_f32_16vlx1_sme.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32pb_f32_f32_16vlx1_sme.c @@ -62,7 +62,7 @@ void kai_run_rhs_pack_kxn_f32pb_f32_f32_16vlx1_sme( const void* in = rhs; void* out = rhs_packed; const size_t in_stride = rhs_stride; - size_t out_stride = kai_nr * kai_get_sme_vector_length_u8() * (height + sizeof(uint32_t) / sizeof(uint32_t)); + size_t out_stride = kai_nr * 
kai_get_sme_vector_length_u8() * (height + 1); __asm__ __volatile__( ".inst 0xd503477f // SMSTART ZA\n" diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4c32p_qsu4c32s1s0.c b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4c32p_qsu4c32s1s0.c index e556d7ab..ec191da4 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4c32p_qsu4c32s1s0.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4c32p_qsu4c32s1s0.c @@ -267,8 +267,5 @@ void kai_run_rhs_pack_kxn_qsi4c32p_qsu4c32s1s0( ((float*)dst_row)[i] = bias[src_row_idx]; } } - - // Move the pointer after the biases - // dst_row += kai_num_bytes_bias * nr; } } diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4cxp_qs4cxs1s0.c b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4cxp_qs4cxs1s0.c index 596f2372..46221874 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4cxp_qs4cxs1s0.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4cxp_qs4cxs1s0.c @@ -104,6 +104,8 @@ void kai_run_rhs_pack_kxn_qsi4cxp_qs4cxs1s0( const size_t src_addr_byte0 = (n0_valid_idx / 2) + k0_idx * rhs_stride; const size_t src_addr_byte1 = (n0_valid_idx / 2) + k1_idx * rhs_stride; + const size_t shift_right_x0 = (n0_idx % 2) * 4; + if (params->rhs_zero_point == 8) { uint8_t byte0 = rhs_zero_point | rhs_zero_point << 4; uint8_t byte1 = rhs_zero_point | rhs_zero_point << 4; @@ -134,7 +136,6 @@ void kai_run_rhs_pack_kxn_qsi4cxp_qs4cxs1s0( src_x0_hi = (byte1 >> 4); } */ - const size_t shift_right_x0 = (n0_idx % 2) * 4; const uint8_t src_x0_lo = (byte0 >> shift_right_x0) & 0x0F; const uint8_t src_x0_hi = (byte1 >> shift_right_x0) & 0x0F; @@ -149,6 +150,7 @@ void kai_run_rhs_pack_kxn_qsi4cxp_qs4cxs1s0( int8_t byte0 = 0; int8_t byte1 = 0; + // NOLINTBEGIN(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) if (k0_idx < k) { byte0 = rhs[src_addr_byte0]; } @@ -160,12 +162,11 @@ void kai_run_rhs_pack_kxn_qsi4cxp_qs4cxs1s0( // The logic behind the following operations where we extract the // values from the bytes is 
same as unsigned - const size_t shift_right_x0 = (n0_idx % 2) * 4; - int8_t src_x0_lo = (byte0 >> shift_right_x0) & 0x0F; int8_t src_x0_hi = (byte1 >> shift_right_x0) & 0x0F; const int8_t dst_qs0 = src_x0_lo | (src_x0_hi << 4); + // NOLINTEND(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) *(int8_t*)dst_row = dst_qs0; dst_row += sizeof(int8_t); diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0.c b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0.c index 61df10b5..26953967 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0.c @@ -274,8 +274,5 @@ void kai_run_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0( ((float*)dst_row)[i] = bias[src_row_idx]; } } - - // Move the pointer after the biases - // dst_row += kai_num_bytes_bias * nr; } } diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxp_qs4cxs1s0.c b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxp_qs4cxs1s0.c index 104e6a71..19887d09 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxp_qs4cxs1s0.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxp_qs4cxs1s0.c @@ -101,6 +101,9 @@ void kai_run_rhs_pack_nxk_qsi4cxp_qs4cxs1s0( const size_t src_addr_byte0 = (k0_idx / 2) + n0_valid_idx * rhs_stride; const size_t src_addr_byte1 = (k1_idx / 2) + n0_valid_idx * rhs_stride; + const size_t shift_right_x0 = (k0_idx % 2) * 4; + const size_t shift_right_x1 = (k1_idx % 2) * 4; + if (params->rhs_zero_point == 8) { uint8_t byte0 = rhs_zero_point | rhs_zero_point << 4; uint8_t byte1 = rhs_zero_point | rhs_zero_point << 4; @@ -131,9 +134,6 @@ void kai_run_rhs_pack_nxk_qsi4cxp_qs4cxs1s0( src_x0_hi = (byte1 >> 4); } */ - const size_t shift_right_x0 = (k0_idx % 2) * 4; - const size_t shift_right_x1 = (k1_idx % 2) * 4; - const uint8_t src_x0_lo = (byte0 >> shift_right_x0) & 0x0F; const uint8_t src_x0_hi = (byte1 >> shift_right_x1) & 0x0F; @@ -147,6 +147,7 @@ void 
kai_run_rhs_pack_nxk_qsi4cxp_qs4cxs1s0( int8_t byte0 = 0; int8_t byte1 = 0; + // NOLINTBEGIN(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) if (k0_idx < k) { byte0 = rhs[src_addr_byte0]; } @@ -158,13 +159,11 @@ void kai_run_rhs_pack_nxk_qsi4cxp_qs4cxs1s0( // The logic behind the following operations where we extract the // values from the bytes is same as unsigned - const size_t shift_right_x0 = (k0_idx % 2) * 4; - const size_t shift_right_x1 = (k1_idx % 2) * 4; - int8_t src_x0_lo = (byte0 >> shift_right_x0) & 0x0F; int8_t src_x0_hi = (byte1 >> shift_right_x1) & 0x0F; const int8_t dst_qs0 = src_x0_lo | (src_x0_hi << 4); + // NOLINTEND(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) *(int8_t*)dst_row = dst_qs0; dst_row += sizeof(int8_t); -- GitLab