diff --git a/.clang-tidy b/.clang-tidy index 1da1dcfd48c2a28e2284deb46ce2b12cabfa9854..b2f304c5498279d31078465317912dc36f07ed40 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -4,33 +4,23 @@ # SPDX-License-Identifier: Apache-2.0 # --- -Checks: ' --*, -bugprone-*, -cert-*, -clang-analyzer-*, -clang-diagnostic-*, -cppcoreguidelines-*, --cppcoreguidelines-owning-memory, -google-*, --google-readability-todo, -llvm-*, --llvm-include-order, -misc-*, -modernize-*, --modernize-use-trailing-return-type, -performance-*, -readability-*, --readability-identifier-length, --readability-magic-numbers, --readability-function-cognitive-complexity, --cppcoreguidelines-pro-type-reinterpret-cast, --cppcoreguidelines-avoid-magic-numbers, --readability-simplify-boolean-expr, --bugprone-easily-swappable-parameters, --cppcoreguidelines-pro-bounds-pointer-arithmetic, --performance-enum-size, --llvm-else-after-return, --readability-else-after-return, -' -... +Checks: > + -*, + bugprone-*, + cert-*, + clang-analyzer-*, + clang-diagnostic-*, + cppcoreguidelines-*, + google-*, + llvm-*, + misc-*, + modernize-*, + performance-*, + readability-*, + + -bugprone-easily-swappable-parameters, + -cppcoreguidelines-avoid-magic-numbers, + -llvm-header-guard, + -readability-function-cognitive-complexity, + -readability-identifier-length, + -readability-magic-numbers, diff --git a/.editorconfig b/.editorconfig index 8b3a3b254ce64477fd36659fddeba88c7fe15b97..7e95757f518756e8eca93e4f3001d967f1dd4d27 100644 --- a/.editorconfig +++ b/.editorconfig @@ -24,6 +24,10 @@ indent_size = unset [*.{json,yml,yaml}] indent_size = 2 +# YAML files as well. +[.clang-{tidy,format}] +indent_size = 2 + # Override settings. [LICENSES/*] indent_size = unset diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e8b0b2f352bedf71aa3b39cd03046bcc6c847d60..9f8fff7257b7295ae8698e3ce282bb83a1314b02 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -142,13 +142,15 @@ test-examples: paths: - ${EXAMPLE}.log -clang-tidy-checks: +test-clang-tidy: extends: - .standard-rules - stage: build + stage: test + needs: [] script: - - cmake -G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release -DKLEIDIAI_BUILD_TESTS=ON -DKLEIDIAI_ENABLE_CLANG_TIDY=ON -S . -B ${CI_JOB_NAME_SLUG} - - cmake --build ${CI_JOB_NAME_SLUG} + - cmake -G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release -DKLEIDIAI_BUILD_TESTS=ON -DKLEIDIAI_BUILD_BENCHMARK=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -B build/${CI_JOB_NAME_SLUG} + # Only test the main library. + - run-clang-tidy -header-filter ".*" -warnings-as-errors "*" -p build/${CI_JOB_NAME_SLUG} kai pre-commit-hooks: variables: diff --git a/kai/kai_common.h b/kai/kai_common.h index 2703418542324f4c7a69635a88aae45a86d9d2df..944f4d069b61b31839871cd7be2bea47c9edf45d 100644 --- a/kai/kai_common.h +++ b/kai/kai_common.h @@ -92,7 +92,7 @@ inline static float kai_cast_f32_f16(uint16_t f16) { /// @return the f32 value inline static float kai_cast_f32_bf16(uint16_t bf16) { const uint32_t i32 = (bf16 << 16); - float f32; + float f32 = 0; memcpy(&f32, &i32, sizeof(i32)); return f32; } @@ -119,7 +119,7 @@ inline static uint16_t kai_cast_bf16_f32(float f32) { #if defined(__ARM_NEON) inline static uint16_t kai_cast_f16_f32(float f32) { uint16_t f16 = 0; - __fp16 tmp = f32; + __fp16 tmp = (__fp16)f32; memcpy(&f16, &tmp, sizeof(uint16_t)); return f16; } @@ -158,7 +158,10 @@ inline static uint64_t kai_get_sme_vector_length_u32(void) { /// /// @return the int8_t value with sign extended inline static int8_t kai_ext_sign_i8_i4(int8_t value) { - return (value ^ 0x8) - 8; + // Make sure value holds correct int4 value + KAI_ASSERT(value <= 0xF); + + return (value ^ 0x8) - 8; // NOLINT(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) } #ifdef __cplusplus diff --git a/kai/ukernels/matmul/matmul_clamp_f16_f16_f16p/kai_matmul_clamp_f16_f16_f16p16x1biasf16_6x16x8_neon_mla.c b/kai/ukernels/matmul/matmul_clamp_f16_f16_f16p/kai_matmul_clamp_f16_f16_f16p16x1biasf16_6x16x8_neon_mla.c index 9f9d5a753284e88bf9bf4ac714d31fc9888ca20c..90335b31fa8660ca7c709ac86083dadee6e99fbf 100644 --- a/kai/ukernels/matmul/matmul_clamp_f16_f16_f16p/kai_matmul_clamp_f16_f16_f16p16x1biasf16_6x16x8_neon_mla.c +++ b/kai/ukernels/matmul/matmul_clamp_f16_f16_f16p/kai_matmul_clamp_f16_f16_f16p16x1biasf16_6x16x8_neon_mla.c @@ -11,7 +11,6 @@ #include "kai_matmul_clamp_f16_f16_f16p16x1biasf16_6x16x8_neon_mla.h" -#include #include #include diff --git a/kai/ukernels/matmul/matmul_clamp_f32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p_bf16p12x4b_8x12x4_neon_mmla.c b/kai/ukernels/matmul/matmul_clamp_f32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p_bf16p12x4b_8x12x4_neon_mmla.c index 929e3753a90c40bb64072e34f2671d97b211bc53..4934496fea75e704bedae0b576ba50e81eaf6e47 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p_bf16p12x4b_8x12x4_neon_mmla.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p_bf16p12x4b_8x12x4_neon_mmla.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "kai/kai_common.h" diff --git a/kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla.c b/kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla.c index 62723018c615cc854912c4d450a96cb62a9f124e..fb380100906f6df59386a857d96b89a441ce7186 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla.c @@ -10,7 +10,6 @@ #include "kai_matmul_clamp_f32_f32_f32p8x1biasf32_6x8x4_neon_mla.h" -#include #include #include diff --git a/kai/ukernels/matmul/matmul_clamp_f32_f32p_f32p/kai_matmul_clamp_f32_f32p2vlx1_f32p2vlx1biasf32_sme2_mopa.c b/kai/ukernels/matmul/matmul_clamp_f32_f32p_f32p/kai_matmul_clamp_f32_f32p2vlx1_f32p2vlx1biasf32_sme2_mopa.c index 5611fad12f1802859f18d279469eb388b124c2a0..2836121f1216d8ab5dbad68c2f98cdf3db841045 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_f32p_f32p/kai_matmul_clamp_f32_f32p2vlx1_f32p2vlx1biasf32_sme2_mopa.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_f32p_f32p/kai_matmul_clamp_f32_f32p2vlx1_f32p2vlx1biasf32_sme2_mopa.c @@ -76,8 +76,8 @@ void kai_run_matmul_clamp_f32_f32p2vlx1_f32p2vlx1biasf32_sme2_mopa( const void* B; void* C; - long ldcb; - long M, N, K; + uint64_t ldcb; + uint64_t M, N, K; float min; float max; diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c index d0a3f7de0fd65720ea5bc30488b185b948cd9488..532eb1ec33e9fe40fe2e7eff117ba37c0bcf6357 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c @@ -108,7 +108,8 @@ size_t kai_get_dst_size_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotp void kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod( size_t m, size_t n, size_t k, size_t bl, const void* restrict lhs_packed, const void* restrict rhs_packed, - float* restrict dst, size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { + float* restrict dst, // NOLINT(readability-non-const-parameter) + size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT((bl % kai_kr) == 0); KAI_ASSERT((bl % kai_bl_multiple_of) == 0); KAI_ASSERT(dst_stride_col == sizeof(float)); diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod.c index ea48678ece9ce961c251bd6a73f5ace5f73e7a54..7f6f9e86b1a00aec4a2bdcd05b616d845ba68a8f 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod.c @@ -108,7 +108,8 @@ size_t kai_get_dst_size_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotp void kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod( size_t m, size_t n, size_t k, size_t bl, const void* restrict lhs_packed, const void* restrict rhs_packed, - float* restrict dst, size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { + float* restrict dst, // NOLINT(readability-non-const-parameter) + size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT((bl % kai_kr) == 0); KAI_ASSERT((bl % kai_bl_multiple_of) == 0); KAI_ASSERT(dst_stride_col == sizeof(float)); diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm.c index 92f8a56d63433d2a2147d1f8a205a754e1122bb5..7b4004427e638343ab4dc4724d66b4a1a865aab8 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm.c @@ -107,7 +107,8 @@ size_t kai_get_dst_size_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8m } void kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_16x4x32_neon_i8mm( - size_t m, size_t n, size_t k, size_t bl, const void* lhs_packed, const void* rhs_packed, float* dst, + size_t m, size_t n, size_t k, size_t bl, const void* lhs_packed, const void* rhs_packed, + float* dst, // NOLINT(readability-non-const-parameter) size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT((bl % kai_kr) == 0); KAI_ASSERT((bl % kai_bl_multiple_of) == 0); diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm.c index 5e84e87099917affd92cb9df86fb16b87c3dc035..33ad7c3696f0a3d0683fec6b541f3bbc9f3102ac 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm.c @@ -107,7 +107,8 @@ size_t kai_get_dst_size_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm } void kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm( - size_t m, size_t n, size_t k, size_t bl, const void* lhs_packed, const void* rhs_packed, float* dst, + size_t m, size_t n, size_t k, size_t bl, const void* lhs_packed, const void* rhs_packed, + float* dst, // NOLINT(readability-non-const-parameter) size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT((bl % kai_kr) == 0); KAI_ASSERT((bl % kai_bl_multiple_of) == 0); diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm.c index f64249f137976a3e1b2501213a1c13d2dea321d5..62ffeb87f842e86e9ad3c97160e68e263df4747c 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4c32p/kai_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm.c @@ -107,7 +107,8 @@ size_t kai_get_dst_size_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm } void kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4c32p8x8_4x8x32_neon_i8mm( - size_t m, size_t n, size_t k, size_t bl, const void* lhs_packed, const void* rhs_packed, float* dst, + size_t m, size_t n, size_t k, size_t bl, const void* lhs_packed, const void* rhs_packed, + float* dst, // NOLINT(readability-non-const-parameter) size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT((bl % kai_kr) == 0); KAI_ASSERT((bl % kai_bl_multiple_of) == 0); diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod.c index e7cd60a7f0bf71398eba822f4562f3a85a06a203..e44a5fd945065b15ec71294a7bf1a973513ee9b6 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod.c @@ -99,7 +99,8 @@ size_t kai_get_dst_size_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotpr } void kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod( - size_t m, size_t n, size_t k, const void* restrict lhs_packed, const void* restrict rhs_packed, float* restrict dst, + size_t m, size_t n, size_t k, const void* restrict lhs_packed, const void* restrict rhs_packed, + float* restrict dst, // NOLINT(readability-non-const-parameter) size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT(dst_stride_col == sizeof(float)); diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod.c index 0bde1f8beae3fde36c0679fdb31394f01c5ee2a4..54d63ae74d3fae56f11914c53bccadeea8bd8266 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod.c @@ -99,7 +99,8 @@ size_t kai_get_dst_size_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotpr } void kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod( - size_t m, size_t n, size_t k, const void* restrict lhs_packed, const void* restrict rhs_packed, float* restrict dst, + size_t m, size_t n, size_t k, const void* restrict lhs_packed, const void* restrict rhs_packed, + float* restrict dst, // NOLINT(readability-non-const-parameter) size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT(dst_stride_col == sizeof(float)); diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x4_16x4x32_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x4_16x4x32_neon_dotprod.c index 2ede94d6f677c2f05b76410a22920a9e51d81227..1f24fb465f89e526dca477bcd1acb55d92ddf1fb 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x4_16x4x32_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x4_16x4x32_neon_dotprod.c @@ -98,7 +98,8 @@ size_t kai_get_dst_size_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x4_16x4x32_neon_dotp } void kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x4_16x4x32_neon_dotprod( - size_t m, size_t n, size_t k, const void* restrict lhs_packed, const void* restrict rhs_packed, float* restrict dst, + size_t m, size_t n, size_t k, const void* restrict lhs_packed, const void* restrict rhs_packed, + float* restrict dst, // NOLINT(readability-non-const-parameter) size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT(dst_stride_col == sizeof(float)); diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm.c index 4f8edd1e5ba8e3c0622f05cb8f2cc3eca69d0297..9b27c742b6eb4e542752db28bce2ba6b59cf27e0 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm.c @@ -99,8 +99,9 @@ size_t kai_get_dst_size_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm( } void kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_4x4x32_neon_i8mm( - size_t m, size_t n, size_t k, const void* lhs_packed, const void* rhs_packed, float* dst, size_t dst_stride_row, - size_t dst_stride_col, float scalar_min, float scalar_max) { + size_t m, size_t n, size_t k, const void* lhs_packed, const void* rhs_packed, + float* dst, // NOLINT(readability-non-const-parameter) + size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT(dst_stride_col == sizeof(float)); if (m == 0) { diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm.c index 7e40839e63d15b19a082461b033624ff7b22a5aa..7853163113d5ead9598cb9fb273bb0e2e485686a 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm.c @@ -98,8 +98,9 @@ size_t kai_get_dst_size_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm( } void kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4cxp4x8_8x4x32_neon_i8mm( - size_t m, size_t n, size_t k, const void* lhs_packed, const void* rhs_packed, float* dst, size_t dst_stride_row, - size_t dst_stride_col, float scalar_min, float scalar_max) { + size_t m, size_t n, size_t k, const void* lhs_packed, const void* rhs_packed, + float* dst, // NOLINT(readability-non-const-parameter) + size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT(dst_stride_col == sizeof(float)); if (m == 0) { diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x4_8x8x32_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x4_8x8x32_neon_dotprod.c index abe6f19fe2c613e88b63f73b96b4f7c8158f773c..99edd2c116d95fd55a3c23104516b83ce96e108e 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x4_8x8x32_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x4_8x8x32_neon_dotprod.c @@ -98,7 +98,8 @@ size_t kai_get_dst_size_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x4_8x8x32_neon_dotpr } void kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x4_8x8x32_neon_dotprod( - size_t m, size_t n, size_t k, const void* restrict lhs_packed, const void* restrict rhs_packed, float* restrict dst, + size_t m, size_t n, size_t k, const void* restrict lhs_packed, const void* restrict rhs_packed, + float* restrict dst, // NOLINT(readability-non-const-parameter) size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT(dst_stride_col == sizeof(float)); diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm.c index fff2d5aa35bca78b0de4aecc0025abfaa0c59d1d..51585fe0b5522c5f2b5caa64938cb053df6a4dce 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm.c @@ -98,7 +98,8 @@ size_t kai_get_dst_size_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm( } void kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_4x8x32_neon_i8mm( - size_t m, size_t n, size_t k, const void* restrict lhs_packed, const void* restrict rhs_packed, float* restrict dst, + size_t m, size_t n, size_t k, const void* restrict lhs_packed, const void* restrict rhs_packed, + float* restrict dst, // NOLINT(readability-non-const-parameter) size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT(dst_stride_col == sizeof(float)); diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm.c b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm.c index e9f96a3ab2067fcb36da99bbba09ae66b69f30ac..b310bf748e42e1a4de205fbaa592be00daa25492 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi4cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm.c @@ -98,8 +98,9 @@ size_t kai_get_dst_size_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm( } void kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm( - size_t m, size_t n, size_t k, const void* lhs_packed, const void* rhs_packed, float* dst, size_t dst_stride_row, - size_t dst_stride_col, float scalar_min, float scalar_max) { + size_t m, size_t n, size_t k, const void* lhs_packed, const void* rhs_packed, + float* dst, // NOLINT(readability-non-const-parameter) + size_t dst_stride_row, size_t dst_stride_col, float scalar_min, float scalar_max) { KAI_ASSERT(dst_stride_col == sizeof(float)); if (m == 0) { diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c b/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c index e41de7f6446c19f5253ee3707cd611c09d8103e8..37ee526cf0202f23342a686d5ebddcc88e10b93d 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c @@ -125,7 +125,7 @@ void kai_run_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod( const size_t num_rows = m; const size_t lhs_packed_stride = kai_lhs_packed_stride(k, bl); - const int8x16_t nibble_mask = vdupq_n_s8(0xF0); + const int8x16_t nibble_mask = vreinterpretq_s8_u8(vdupq_n_u8(0xF0)); const uint8_t* lhs_ptr_start = lhs_packed; diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm.c b/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm.c index 78c9ec0b90884e18f16d9b589006b80b3e152c84..c3ce0882b35bed55d20c31b6660cf6c711adfd98 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm.c @@ -10,6 +10,7 @@ #include "kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm.h" #include +#include #include #include "kai/kai_common.h" @@ -125,7 +126,7 @@ void kai_run_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm( const size_t num_cols = n; const size_t num_rows = m; - const int8x16_t nibble_mask = vdupq_n_s8(0xF0); + const int8x16_t nibble_mask = vreinterpretq_s8_u8(vdupq_n_u8(0xF0)); const uint8_t* lhs_ptr_start = lhs_packed; diff --git a/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32.c b/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32.c index a391335df0224fca1d81b850703513d8fb09ca2a..e6c32004252a4fd1da62982897b922d151975bb5 100644 --- a/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32.c +++ b/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32.c @@ -91,20 +91,20 @@ void kai_run_lhs_quant_pack_qsi8d32p_f32( // Calculate scale and reciprocal const float scale = abs_max / ((1 << 7) - 1); - const float rep_scale = scale ? 1.0f / scale : 0.0f; + const float rep_scale = scale ? 1.0F / scale : 0.0F; // Quantize and pack the block for (size_t k_idx = 0; k_idx < bl; k_idx += k_block_len) { - for (size_t k_block_idx = 0; k_block_idx < (size_t)k_block_len; ++k_block_idx) { + for (size_t k_block_idx = 0; k_block_idx < k_block_len; ++k_block_idx) { // Clamp at the last valid k-index - const size_t k_idx_start = KAI_MIN((size_t)k_idx + k_block_idx, k - 1); + const size_t k_idx_start = KAI_MIN(k_idx + k_block_idx, k - 1); const float src0_0 = *(src_ptr + k_idx_start); // Scale the values int32_t v0_s32 = (int32_t)(roundf(src0_0 * rep_scale)); - *((int8_t*)(dst_ptr)) = (int8_t)v0_s32; + *dst_ptr = (int8_t)v0_s32; dst_ptr += sizeof(int8_t); } dst_ptr += (mr - 1) * k_block_len * sizeof(int8_t); diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme.c b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme.c index 015e70bafe70a8fd24df9dbb7bd3e536efce3f04..5ce709fa256fba153637aaee3be76c991b64bb9f 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme.c @@ -15,8 +15,8 @@ static const size_t kai_nr = 2; static const size_t kai_kr = 1; -static const size_t kai_num_bytes_data = sizeof(uint32_t); -static const size_t kai_num_bytes_bias = sizeof(uint32_t); +static const size_t kai_data_size_in_bytes = sizeof(uint32_t); +static const size_t kai_bias_size_in_bytes = sizeof(uint32_t); size_t kai_get_n_step_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme(void) { return kai_nr * kai_get_sme_vector_length_u32(); @@ -25,21 +25,21 @@ size_t kai_get_n_step_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme(void) { size_t kai_get_rhs_offset_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme(size_t n_idx) { KAI_ASSUME(n_idx % (kai_nr * kai_get_sme_vector_length_u32()) == 0); - return n_idx * kai_num_bytes_data; + return n_idx * kai_data_size_in_bytes; } size_t kai_get_bias_offset_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme(size_t n_idx) { - return n_idx * kai_num_bytes_bias; + return n_idx * kai_bias_size_in_bytes; } size_t kai_get_rhs_packed_stride_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme(size_t k) { - return kai_nr * kai_get_sme_vector_length_u32() * (kai_num_bytes_bias + k * kai_num_bytes_data); + return kai_nr * kai_get_sme_vector_length_u32() * (kai_bias_size_in_bytes + k * kai_data_size_in_bytes); } size_t kai_get_rhs_packed_offset_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme(size_t n_idx, size_t k) { KAI_ASSUME(n_idx % (kai_nr * kai_get_sme_vector_length_u32()) == 0); - return n_idx * (kai_num_bytes_bias + k * kai_num_bytes_data); + return n_idx * (kai_bias_size_in_bytes + k * kai_data_size_in_bytes); } size_t kai_get_rhs_packed_size_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme(size_t n, size_t k) { diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32pb_f32_f32_16vlx1_sme.c b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32pb_f32_f32_16vlx1_sme.c index 2e9685b8b2de7de625ebccd426a2cabc08cca63a..d03359ca54ef87985629df0e7af15d2090745a85 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32pb_f32_f32_16vlx1_sme.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32pb_f32_f32_16vlx1_sme.c @@ -62,7 +62,7 @@ void kai_run_rhs_pack_kxn_f32pb_f32_f32_16vlx1_sme( const void* in = rhs; void* out = rhs_packed; const size_t in_stride = rhs_stride; - size_t out_stride = kai_nr * kai_get_sme_vector_length_u8() * (height + sizeof(uint32_t) / sizeof(uint32_t)); + size_t out_stride = kai_nr * kai_get_sme_vector_length_u8() * (height + 1); __asm__ __volatile__( ".inst 0xd503477f // SMSTART ZA\n" diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4c32p_qsu4c32s1s0.c b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4c32p_qsu4c32s1s0.c index ece342ebd66aaaa2b1c7a323b7f15b41482c2497..ec191da45b304ca0fd4bf8077c959ea3992dd9db 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4c32p_qsu4c32s1s0.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4c32p_qsu4c32s1s0.c @@ -5,7 +5,6 @@ // #include "kai_rhs_pack_kxn_qsi4c32p_qsu4c32s1s0.h" -#include #include #include #include @@ -233,8 +232,8 @@ void kai_run_rhs_pack_kxn_qsi4c32p_qsu4c32s1s0( const uint8_t src_x0_lo = (byte0 & 0x0F); const uint8_t src_x0_hi = (byte1 & 0x0F); - sums[nr_idx] += ((int32_t)src_x0_lo - rhs_zero_point) * d; - sums[nr_idx] += ((int32_t)src_x0_hi - rhs_zero_point) * d; + sums[nr_idx] += (float)((int32_t)src_x0_lo - rhs_zero_point) * d; + sums[nr_idx] += (float)((int32_t)src_x0_hi - rhs_zero_point) * d; const uint8_t dst_qs0 = src_x0_lo | (src_x0_hi << 4); @@ -243,8 +242,8 @@ void kai_run_rhs_pack_kxn_qsi4c32p_qsu4c32s1s0( const uint8_t src_x1_lo = (byte0 >> 4); const uint8_t src_x1_hi = (byte1 >> 4); - sums[nr_idx] += ((int32_t)src_x1_lo - rhs_zero_point) * d; - sums[nr_idx] += ((int32_t)src_x1_hi - rhs_zero_point) * d; + sums[nr_idx] += (float)((int32_t)src_x1_lo - rhs_zero_point) * d; + sums[nr_idx] += (float)((int32_t)src_x1_hi - rhs_zero_point) * d; const uint8_t dst_qs1 = src_x1_lo | (src_x1_hi << 4); @@ -268,8 +267,5 @@ void kai_run_rhs_pack_kxn_qsi4c32p_qsu4c32s1s0( ((float*)dst_row)[i] = bias[src_row_idx]; } } - - // Move the pointer after the biases - dst_row += kai_num_bytes_bias * nr; } } diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4cxp_qs4cxs1s0.c b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4cxp_qs4cxs1s0.c index 491a7a34350209ade49b3c2da2551803a8d8880d..462218746eb8ee82736200714953e2233f69f463 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4cxp_qs4cxs1s0.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_qsi4cxp_qs4cxs1s0.c @@ -5,7 +5,6 @@ // #include "kai_rhs_pack_kxn_qsi4cxp_qs4cxs1s0.h" -#include #include #include #include @@ -105,6 +104,8 @@ void kai_run_rhs_pack_kxn_qsi4cxp_qs4cxs1s0( const size_t src_addr_byte0 = (n0_valid_idx / 2) + k0_idx * rhs_stride; const size_t src_addr_byte1 = (n0_valid_idx / 2) + k1_idx * rhs_stride; + const size_t shift_right_x0 = (n0_idx % 2) * 4; + if (params->rhs_zero_point == 8) { uint8_t byte0 = rhs_zero_point | rhs_zero_point << 4; uint8_t byte1 = rhs_zero_point | rhs_zero_point << 4; @@ -135,7 +136,6 @@ void kai_run_rhs_pack_kxn_qsi4cxp_qs4cxs1s0( src_x0_hi = (byte1 >> 4); } */ - const size_t shift_right_x0 = (n0_idx % 2) * 4; const uint8_t src_x0_lo = (byte0 >> shift_right_x0) & 0x0F; const uint8_t src_x0_hi = (byte1 >> shift_right_x0) & 0x0F; @@ -150,6 +150,7 @@ void kai_run_rhs_pack_kxn_qsi4cxp_qs4cxs1s0( int8_t byte0 = 0; int8_t byte1 = 0; + // NOLINTBEGIN(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) if (k0_idx < k) { byte0 = rhs[src_addr_byte0]; } @@ -161,12 +162,11 @@ void kai_run_rhs_pack_kxn_qsi4cxp_qs4cxs1s0( // The logic behind the following operations where we extract the // values from the bytes is same as unsigned - const size_t shift_right_x0 = (n0_idx % 2) * 4; - int8_t src_x0_lo = (byte0 >> shift_right_x0) & 0x0F; int8_t src_x0_hi = (byte1 >> shift_right_x0) & 0x0F; const int8_t dst_qs0 = src_x0_lo | (src_x0_hi << 4); + // NOLINTEND(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) *(int8_t*)dst_row = dst_qs0; dst_row += sizeof(int8_t); diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0.c b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0.c index f304324a34ea6ad0d72c94bdf5b16e9c0d0d18b7..269539671fae829b5cca198b405b850a5b9ad75f 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0.c @@ -5,7 +5,6 @@ // #include "kai_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0.h" -#include #include #include #include @@ -251,8 +250,8 @@ void kai_run_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0( break; } - sums[nr_idx] += ((int32_t)src_x0_lo - rhs_zero_point) * d; - sums[nr_idx] += ((int32_t)src_x0_hi - rhs_zero_point) * d; + sums[nr_idx] += (float)((int32_t)src_x0_lo - rhs_zero_point) * d; + sums[nr_idx] += (float)((int32_t)src_x0_hi - rhs_zero_point) * d; const uint8_t dst_qs0 = src_x0_lo | (src_x0_hi << 4); @@ -275,8 +274,5 @@ void kai_run_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0( ((float*)dst_row)[i] = bias[src_row_idx]; } } - - // Move the pointer after the biases - dst_row += kai_num_bytes_bias * nr; } } diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.c b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.c index 475d781db28778030e44a4e3e807ecda1b0428c0..5d2f8454a2185e7899115f44f7f8b161bf4f4dda 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.c @@ -5,7 +5,6 @@ // #include "kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.h" -#include #include #include #include @@ -106,7 +105,7 @@ void kai_run_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0( const size_t num_bytes_per_segment = kr / 2; for (size_t y = 0; y < n; y += nr) { - const uint8_t* src_row = (const uint8_t*)rhs + y * rhs_stride; + const uint8_t* src_row = rhs + y * rhs_stride; uint8_t* dst_row = (uint8_t*)rhs_packed + (y / nr) * rhs_packed_stride; for (size_t x = 0; x < num_blocks_per_row; ++x) { diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxp_qs4cxs1s0.c b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxp_qs4cxs1s0.c index 5a128f2ed33d6604c6e4aaf92ad61f99aded1d66..19887d096e9e0a9f3e8c9650b8c143dfa5b75bbe 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxp_qs4cxs1s0.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4cxp_qs4cxs1s0.c @@ -5,7 +5,6 @@ // #include "kai_rhs_pack_nxk_qsi4cxp_qs4cxs1s0.h" -#include #include #include #include @@ -102,6 +101,9 @@ void kai_run_rhs_pack_nxk_qsi4cxp_qs4cxs1s0( const size_t src_addr_byte0 = (k0_idx / 2) + n0_valid_idx * rhs_stride; const size_t src_addr_byte1 = (k1_idx / 2) + n0_valid_idx * rhs_stride; + const size_t shift_right_x0 = (k0_idx % 2) * 4; + const size_t shift_right_x1 = (k1_idx % 2) * 4; + if (params->rhs_zero_point == 8) { uint8_t byte0 = rhs_zero_point | rhs_zero_point << 4; uint8_t byte1 = rhs_zero_point | rhs_zero_point << 4; @@ -132,9 +134,6 @@ void kai_run_rhs_pack_nxk_qsi4cxp_qs4cxs1s0( src_x0_hi = (byte1 >> 4); } */ - const size_t shift_right_x0 = (k0_idx % 2) * 4; - const size_t shift_right_x1 = (k1_idx % 2) * 4; - const uint8_t src_x0_lo = (byte0 >> shift_right_x0) & 0x0F; const uint8_t src_x0_hi = (byte1 >> shift_right_x1) & 0x0F; @@ -148,6 +147,7 @@ void kai_run_rhs_pack_nxk_qsi4cxp_qs4cxs1s0( int8_t byte0 = 0; int8_t byte1 = 0; + // NOLINTBEGIN(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) if (k0_idx < k) { byte0 = rhs[src_addr_byte0]; } @@ -159,13 +159,11 @@ void kai_run_rhs_pack_nxk_qsi4cxp_qs4cxs1s0( // The logic behind the following operations where we extract the // values from the bytes is same as unsigned - const size_t shift_right_x0 = (k0_idx % 2) * 4; - const size_t shift_right_x1 = (k1_idx % 2) * 4; - int8_t src_x0_lo = (byte0 >> shift_right_x0) & 0x0F; int8_t src_x0_hi = (byte1 >> shift_right_x1) & 0x0F; const int8_t dst_qs0 = src_x0_lo | (src_x0_hi << 4); + // NOLINTEND(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) *(int8_t*)dst_row = dst_qs0; dst_row += sizeof(int8_t);