diff --git a/CMakeLists.txt b/CMakeLists.txt index 5fdf43ec62da32a922dce21af2d08fed7c2b2072..fe4f7fae41d02b753ac3edc35d8f8edea9d7df57 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,11 +36,11 @@ if(KLEIDIAI_ENABLE_CLANG_TIDY) endif() if(MSVC) - set(KLEIDIAI_WARNING_FLAGS_C + set(KLEIDIAI_WARNING_FLAGS_BASE # "/Wall" - This flag is disabled until the kernel library is cleaned up. ) else() - set(KLEIDIAI_WARNING_FLAGS_C + set(KLEIDIAI_WARNING_FLAGS_BASE "-Wall" "-Wdisabled-optimization" "-Werror" @@ -53,16 +53,24 @@ else() "-Wcast-qual" ) + # C only flags not present in C++ + set(KLEIDIAI_WARNING_FLAGS_C + "-Wmissing-prototypes" + "-Wstrict-prototypes" + ) + set(KLEIDIAI_WARNING_FLAGS_CXX "-Wctor-dtor-privacy" "-Weffc++" "-Woverloaded-virtual" "-Wsign-promo" + "-Wmissing-declarations" ) endif() set(KLEIDIAI_WARNING_FLAGS - ${KLEIDIAI_WARNING_FLAGS_C} + ${KLEIDIAI_WARNING_FLAGS_BASE} + $<$<COMPILE_LANGUAGE:C>:${KLEIDIAI_WARNING_FLAGS_C}> $<$<COMPILE_LANGUAGE:CXX>:${KLEIDIAI_WARNING_FLAGS_CXX}> ) diff --git a/kai/ukernels/matmul/matmul_clamp_f16_f16p_f16p/kai_matmul_clamp_f16_f16p2vlx2_f16p2vlx2_2vlx2vl_sme2_mopa.c b/kai/ukernels/matmul/matmul_clamp_f16_f16p_f16p/kai_matmul_clamp_f16_f16p2vlx2_f16p2vlx2_2vlx2vl_sme2_mopa.c index 6a130319715495ae00ac7391c385ec97dfccf1a1..a30936cf5dc2f98752b07617d5de2467ea588bb5 100644 --- a/kai/ukernels/matmul/matmul_clamp_f16_f16p_f16p/kai_matmul_clamp_f16_f16p2vlx2_f16p2vlx2_2vlx2vl_sme2_mopa.c +++ b/kai/ukernels/matmul/matmul_clamp_f16_f16p_f16p/kai_matmul_clamp_f16_f16p2vlx2_f16p2vlx2_2vlx2vl_sme2_mopa.c @@ -11,6 +11,8 @@ #error This file must be compiled for AArch64, FEAT_SVE2. #else // Architectural features check. 
+#include "kai_matmul_clamp_f16_f16p2vlx2_f16p2vlx2_2vlx2vl_sme2_mopa.h" + #include #include #include diff --git a/kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa.c b/kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa.c index da28c6f0418ea8e8980fd4bd87e2a0d457eb8830..f677276fa943fadb800cbb8959ae29e583a7e4fb 100644 --- a/kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa.c +++ b/kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa.c @@ -11,6 +11,8 @@ #error This file must be compiled for AArch64, FEAT_SVE2. #else // Architectural features check. +#include "kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa.h" + #include #include diff --git a/kai/ukernels/matmul/pack/kai_lhs_pack_bf16p2vlx2_f32_sme.c b/kai/ukernels/matmul/pack/kai_lhs_pack_bf16p2vlx2_f32_sme.c index 351a4e3a1c393bf9621f3eee5f2f944003476989..e6992357e2723c441aa99d414ec0000b7694fc87 100644 --- a/kai/ukernels/matmul/pack/kai_lhs_pack_bf16p2vlx2_f32_sme.c +++ b/kai/ukernels/matmul/pack/kai_lhs_pack_bf16p2vlx2_f32_sme.c @@ -11,6 +11,8 @@ #error This file must be compiled for AArch64, FEAT_SVE2. #else // Architectural features check. +#include "kai_lhs_pack_bf16p2vlx2_f32_sme.h" + #include #include diff --git a/kai/ukernels/matmul/pack/kai_lhs_pack_bf16p8x4_f16_neon.c b/kai/ukernels/matmul/pack/kai_lhs_pack_bf16p8x4_f16_neon.c index 1cc0c0713a03fc921511632f8992c706f0df90f5..ce92ba1faf7c2969cc285147d2fdb026a36479e0 100644 --- a/kai/ukernels/matmul/pack/kai_lhs_pack_bf16p8x4_f16_neon.c +++ b/kai/ukernels/matmul/pack/kai_lhs_pack_bf16p8x4_f16_neon.c @@ -12,6 +12,8 @@ #error This file must be compiled for AArch64, FEAT_BF16, FEAT_FP16. #else // Architectural features check. 
+#include "kai_lhs_pack_bf16p8x4_f16_neon.h" + #include #include diff --git a/kai/ukernels/matmul/pack/kai_lhs_pack_f32p2vlx1_f32_sme.c b/kai/ukernels/matmul/pack/kai_lhs_pack_f32p2vlx1_f32_sme.c index d8b4708d8add870b8ad715c129b06d6029e0e2b3..e2636770a52284f43da864672edc0f2eb5ec10e5 100644 --- a/kai/ukernels/matmul/pack/kai_lhs_pack_f32p2vlx1_f32_sme.c +++ b/kai/ukernels/matmul/pack/kai_lhs_pack_f32p2vlx1_f32_sme.c @@ -11,6 +11,8 @@ #error This file must be compiled for AArch64, FEAT_SVE2. #else // Architectural features check. +#include "kai_lhs_pack_f32p2vlx1_f32_sme.h" + #include #include diff --git a/kai/ukernels/matmul/pack/kai_lhs_pack_x16p2vlx2_x16_sme.c b/kai/ukernels/matmul/pack/kai_lhs_pack_x16p2vlx2_x16_sme.c index f16d52b281ea96bf6333f86150951e554d76391a..d865117f190b0dc79aa14b12884634a3b83d20d7 100644 --- a/kai/ukernels/matmul/pack/kai_lhs_pack_x16p2vlx2_x16_sme.c +++ b/kai/ukernels/matmul/pack/kai_lhs_pack_x16p2vlx2_x16_sme.c @@ -11,6 +11,8 @@ #error This file must be compiled for AArch64, FEAT_SVE2. #else // Architectural features check. +#include "kai_lhs_pack_x16p2vlx2_x16_sme.h" + #include #include diff --git a/kai/ukernels/matmul/pack/kai_lhs_quant_pack_bf16p1x4_f32_neon.c b/kai/ukernels/matmul/pack/kai_lhs_quant_pack_bf16p1x4_f32_neon.c index 5d6f1587a107ec4dab40cc5941007227455228e9..fb0c4bd5d662d9030a61b6b3d3fbc1813e23fdf9 100644 --- a/kai/ukernels/matmul/pack/kai_lhs_quant_pack_bf16p1x4_f32_neon.c +++ b/kai/ukernels/matmul/pack/kai_lhs_quant_pack_bf16p1x4_f32_neon.c @@ -8,6 +8,8 @@ #error This file must be compiled for AArch64, FEAT_BF16. #else // Architectural features check. 
+#include "kai_lhs_quant_pack_bf16p1x4_f32_neon.h" + #include #include #include diff --git a/kai/ukernels/matmul/pack/kai_lhs_quant_pack_bf16p8x4_f32_neon.c b/kai/ukernels/matmul/pack/kai_lhs_quant_pack_bf16p8x4_f32_neon.c index 9e59bdd0a79bc4b1794f49c0d21238797c89e433..60862dc4752105e42de65eedb9de55553000ccce 100644 --- a/kai/ukernels/matmul/pack/kai_lhs_quant_pack_bf16p8x4_f32_neon.c +++ b/kai/ukernels/matmul/pack/kai_lhs_quant_pack_bf16p8x4_f32_neon.c @@ -10,6 +10,8 @@ #define MAX_MR 8 +#include "kai_lhs_quant_pack_bf16p8x4_f32_neon.h" + #include #include #include @@ -50,8 +52,8 @@ size_t kai_get_lhs_packed_size_lhs_quant_pack_bf16p8x4_f32_neon(size_t m, size_t } void kai_run_lhs_quant_pack_bf16p8x4_f32_neon( - size_t m, size_t k, size_t mr, size_t kr, size_t sr, size_t m_idx_start, const float* lhs, size_t lhs_stride, - uint16_t* lhs_packed) { + size_t m, size_t k, size_t mr, size_t kr, size_t sr, size_t m_idx_start, const void* lhs, size_t lhs_stride, + void* lhs_packed) { KAI_ASSUME(mr == kai_mr); KAI_ASSUME(sr == kai_sr); KAI_ASSUME(kr == kai_kr); diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_bf16p12x4biasf16_f16_neon.h b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_bf16p12x4biasf16_f16_neon.h index cd3863021696c92ee0f37fb61c8dc39d263ea7d5..11a6601f302aba23d76d1f572d511edd5b8bfc52 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_bf16p12x4biasf16_f16_neon.h +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_bf16p12x4biasf16_f16_neon.h @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -33,18 +33,25 @@ size_t kai_get_rhs_offset_rhs_pack_kxn_bf16p12x4biasf16_f16_neon(size_t n_idx); /// @return The offset in bytes to the data element. size_t kai_get_bias_offset_rhs_pack_kxn_bf16p12x4biasf16_f16_neon(size_t n_idx); +/// Gets the row stride in bytes to the packed RHS matrix. 
+/// +/// @param[in] k Number of rows. +/// +/// @return Row stride in bytes to the packed RHS matrix. +size_t kai_get_rhs_packed_stride_rhs_pack_kxn_bf16p12x4biasf16_f16_neon(size_t k); + /// Gets the offset in bytes to the data element in the packed RHS buffer. /// -/// @param[in] n_idx Row index. -/// @param[in] k Number of columns. +/// @param[in] n_idx Column index. +/// @param[in] k Number of rows. /// /// @return The offset in bytes to the data element. size_t kai_get_rhs_packed_offset_rhs_pack_kxn_bf16p12x4biasf16_f16_neon(size_t n_idx, size_t k); /// Gets the size in bytes of the packed RHS buffer. /// -/// @param[in] n Number of rows. -/// @param[in] k Number of columns. +/// @param[in] n Number of columns. +/// @param[in] k Number of rows. /// /// @return The size in bytes of the packed RHS buffer. size_t kai_get_rhs_packed_size_rhs_pack_kxn_bf16p12x4biasf16_f16_neon(size_t n, size_t k); diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_bf16p12x4biasf32_f16_neon.c b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_bf16p12x4biasf32_f16_neon.c index 1138a6790833e70c09f28fc05e94a1b9d6336165..5a431216126bc2e69639062198e984dfe48fc233 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_bf16p12x4biasf32_f16_neon.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_bf16p12x4biasf32_f16_neon.c @@ -12,6 +12,8 @@ #error This file must be compiled for AArch64, FEAT_BF16, FEAT_FP16. #else // Architectural features check. 
+#include "kai_rhs_pack_kxn_bf16p12x4biasf32_f16_neon.h" + #include #include #include diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f16p16x1biasf16_f16_f16_neon.c b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f16p16x1biasf16_f16_f16_neon.c index 61b8ba480ec595d630909b915f749a3ad3be6d2b..d85533d9ee920b4290e831acb560d5ddc3e7e3b7 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f16p16x1biasf16_f16_f16_neon.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f16p16x1biasf16_f16_f16_neon.c @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -8,6 +8,8 @@ #error This file must be compiled for AArch64. #else // Architectural features check. +#include "kai_rhs_pack_kxn_f16p16x1biasf16_f16_f16_neon.h" + #include #include diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme.c b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme.c index 5ce709fa256fba153637aaee3be76c991b64bb9f..55e4947460fb8493cd36b63f72c68c1c16f4755f 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme.c @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -8,6 +8,8 @@ #error This file must be compiled for AArch64, FEAT_SVE2. #else // Architectural features check. 
+#include "kai_rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme.h" + #include #include diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32p8x1biasf32_f32_f32_neon.c b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32p8x1biasf32_f32_f32_neon.c index 0c6b0074b2c751c414112861a2864d7a34604dee..afa3d8b597cd510e9116933a14c26da1b62ddbb6 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32p8x1biasf32_f32_f32_neon.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32p8x1biasf32_f32_f32_neon.c @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -8,6 +8,8 @@ #error This file must be compiled for AArch64. #else // Architectural features check. +#include "kai_rhs_pack_kxn_f32p8x1biasf32_f32_f32_neon.h" + #include #include diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_f32p2vlx1biasf32_f32_f32_sme.c b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_f32p2vlx1biasf32_f32_f32_sme.c index 426bfc6cbc9dc836c8d05082ea60180df415634d..b5fe9904812c5078bc799ec16bf56efdda568fbe 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_f32p2vlx1biasf32_f32_f32_sme.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_f32p2vlx1biasf32_f32_f32_sme.c @@ -11,6 +11,8 @@ #error This file must be compiled for AArch64, FEAT_SVE2. #else // Architectural features check. 
+#include "kai_rhs_pack_nxk_f32p2vlx1biasf32_f32_f32_sme.h" + #include #include diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_f32p2vlx1biasf32_f32_f32_sme.h b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_f32p2vlx1biasf32_f32_f32_sme.h index 8bd3f6ee783104b625724cf524b9e471145ecbe2..77e68f6987ccd0aee9841834fc9ed4628de00c93 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_f32p2vlx1biasf32_f32_f32_sme.h +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_f32p2vlx1biasf32_f32_f32_sme.h @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -22,10 +22,10 @@ size_t kai_get_n_step_rhs_pack_nxk_f32p2vlx1biasf32_f32_f32_sme(void); /// Gets the offset in bytes to the data element in the RHS matrix buffer. /// /// @param[in] n_idx Column index. -/// @param[in] rhs_offset Row stride in bytes of the RHS matrix. +/// @param[in] rhs_stride Row stride in bytes of the RHS matrix. /// /// @return The offset in bytes to the data element. -size_t kai_get_rhs_offset_rhs_pack_nxk_f32p2vlx1biasf32_f32_f32_sme(size_t n_idx, size_t rhs_offset); +size_t kai_get_rhs_offset_rhs_pack_nxk_f32p2vlx1biasf32_f32_f32_sme(size_t n_idx, size_t rhs_stride); /// Gets the offset in bytes to the data element in the bias buffer. /// @@ -34,6 +34,13 @@ size_t kai_get_rhs_offset_rhs_pack_nxk_f32p2vlx1biasf32_f32_f32_sme(size_t n_idx /// @return The offset in bytes to the data element. size_t kai_get_bias_offset_rhs_pack_nxk_f32p2vlx1biasf32_f32_f32_sme(size_t n_idx); +/// Gets the row stride in bytes to the packed RHS matrix. +/// +/// @param[in] k Number of columns. +/// +/// @return Row stride in bytes to the packed RHS matrix. +size_t kai_get_rhs_packed_stride_rhs_pack_nxk_f32p2vlx1biasf32_f32_f32_sme(size_t k); + /// Gets the offset in bytes to the data element in the packed RHS buffer. /// /// @param[in] n_idx Row index. 
diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.h b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.h index f06e736cb61ed64cdc6858127cc169111ad09b8b..a350c0af6cf7837a3a41866b008ba1475ecdcab0 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.h +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.h @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -14,6 +14,17 @@ extern "C" { #endif +/// Gets the row stride in bytes to the packed RHS matrix. +/// +/// @param[in] k The number of columns in the RHS matrix (not packed). +/// @param[in] nr The number of columns written by the matmul micro-kernel. +/// @param[in] kr The number of columns loaded in the single innermost loop of the matmul micro-kernel. +/// @param[in] bl The block length, which defines the number of K values stored in a single block. It must be equivalent +/// to 32. +/// +/// @return Row stride in bytes to the packed RHS matrix. +size_t kai_get_rhs_packed_stride_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0(size_t k, size_t nr, size_t kr, size_t bl); + /// Gets the offset in bytes for the RHS matrix (not packed), which holds /// the int4 values in a N x K matrix, where N is number of rows and K is the number of columns. /// diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_x16p2vlx2b_x16_x16_sme.c b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_x16p2vlx2b_x16_x16_sme.c index 8ca803bd188a082f1d324c1f0ccc99f33d20cd4b..d292185823789d084ec41733df292b8260bec413 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_x16p2vlx2b_x16_x16_sme.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_x16p2vlx2b_x16_x16_sme.c @@ -11,6 +11,8 @@ #error This file must be compiled for AArch64, FEAT_SVE2. 
#else // Architectural features check. +#include "kai_rhs_pack_nxk_x16p2vlx2b_x16_x16_sme.h" + #include #include diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_x16p2vlx2b_x16_x16_sme.h b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_x16p2vlx2b_x16_x16_sme.h index e5cbc7915ca1e83424625bfaedb1f9af1d315cc6..f9ad4b7f5b9dc97e591ccdc0542206e9bee2a323 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_x16p2vlx2b_x16_x16_sme.h +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_x16p2vlx2b_x16_x16_sme.h @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -22,10 +22,10 @@ size_t kai_get_n_step_rhs_pack_nxk_x16p2vlx2b_x16_x16_sme(void); /// Gets the offset in bytes to the data element in the RHS matrix buffer. /// /// @param[in] n_idx Column index. -/// @param[in] rhs_offset Row stride in bytes of the RHS matrix. +/// @param[in] rhs_stride Row stride in bytes of the RHS matrix. /// /// @return The offset in bytes to the data element. -size_t kai_get_rhs_offset_rhs_pack_nxk_x16p2vlx2b_x16_x16_sme(size_t n_idx, size_t rhs_offset); +size_t kai_get_rhs_offset_rhs_pack_nxk_x16p2vlx2b_x16_x16_sme(size_t n_idx, size_t rhs_stride); /// Gets the offset in bytes to the data element in the bias buffer. 
/// diff --git a/kai/ukernels/matmul/pack/kai_rhs_quant_pack_kxn_bf16p12x4biasf32_f32_neon.c b/kai/ukernels/matmul/pack/kai_rhs_quant_pack_kxn_bf16p12x4biasf32_f32_neon.c index 78b7f9cd9803452b90e2bfaa5993ee49edf44333..e08021d0eae140d08b33956cc809636b8d84cd4c 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_quant_pack_kxn_bf16p12x4biasf32_f32_neon.c +++ b/kai/ukernels/matmul/pack/kai_rhs_quant_pack_kxn_bf16p12x4biasf32_f32_neon.c @@ -13,6 +13,8 @@ #define MAX_NR 12 +#include "kai_rhs_quant_pack_kxn_bf16p12x4biasf32_f32_neon.h" + #include #include #include @@ -24,9 +26,8 @@ static const size_t kai_nr = 12; static const size_t kai_kr = 4; static const size_t kai_sr = 1; -size_t kai_get_n_step_rhs_quant_pack_kxn_bf16p12x4biasf32_f32_neon(size_t nr) { - KAI_ASSUME(kai_nr == nr); - return nr; +size_t kai_get_n_step_rhs_quant_pack_kxn_bf16p12x4biasf32_f32_neon(void) { + return kai_nr; } size_t kai_get_rhs_offset_rhs_quant_pack_kxn_bf16p12x4biasf32_f32_neon(size_t n_idx) { diff --git a/kai_defs.bzl b/kai_defs.bzl index 66ec4414ca4e9e4501936d171d0f84b63fc94e4a..3848ba2b762a3860a3ca8e58224193f045ec5ce3 100644 --- a/kai_defs.bzl +++ b/kai_defs.bzl @@ -28,12 +28,20 @@ def kai_gcc_warn_copts(): "-Wcast-qual", ] +# GCC/CLANG C only warning options +def kai_gcc_warn_conlyopts(): + return [ + "-Wmissing-prototypes", + "-Wstrict-prototypes", + ] + def kai_gcc_warn_cxxopts(): return kai_gcc_warn_copts() + [ "-Wctor-dtor-privacy", "-Weffc++", "-Woverloaded-virtual", "-Wsign-promo", + "-Wmissing-declarations", ] # GCC/CLANG compiler options @@ -142,6 +150,7 @@ def _kai_c_cxx_common(name, copts_def_func, **kwargs): extra_copts.append("-fno-tree-vectorize") kwargs["copts"] = kwargs.get("copts", []) + copts_def_func(cpu_uarch) + extra_copts + kwargs["conlyopts"] = kwargs.get("conlyopts", []) + kai_gcc_warn_conlyopts() kwargs["deps"] = ["//:common"] + kwargs.get("deps", []) kwargs["linkstatic"] = kwargs.get("linkstatic", True) diff --git a/test/common/printer.cpp b/test/common/printer.cpp index 
9fc3e0d8fb096f414b2df8d87e36efba6c23e940..eaad08a470b98d5acd10117fee72eff5402045c1 100644 --- a/test/common/printer.cpp +++ b/test/common/printer.cpp @@ -1,9 +1,11 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // +#include "test/common/printer.hpp" + #include #include #include diff --git a/test/reference/cast.cpp b/test/reference/cast.cpp index 487c2f1e0f4e7b897e4a809d1c941707d3676281..67ad0e779650e38bcc160df5dfcacd37d329a211 100644 --- a/test/reference/cast.cpp +++ b/test/reference/cast.cpp @@ -1,9 +1,11 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // +#include "test/reference/cast.hpp" + #include #include #include diff --git a/test/tests/matmul_clamp_qai8_qai8p_qsi8cxp_test.cpp b/test/tests/matmul_clamp_qai8_qai8p_qsi8cxp_test.cpp index 638b1dc3364ae880e45f9cf8ad21b4115c0a73f6..81ffaf4d555e7c5396f7409c313ed6e0b931d4a4 100644 --- a/test/tests/matmul_clamp_qai8_qai8p_qsi8cxp_test.cpp +++ b/test/tests/matmul_clamp_qai8_qai8p_qsi8cxp_test.cpp @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -182,7 +182,7 @@ struct TestReference { }; /// Generate test reference data -TestReference get_test_reference(const MatMulShape& shape, const MatMulVariant& variant) { +static TestReference get_test_reference(const MatMulShape& shape, const MatMulVariant& variant) { // ============================================================ // Generates input and reference output data // ============================================================ @@ -282,7 +282,7 @@ TestReference 
get_test_reference(const MatMulShape& shape, const MatMulVariant& } /// Test LHS packing -void test_lhs_pack( +static void test_lhs_pack( const MatMulShape& shape, const MatMulVariant& variant, const Rect& output_area, const TestReference& reference) { KAI_ASSUME(variant.lhs_pack.has_value()); @@ -313,7 +313,7 @@ void test_lhs_pack( } /// Test RHS packing -void test_rhs_pack( +static void test_rhs_pack( const MatMulShape& shape, const MatMulVariant& variant, const Rect& output_area, const TestReference& reference) { const auto imp_packed_rhs_size = variant.rhs_pack.get_packed_rhs_size(shape.n, shape.k); ASSERT_EQ(imp_packed_rhs_size, reference.packed_rhs.size()); @@ -349,7 +349,7 @@ void test_rhs_pack( } /// Test MatMul of GEMM like kernel -void test_matmul( +static void test_matmul( const MatMulShape& shape, const MatMulVariant& variant, const Rect& output_area, const TestReference& reference) { const auto imp_dst_size = variant.matmul.get_dst_size(shape.m, shape.n); ASSERT_EQ(imp_dst_size, reference.dst_qsi8_clamped.size());