diff --git a/CMakeLists.txt b/CMakeLists.txt index e3e3993ff698ee121339968ed7104504071a43f9..612037784f01618c6ac886e37d001c1c39460793 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,6 +51,7 @@ else() "-Wno-overlength-strings" "-Wstrict-overflow=2" "-Wswitch-default" + "-Wcast-qual" ) set(KLEIDIAI_WARNING_FLAGS_CXX diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa.c b/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa.c index b1911981d3616348760e09c34587393905f4d813..66db8dc7c83d2ccf23d455b705824149104d0446 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa.c @@ -158,10 +158,10 @@ void kai_run_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa( const size_t mr = kai_get_mr_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa(); const size_t nr = kai_get_nr_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa(); - const uint16_t* lhs_scales = - (uint16_t*)((const int8_t*)lhs_packed + lhs_packed_stride - (mr * num_blocks) * kai_num_bytes_multiplier_lhs); - const uint16_t* rhs_scales = - (uint16_t*)((const uint8_t*)rhs_packed + rhs_packed_stride - (nr * num_blocks) * kai_num_bytes_multiplier_rhs); + const uint16_t* lhs_scales = (const uint16_t*)((const int8_t*)lhs_packed + lhs_packed_stride - + (mr * num_blocks) * kai_num_bytes_multiplier_lhs); + const uint16_t* rhs_scales = (const uint16_t*)((const uint8_t*)rhs_packed + rhs_packed_stride - + (nr * num_blocks) * kai_num_bytes_multiplier_rhs); __asm__ volatile( // Switch to streaming mode with ZA enabling diff --git a/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot.c b/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot.c index 002b55fab6a966ca428053c082347c7befafbd71..067bcc035ee1984d2f2bd7ba54a6978522aca004 100644 --- a/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot.c +++ b/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot.c @@ -161,10 +161,10 @@ void kai_run_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot( const size_t mr = kai_get_mr_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot(); const size_t nr = kai_get_nr_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot(); - const uint16_t* lhs_scales = - (uint16_t*)((const int8_t*)lhs_packed + lhs_packed_stride - (mr * num_blocks) * kai_num_bytes_multiplier_lhs); - const uint16_t* rhs_scales = - (uint16_t*)((const uint8_t*)rhs_packed + rhs_packed_stride - (nr * num_blocks) * kai_num_bytes_multiplier_rhs); + const uint16_t* lhs_scales = (const uint16_t*)((const int8_t*)lhs_packed + lhs_packed_stride - + (mr * num_blocks) * kai_num_bytes_multiplier_lhs); + const uint16_t* rhs_scales = (const uint16_t*)((const uint8_t*)rhs_packed + rhs_packed_stride - + (nr * num_blocks) * kai_num_bytes_multiplier_rhs); __asm__ volatile( // Switch to streaming mode with ZA enabling diff --git a/kai/ukernels/matmul/pack/kai_lhs_pack_bf16p2vlx2_f32_sme.c b/kai/ukernels/matmul/pack/kai_lhs_pack_bf16p2vlx2_f32_sme.c index 7698ce5dd4cb537ff2fc5ca017b7e6007a023b4e..2eb124a0965018b2acaa52f5a3ed5617b4d6e8ce 100644 --- a/kai/ukernels/matmul/pack/kai_lhs_pack_bf16p2vlx2_f32_sme.c +++ b/kai/ukernels/matmul/pack/kai_lhs_pack_bf16p2vlx2_f32_sme.c @@ -78,10 +78,10 @@ void kai_run_lhs_pack_bf16p2vlx2_f32_sme( for (size_t block_y = 0; block_y < m; block_y += block_height) { const size_t height = KAI_MIN(m - block_y, block_height); - void* out = (void*)((char*)lhs_packed + block_y * kai_roundup(k, kai_kr) * sizeof(uint16_t)); + void* out = (char*)lhs_packed + (block_y * kai_roundup(k, kai_kr) * sizeof(uint16_t)); for (size_t y = 0; y < height; y++) { - in[y] = (void*)((char*)lhs + (block_y + y) * lhs_stride); + in[y] = (const void*)((const char*)lhs + (block_y + y) * lhs_stride); } __asm__ __volatile__( diff --git a/kai/ukernels/matmul/pack/kai_lhs_pack_bf16p8x4_f16_neon.c b/kai/ukernels/matmul/pack/kai_lhs_pack_bf16p8x4_f16_neon.c index 723de695b88212ab5b2e8ff4207ffb2191967580..e025368ba4f57cbf136beed123469ce869572383 100644 --- a/kai/ukernels/matmul/pack/kai_lhs_pack_bf16p8x4_f16_neon.c +++ b/kai/ukernels/matmul/pack/kai_lhs_pack_bf16p8x4_f16_neon.c @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -69,7 +69,7 @@ void kai_run_lhs_pack_bf16p8x4_f16_neon( size_t width = k; for (size_t y = 0; y < height; y++) { - in[y] = (char*)lhs + (block_y + y) * lhs_stride; + in[y] = (const char*)lhs + (block_y + y) * lhs_stride; } __asm__ __volatile__( diff --git a/kai/ukernels/matmul/pack/kai_lhs_quant_pack_bf16p1x4_f32_neon.c b/kai/ukernels/matmul/pack/kai_lhs_quant_pack_bf16p1x4_f32_neon.c index a53b00885252da5f0389d9ab7f5f2b0ab3ceb4fe..5d6f1587a107ec4dab40cc5941007227455228e9 100644 --- a/kai/ukernels/matmul/pack/kai_lhs_quant_pack_bf16p1x4_f32_neon.c +++ b/kai/ukernels/matmul/pack/kai_lhs_quant_pack_bf16p1x4_f32_neon.c @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -64,8 +64,8 @@ void kai_run_lhs_quant_pack_bf16p1x4_f32_neon( KAI_ASSUME(m_idx_start == 0); - const float* lhs_ptr = (float*)(lhs); - uint16_t* lhs_packed_ptr = (uint16_t*)(lhs_packed); + const float* lhs_ptr = lhs; + uint16_t* lhs_packed_ptr = lhs_packed; // Unroll two 256-bit loops size_t i = 0; diff --git a/kai/ukernels/matmul/pack/kai_lhs_quant_pack_bf16p8x4_f32_neon.c b/kai/ukernels/matmul/pack/kai_lhs_quant_pack_bf16p8x4_f32_neon.c index 6022ac912205894a7ad6bbef8e85bd3f710a7af9..9e59bdd0a79bc4b1794f49c0d21238797c89e433 100644 --- a/kai/ukernels/matmul/pack/kai_lhs_quant_pack_bf16p8x4_f32_neon.c +++ b/kai/ukernels/matmul/pack/kai_lhs_quant_pack_bf16p8x4_f32_neon.c @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -73,7 +73,7 @@ void kai_run_lhs_quant_pack_bf16p8x4_f32_neon( size_t width = k; for (size_t y = 0; y < height; y++) { - in[y] = (char*)lhs + (block_y + y) * lhs_stride; + in[y] = (const char*)lhs + (block_y + y) * lhs_stride; } __asm__ __volatile__( diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_bf16p12x4biasf16_f16_neon.c b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_bf16p12x4biasf16_f16_neon.c index cff2dd0f052905d4296c643017eb0ae7ed20af6a..c664bb1597e2cfba7ec7666ed2f90faa04d8d68c 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_bf16p12x4biasf16_f16_neon.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_bf16p12x4biasf16_f16_neon.c @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -69,7 +69,7 @@ void kai_run_rhs_pack_kxn_bf16p12x4biasf16_f16_neon( const void* in = rhs; void* out = rhs_packed; const size_t in_stride = rhs_stride; - const uint16_t* pad_row = (uint16_t*)rhs; + const uint16_t* pad_row = rhs; // Fill zeros if bias is nullptr size_t bias_step = nr * sizeof(uint16_t); @@ -80,7 +80,7 @@ void kai_run_rhs_pack_kxn_bf16p12x4biasf16_f16_neon( bias_step = 0; } - const void* bias_ptr = bias == NULL ? (void*)zero_bias : (void*)bias; + const void* bias_ptr = bias == NULL ? (const void*)zero_bias : bias; size_t out_stride = kai_get_rhs_packed_stride_rhs_pack_kxn_bf16p12x4biasf16_f16_neon(height); diff --git a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_bf16p12x4biasf32_f16_neon.c b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_bf16p12x4biasf32_f16_neon.c index 2c4d5e5c9b7d8b1b64655cf5dacc77bcb5492234..68b2b2576cda566df979cca3f27d79547feeae31 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_bf16p12x4biasf32_f16_neon.c +++ b/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_bf16p12x4biasf32_f16_neon.c @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -61,7 +61,7 @@ void kai_run_rhs_pack_kxn_bf16p12x4biasf32_f16_neon( const void* in = rhs; void* out = rhs_packed; const size_t in_stride = rhs_stride; - uint16_t* pad_row = (uint16_t*)rhs; + const uint16_t* pad_row = rhs; // Fill zeros if bias is nullptr size_t bias_step = nr * sizeof(float); @@ -72,7 +72,7 @@ void kai_run_rhs_pack_kxn_bf16p12x4biasf32_f16_neon( bias_step = 0; } - const void* bias_ptr = bias == NULL ? (void*)zero_bias : (void*)bias; + const void* bias_ptr = bias == NULL ? (const void*)zero_bias : bias; size_t out_stride = kai_nr * kai_roundup(height, kai_kr) * sizeof(uint16_t) + kai_nr * sizeof(uint32_t); diff --git a/kai/ukernels/matmul/pack/kai_rhs_quant_pack_kxn_bf16p12x4biasf32_f32_neon.c b/kai/ukernels/matmul/pack/kai_rhs_quant_pack_kxn_bf16p12x4biasf32_f32_neon.c index 6711941834312eca4cccc9888427fb572c028043..6e0acb80207ec00a52a47e65a8804090cbb4bc8c 100644 --- a/kai/ukernels/matmul/pack/kai_rhs_quant_pack_kxn_bf16p12x4biasf32_f32_neon.c +++ b/kai/ukernels/matmul/pack/kai_rhs_quant_pack_kxn_bf16p12x4biasf32_f32_neon.c @@ -1,5 +1,5 @@ // -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates +// SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates // // SPDX-License-Identifier: Apache-2.0 // @@ -66,7 +66,7 @@ void kai_run_rhs_quant_pack_kxn_bf16p12x4biasf32_f32_neon( size_t height = k; const size_t width = n; - const void* in = (void*)rhs; + const void* in = rhs; void* out = rhs_packed; const size_t in_stride = rhs_stride; const float* pad_row = rhs; @@ -80,7 +80,7 @@ void kai_run_rhs_quant_pack_kxn_bf16p12x4biasf32_f32_neon( bias_step = 0; } - const void* bias_ptr = bias == NULL ? (void*)zero_bias : (void*)bias; + const void* bias_ptr = bias == NULL ? (const void*)zero_bias : bias; const size_t out_stride = nr * kai_roundup(height, kr) * sizeof(uint16_t) + nr * sizeof(uint32_t); diff --git a/kai_defs.bzl b/kai_defs.bzl index 593e320a3030df147272b9e51f33fa7427af4e18..6e512b148544d0539ecbfda4c5cf4460677c7950 100644 --- a/kai_defs.bzl +++ b/kai_defs.bzl @@ -26,6 +26,7 @@ def kai_gcc_warn_copts(): "-Wstrict-overflow=2", "-Wswitch-default", "-Wno-vla", + "-Wcast-qual", ] def kai_gcc_warn_cxxopts():