diff --git a/.clang-tidy b/.clang-tidy index 90197aefc8a231846ec23d58f111f2f042322d2e..e5ada0539c7183ca304f9d1f1ab2d4a4d7a40155 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -1,9 +1,8 @@ --- Checks: '-*,readability*,-readability-magic-numbers,-readability-function-size,-readability-function-cognitive-complexity,-readability-identifier-length' -WarningsAsErrors: 'readability*' +WarningsAsErrors: '*' HeaderFilterRegex: '*.h,*.hpp' -AnalyzeTemporaryDtors: false -FormatStyle: file +FormatStyle: 'file' CheckOptions: - { key: readability-identifier-naming.ClassCase, value: lower_case } - { key: readability-identifier-naming.StructCase, value: lower_case } @@ -62,4 +61,3 @@ CheckOptions: - key: google-readability-function-size.StatementThreshold value: '800' ... - diff --git a/CMakeLists.txt b/CMakeLists.txt index 591f72493046d60acda0ed9f07303e022b6bf124..27da4d6fe37bfa83198307ff19892e44400f86aa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.3) -project(armral VERSION 23.10) +project(armral VERSION 24.01) if(CMAKE_VERSION VERSION_GREATER 3.4) # stop CMake from automatically adding -rdynamic to linker flags @@ -16,6 +16,7 @@ endif() option(ARMRAL_ENABLE_WERROR "Enable -Werror when building the library and tests" OFF) option(ARMRAL_ENABLE_ASAN "Enable AddressSanitizer when building the library and tests" OFF) +option(ARMRAL_ENABLE_EFENCE "Enable Electric Fence when building the library and tests" OFF) option(ARMRAL_ENABLE_COVERAGE "Enable instrumentation for generating code coverage" OFF) option(BUILD_SIMULATION "Enable building channel simulation programs" ON) set(ARMRAL_ARCH NEON CACHE STRING "The architecture to build for ('NEON' or 'SVE2')") @@ -120,6 +121,15 @@ if(NOT ARMRAL_OVERRIDE_COMPILE_FLAGS) # Note: We don't universally enable this flag, as in some cases it can cause regressions. set_property(SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/src/BasicMathFun/MatrixMult/arm_cmplx_mat_mult_ahb_f32.c APPEND PROPERTY COMPILE_OPTIONS $<$<AND:$<COMPILE_LANGUAGE:C>,$<C_COMPILER_ID:GNU>>:-frename-registers>) + + if(ARMRAL_ENABLE_WERROR) + # Disable warnings-as-errors about C-style Variable Length Arrays in FFT source when using Clang++ + set_property(SOURCE + ${CMAKE_CURRENT_SOURCE_DIR}/src/LowerPHY/FFT/fft_execute.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/LowerPHY/FFT/fft_plan.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/LowerPHY/FFT/rader_generator.cpp + APPEND PROPERTY COMPILE_OPTIONS $<$<COMPILE_LANG_AND_ID:CXX,Clang>:-Wno-error=vla-extension>) + endif() endif() set(ARMRAL_UTIL_SOURCES @@ -160,7 +170,7 @@ if(ARMRAL_ENABLE_WERROR) message(WARNING "CMAKE_C_FLAGS and CMAKE_CXX_FLAGS manually specified. Ignoring option ARMRAL_ENABLE_WERROR") else() set(ARMRAL_COMPILER_FLAGS ${ARMRAL_COMPILER_FLAGS} -Werror) -endif() + endif() endif() if(ARMRAL_ENABLE_ASAN) @@ -172,12 +182,21 @@ if(ARMRAL_ENABLE_ASAN) endif() endif() +if(ARMRAL_ENABLE_EFENCE) + if(ARMRAL_OVERRIDE_COMPILE_FLAGS) + message(WARNING "CMAKE_C_FLAGS and CMAKE_CXX_FLAGS manually specified. Ignoring option ARMRAL_ENABLE_EFENCE") + else() + set(ARMRAL_COMPILER_FLAGS ${ARMRAL_COMPILER_FLAGS} -lefence) + set(ARMRAL_LINKER_FLAGS ${ARMRAL_LINKER_FLAGS} -lefence) + endif() +endif() + if(ARMRAL_ENABLE_COVERAGE) if(ARMRAL_OVERRIDE_COMPILE_FLAGS) message(WARNING "CMAKE_C_FLAGS and CMAKE_CXX_FLAGS manually specified. 
Ignoring option ARMRAL_ENABLE_COVERAGE") else() - set(ARMRAL_COMPILER_FLAGS ${ARMRAL_COMPILER_FLAGS} --coverage) - set(ARMRAL_LINKER_FLAGS ${ARMRAL_LINKER_FLAGS} --coverage) + set(ARMRAL_COMPILER_FLAGS ${ARMRAL_COMPILER_FLAGS} --coverage -fprofile-update=atomic) + set(ARMRAL_LINKER_FLAGS ${ARMRAL_LINKER_FLAGS} --coverage -fprofile-update=atomic) endif() endif() @@ -440,6 +459,8 @@ endfunction() add_armral_bench(arm_fir_filter_cf32_decimate_2 bench/FIR/FIR32Decimate2/main.cpp) add_armral_bench(ldpc_decoding bench/LDPC/Decoding/main.cpp) add_armral_bench(ldpc_encoding bench/LDPC/Encoding/main.cpp) + add_armral_bench(ldpc_rate_matching bench/LDPC/RateMatching/main.cpp) + add_armral_bench(ldpc_rate_recovery bench/LDPC/RateRecovery/main.cpp) add_armral_bench(matrix_inv_single_general bench/MatrixInv/Single/GeneralMatInv/main.cpp) add_armral_bench(matrix_inv_single_hermitian bench/MatrixInv/Single/HermitianMatInv/main.cpp) add_armral_bench(matrix_inv_batch_general bench/MatrixInv/Batch/GeneralMatInv/NonPA/main.cpp) @@ -583,7 +604,7 @@ set (COMP_ERR_MSG "Compilation is only supported with GNU versions 7, 8, 9, 10, 11, 12, 13, or Clang versions greater than or equal to 12.0.1. \ If compilation fails please use one of the supported compilers.") if (CMAKE_C_COMPILER_ID STREQUAL "GNU") - if (CMAKE_C_COMPILER_VERSION VERSION_LESS 7.1 OR CMAKE_C_COMPILER_VERSION VERSION_GREATER 13.1) + if (CMAKE_C_COMPILER_VERSION VERSION_LESS 7.1 OR CMAKE_C_COMPILER_VERSION VERSION_GREATER 13.2) message(WARNING ${COMP_ERR_MSG}) endif() elseif (CMAKE_C_COMPILER_ID STREQUAL "Clang") diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a730ee50112e5f285851f6b534a87f63dee4f099..24d142aa246b5023f0936b969d8a911e909bf28d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -416,7 +416,7 @@ The following code block provides a template for the `bench.py` script. ```py #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path @@ -482,7 +482,7 @@ The following code block provides a basic template. ```cpp /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/CREDITS.md b/CREDITS.md index 8605fae729562997bd7b71dc93499ffe36de7442..467a1b112b05d8f43c911b32c9338d75d6402d1f 100644 --- a/CREDITS.md +++ b/CREDITS.md @@ -2,6 +2,23 @@ In addition to the primary development being done by Arm, the following people and organizations have contributed to Arm RAN Acceleration Library: +- Work on `armral_ldpc_rate_recovery` to correctly set the + log-likelihood ratios of filler bits was contributed upstream by + 4g5g Consultants. See + https://gitlab.arm.com/networking/ral/-/merge_requests/6. + +- Work on `armral_ldpc_rate_matching` and `armral_ldpc_rate_recovery` + to support the addition and removal of filler bits when the soft + buffer size is less than the full buffer size was contributed + upstream by 4g5g Consultants. See + https://gitlab.arm.com/networking/ral/-/merge_requests/5. + +- Work on `armral_ldpc_encode_block`, `armral_ldpc_rate_matching` and + `armral_ldpc_rate_recovery` to support the addition and removal of + filler bits when the code block size is not a multiple of lifting + set size was contributed upstream by 4g5g Consultants. 
See + https://gitlab.arm.com/networking/ral/-/merge_requests/4. + - Work on `armral_seq_generator` to extend the `sequence_len` parameter to `uint32_t` was contributed upstream by 4g5g Consultants. See @@ -17,3 +34,4 @@ Acceleration Library: Consultants. See https://gitlab.arm.com/networking/ral/-/merge_requests/1 + diff --git a/Doxyfile.in b/Doxyfile.in index 6d6ad1ae598ab317507cd177a1547237aeda303c..c470dc932f4e2c8e67c67dadad76e8e432d00a32 100644 --- a/Doxyfile.in +++ b/Doxyfile.in @@ -38,7 +38,7 @@ PROJECT_NAME = "Arm RAN Acceleration Library Reference Guide" # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = "23.10" +PROJECT_NUMBER = "24.01" # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/README.md b/README.md index f8d16ad96ef30606a63455d74024b6e01e1475c0..bcaea5e108a2fc3409f09a8397663cfececc4f51 100644 --- a/README.md +++ b/README.md @@ -60,9 +60,10 @@ to download the source code. but are required if you want to run the library tests (`-DBUILD_TESTING`) and benchmarks (`-DBUILD_EXAMPLES`). - * The `-DCMAKE_INSTALL_DIR=<dir>` option is optional and sets the - install location (`<dir>`) for the library. The default location is - `/usr/local`. + * The `-DCMAKE_INSTALL_PREFIX=<dir>` option is optional and + specifies the base directory used to install the library. The library + archive is installed to `<dir>/lib` and headers are installed to + `<dir>/include`. The default location is `/usr/local`. * By default, a static library is built. To build a dynamic or a static library use the `-DBUILD_SHARED_LIBS={On|Off}` option. @@ -73,14 +74,6 @@ to download the source code. Other common CMake `{options}` include: - * `-DCMAKE_INSTALL_PREFIX=<dir>` - - Specifies the base directory used to install the library. The library - archive is installed to `<dir>/lib` and headers are installed to - `<dir>/include`. - - Default `<dir>` is `/usr/local`. - * `-DCMAKE_BUILD_TYPE={Debug|Release}` Specifies the set of flags used to build the library. The default is @@ -164,11 +157,21 @@ to download the source code. Enable AddressSanitizer when building the library and tests. AddressSanitizer adds extra runtime checks to enable you to catch - errors, such as reads or writes off the end of arrays. + memory errors, such as reading or writing past the end of an array. `-DARMRAL_ENABLE_ASAN=On` incurs some reduction in runtime performance. Default is `Off`. + * `-DARMRAL_ENABLE_EFENCE={On|Off}` + + Enable Electric Fence when building the library and tests. + Electric Fence causes tests to fail with a segmentation fault in the + presence of memory errors, such as reading or writing past the end of + an array. This option allows you to check for memory errors in test + executables run under a test runner such as QEMU. + + Default is `Off`. + * `-DARMRAL_ENABLE_COVERAGE={On|Off}` Enable (`On`), or disable (`Off`), code coverage instrumentation when @@ -362,7 +365,7 @@ file. The Arm RAN Acceleration Library Reference Guide is available online at: - https://developer.arm.com/documentation/102249/2310 + https://developer.arm.com/documentation/102249/2401 If you have Doxygen installed on your system, you can build a local HTML version of the Arm RAN Acceleration Library documentation using CMake. 
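The README text above describes the class of memory error that `-DARMRAL_ENABLE_ASAN=On` and `-DARMRAL_ENABLE_EFENCE=On` are meant to catch. A minimal, hypothetical example of such an error (a standalone sketch, not code from the library):

```cpp
// Writes one element past the end of a heap allocation. Compiled with
// -fsanitize=address this is reported as a heap-buffer-overflow; linked
// against Electric Fence (-lefence) the stray write typically lands on a
// guard page and the program fails with a segmentation fault, which is
// detectable even when the test binary runs under an emulator such as QEMU.
#include <cstdint>
#include <vector>

int main() {
  std::vector<uint8_t> buf(16);
  uint8_t *p = buf.data();
  for (int i = 0; i <= 16; ++i) { // off-by-one: i == 16 is out of bounds
    p[i] = 0;
  }
  return 0;
}
```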
diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 9e04ed3487dd6148de4984f5f172dcd535670b5b..eef47a1746239b5de8c26be03a8c535cb7da6388 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,7 +1,7 @@ -# Arm RAN Acceleration Library 23.10 Release Note +# Arm RAN Acceleration Library 24.01 Release Note Non-Confidential -Copyright © 2020-2023 Arm Limited (or its affiliates). All rights reserved. +Copyright © 2020-2024 Arm Limited (or its affiliates). All rights reserved. Arm conventions and proprietary notices, including confidentiality status, terminology statement, and product release status, can be found at the end of @@ -31,7 +31,7 @@ The Arm RAN Acceleration Library (ArmRAL) contains a set of functions for accelerating telecommunications applications such as, but not limited to, 5G Radio Access Networks (RANs). -The Arm RAN Acceleration Library 23.10 package provides a library that is +The Arm RAN Acceleration Library 24.01 package provides a library that is optimized for Arm AArch64-based processors. Arm RAN Acceleration Library provides: @@ -47,7 +47,7 @@ integers and 32-bit floating-point values. ## Release Status -This is the 23.10 release of Arm RAN Acceleration Library. +This is the 24.01 release of Arm RAN Acceleration Library. These deliverables are being released under the terms of the agreement between Arm and each licensee (the "Agreement"). All planned verification and @@ -101,19 +101,19 @@ source from the Arm Developer website and then unpack the contents. 5. Extract the tar file contents using a tar utility: - tar zxvf ral-armral-23.10.tar.gz + tar zxvf ral-armral-24.01.tar.gz ## Deliverables The downloaded product includes the deliverables listed in this section. -- Arm RAN Acceleration Library 23.10 +- Arm RAN Acceleration Library 24.01 - Release Notes (this document) - Documentation Product documentation is available on the Arm Developer website at: - https://developer.arm.com/documentation/102249/2310 + https://developer.arm.com/documentation/102249/2401 **Note:** Documentation, errata and release notes might change between product releases. For the latest documentation bundle, check the product download @@ -134,58 +134,58 @@ Arm RAN Acceleration Library. Describes new features or any technical changes to features or components in this release. -- Extended the `sequence_len` parameter of `armral_seq_generator` to - `uint32_t`. This work was contributed upstream by 4g5g Consultants. +- Added support for the addition and removal of filler bits in + `armral_ldpc_encode_block`, `armral_ldpc_rate_matching` and + `armral_ldpc_rate_recovery` when the code block size is not a + multiple of lifting set size or when the soft buffer size is less + than the full buffer size. This process is described in the 3GPP + Technical Specification (TS) 38.212. This work was contributed + upstream by 4g5g Consultants. -- Added parameter `i_bil` to `armral_polar_rate_matching` and - `armral_polar_rate_recovery` to enable or disable bit - interleaving. This work was contributed upstream by 4g5g - Consultants. - -- Added parameter `nref` to `armral_ldpc_rate_matching` and - `armral_ldpc_rate_recovery` to enable the routines to be used with a - soft buffer size. This work was contributed upstream by 4g5g - Consultants. - -- Added parameter `nref` to `armral_ldpc_rate_matching` and - `armral_ldpc_rate_recovery` to enable the routines to be used with a - soft buffer size. This work was contributed by Suraj Chalapathy from - 4g5g Consultants. 
+- Extended `armral_cmplx_pseudo_inverse_direct_f32` and + `armral_cmplx_pseudo_inverse_direct_f32_noalloc` to compute the + regularized pseudo-inverse of a single complex 32-bit matrix of size + `M-by-N` for cases where `M > N` in addition to the cases where `M + <= N`. ### Performance improvements Describes any features or components whose performance has improved in the current release compared with the previous release. -- Performance improvements for Neon implementations of the following routines: +- Performance improvements for the following routines: - * Polar block decoding (`armral_polar_decode_block`) for list - lengths 1, 2, 4 and 8. + * `armral_turbo_decode_block` and `armral_turbo_decode_block_noalloc`. - * LDPC block decoding (`armral_ldpc_decode_block` and - `armral_ldpc_decode_block_noalloc`). +- Performance improvements for SVE2 implementations of the following routines: + + * `armral_seq_generator`, for the cases when `sequence_len` is not a + multiple of 64. ### Changes to simulation programs Describes any changes, new features or components added to the channel simulation programs in this release. -- Simulation programs are now built by default and are tested by the - make check target. +- Added support for the addition and removal of filler bits in + `ldpc_awgn` when the code block size is not a multiple of lifting + set size. This work was contributed upstream by 4g5g Consultants. ### Resolved issues -There are no resolved issues in this release. +Describes any known issues resolved in the current release. + +- LDPC block encoding (`armral_ldpc_encode_block`), rate matching + (`armral_ldpc_rate_matching`) and rate recovery + (`armral_ldpc_rate_recovery`) now support the insertion and removal + of filler bits as described in the 3GPP Technical Specification (TS) + 38.212. ## Known limitations Describes any known limitations of the current release. -- LDPC block encoding (`armral_ldpc_encode_block`), rate matching - (`armral_ldpc_rate_matching`) and rate recovery - (`armral_ldpc_rate_recovery`) do not support the insertion and - removal of filler bits as described in as described in the 3GPP - Technical Specification (TS) 38.212. +- There are no known limitations in this release. # Support @@ -210,7 +210,7 @@ Acceleration Library: * A recent version of a C/C++ compiler, such as GCC. Arm RAN Acceleration Library has been tested with GCC 7.5.0, 8.5.0, 9.5.0, - 10.4.0, 11.4.0, 12.3.0, and 13.1.0. + 10.5.0, 11.4.0, 12.3.0, and 13.2.0. **Note:** If you are cross-compiling, you need a cross-toolchain compiler that targets AArch64. You can download open-source cross-toolchain builds of the @@ -312,7 +312,7 @@ rights reserved. Other brands and names mentioned in this document may be the trademarks of their respective owners. Please follow Arm’s trademark usage guidelines at https://www.arm.com/company/policies/trademarks. -Copyright © 2020-2023 Arm Limited (or its affiliates). All rights reserved. +Copyright © 2020-2024 Arm Limited (or its affiliates). All rights reserved. Arm Limited. Company 02557590 registered in England. 110 Fulbourn Road, Cambridge, England CB1 9NJ. 
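The filler-bit support described in the release notes above follows TS 38.212: filler bits pad a code block up to its full size, carry no information, and are known by the receiver to be zero. The sketch below illustrates the rate-recovery side of that idea. It is a conceptual illustration only, not the ArmRAL implementation; the function name, indexing, and `int8_t` LLR convention are assumptions:

```cpp
#include <cstdint>
#include <limits>
#include <vector>

// Pin the soft values of filler bits before LDPC decoding. The filler bits
// occupy len_filler_bits positions following the k_prime information bits,
// and since the receiver knows they are zero, their LLRs can be saturated
// rather than taken from the demodulator. Assumes the convention
// LLR = log(P(bit = 0) / P(bit = 1)), so a large positive value means the
// bit is almost certainly 0.
void set_filler_llrs(std::vector<int8_t> &llrs, uint32_t k_prime,
                     uint32_t len_filler_bits) {
  const int8_t known_zero = std::numeric_limits<int8_t>::max();
  for (uint32_t i = k_prime; i < k_prime + len_filler_bits; ++i) {
    llrs[i] = known_zero;
  }
}
```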
diff --git a/bench/CRC/11/BigEndian/bench.py b/bench/CRC/11/BigEndian/bench.py index be032e5a066698b32fa1c7d781875c248b87fb8e..6c6f668545b78a7d31667646c7339348808e5534 100755 --- a/bench/CRC/11/BigEndian/bench.py +++ b/bench/CRC/11/BigEndian/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/CRC/11/BigEndian/main.cpp b/bench/CRC/11/BigEndian/main.cpp index b6835f84cfa358af2bd875d9d9f5b8952e6b53c7..d82dbd425e411243d7baf3edf41b9e442d5485b3 100644 --- a/bench/CRC/11/BigEndian/main.cpp +++ b/bench/CRC/11/BigEndian/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/CRC/11/LittleEndian/bench.py b/bench/CRC/11/LittleEndian/bench.py index 321d2ed5a10ee87b98f5e90c28b90242b79620ac..350c7eae6b0e4e77f07102557aa48c13e409eaf9 100755 --- a/bench/CRC/11/LittleEndian/bench.py +++ b/bench/CRC/11/LittleEndian/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/CRC/11/LittleEndian/main.cpp b/bench/CRC/11/LittleEndian/main.cpp index 8a5760e7905054a6711d59f09e8bf7078eade29e..533b507fc32723f253b2674ffcf5c5081e3bd4c7 100644 --- a/bench/CRC/11/LittleEndian/main.cpp +++ b/bench/CRC/11/LittleEndian/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/CRC/16/BigEndian/bench.py b/bench/CRC/16/BigEndian/bench.py index cb415796ee1f75bad4bfb28d95bde659f1cd2187..8bf0fc0da405e78369bdac10fd01120c0ee0d810 100755 --- a/bench/CRC/16/BigEndian/bench.py +++ b/bench/CRC/16/BigEndian/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/CRC/16/BigEndian/main.cpp b/bench/CRC/16/BigEndian/main.cpp index ec28a5327ba1090c09ec896a11b5d2a00566f3b6..a81ccf53f706a73df49fc13475c3f3ced1cbc01f 100644 --- a/bench/CRC/16/BigEndian/main.cpp +++ b/bench/CRC/16/BigEndian/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/CRC/16/LittleEndian/bench.py b/bench/CRC/16/LittleEndian/bench.py index 7ddf27a38e10ebf375ce59b3a7acab627bf0cc1b..4c8ce839c7e42ea3061b4614a098930fb4ff0482 100755 --- a/bench/CRC/16/LittleEndian/bench.py +++ b/bench/CRC/16/LittleEndian/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/CRC/16/LittleEndian/main.cpp b/bench/CRC/16/LittleEndian/main.cpp index a88e38cddc88e25a0d59ac612c55b72d80a89248..ded10e8a30cf2479dc13a72f8f43cbe958f4cbed 100644 --- a/bench/CRC/16/LittleEndian/main.cpp +++ b/bench/CRC/16/LittleEndian/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 
2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/CRC/24/A/BigEndian/bench.py b/bench/CRC/24/A/BigEndian/bench.py index 4d69b5f3460504812e4773164522d5473aa72b0b..a69cb7e0c4e2d3b85100a2c440716d9e8d0b99ad 100755 --- a/bench/CRC/24/A/BigEndian/bench.py +++ b/bench/CRC/24/A/BigEndian/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/CRC/24/A/BigEndian/main.cpp b/bench/CRC/24/A/BigEndian/main.cpp index e970ae3e48e07db2cad8f0fcfb8998834e4f2707..ee1e1c766792a8d56483b17a73ced19e5d9d9989 100644 --- a/bench/CRC/24/A/BigEndian/main.cpp +++ b/bench/CRC/24/A/BigEndian/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/CRC/24/A/LittleEndian/bench.py b/bench/CRC/24/A/LittleEndian/bench.py index e072699c6129a9c0fefad3803aef7e9dcf4994ee..576bafc2996b9d64c9b04b544b8a5d523a9025d7 100755 --- a/bench/CRC/24/A/LittleEndian/bench.py +++ b/bench/CRC/24/A/LittleEndian/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/CRC/24/A/LittleEndian/main.cpp b/bench/CRC/24/A/LittleEndian/main.cpp index 43d515f00d7b7a00b9d26649c7ba9d091dd2da38..17325f8a50a9d8db86917b08074c15c3f1cc5265 100644 --- a/bench/CRC/24/A/LittleEndian/main.cpp +++ b/bench/CRC/24/A/LittleEndian/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/CRC/24/B/BigEndian/bench.py b/bench/CRC/24/B/BigEndian/bench.py index 217c6ffea998d4af164d992cb418eb3186fa14c9..aa318554e83202bf5980d86a9b7300a812d4f6c9 100755 --- a/bench/CRC/24/B/BigEndian/bench.py +++ b/bench/CRC/24/B/BigEndian/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/CRC/24/B/BigEndian/main.cpp b/bench/CRC/24/B/BigEndian/main.cpp index 3bb0e976ff7afb98e498e3c63b2d32255ca3bdb0..876deaf02e27ed6accee0c3d91096e76ce323b41 100644 --- a/bench/CRC/24/B/BigEndian/main.cpp +++ b/bench/CRC/24/B/BigEndian/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/CRC/24/B/LittleEndian/bench.py b/bench/CRC/24/B/LittleEndian/bench.py index bcb362e007db9073cd016ad7faefd55e6ee27530..cbd7e953a87b4e8d5c4af1342a2bbc760ece8ba7 100755 --- a/bench/CRC/24/B/LittleEndian/bench.py +++ b/bench/CRC/24/B/LittleEndian/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/CRC/24/B/LittleEndian/main.cpp b/bench/CRC/24/B/LittleEndian/main.cpp index 
480e9f5e34d3045415a8ef173c2b73d52026f2d0..b19eb358d9e7c5233646b1870f3c9ed38b4e2e9e 100644 --- a/bench/CRC/24/B/LittleEndian/main.cpp +++ b/bench/CRC/24/B/LittleEndian/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/CRC/24/C/BigEndian/bench.py b/bench/CRC/24/C/BigEndian/bench.py index b23ef539875c340c1d190e0f3a5e4b3645001ab5..42303ee6b27b11e0cbe44160eaf2563140a8d2dd 100755 --- a/bench/CRC/24/C/BigEndian/bench.py +++ b/bench/CRC/24/C/BigEndian/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/CRC/24/C/BigEndian/main.cpp b/bench/CRC/24/C/BigEndian/main.cpp index dc73062f000290b6e2a8663a2bdb356ddc1a91a9..e1a18f2fa8cc0739a54e5cacd843fd50c7b4415b 100644 --- a/bench/CRC/24/C/BigEndian/main.cpp +++ b/bench/CRC/24/C/BigEndian/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/CRC/24/C/LittleEndian/bench.py b/bench/CRC/24/C/LittleEndian/bench.py index a246f0af56c57ed3e364a8aaa0a7348b32c29bd1..331bb2609f9b4fdad5f02d0826f1ee10682611ff 100755 --- a/bench/CRC/24/C/LittleEndian/bench.py +++ b/bench/CRC/24/C/LittleEndian/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/CRC/24/C/LittleEndian/main.cpp b/bench/CRC/24/C/LittleEndian/main.cpp index 87177e902ac1ec12e622fadbbda4ee0cb69f911a..d9c0a813ce2e6b934aaf77b6c774cb1e759f0475 100644 --- a/bench/CRC/24/C/LittleEndian/main.cpp +++ b/bench/CRC/24/C/LittleEndian/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/CRC/6/BigEndian/bench.py b/bench/CRC/6/BigEndian/bench.py index 41b955ecf3fd71bd9ee2fd972b29f057de678422..bb642253c2ec69c39e26ee273f653698bddf0e37 100755 --- a/bench/CRC/6/BigEndian/bench.py +++ b/bench/CRC/6/BigEndian/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/CRC/6/BigEndian/main.cpp b/bench/CRC/6/BigEndian/main.cpp index 7e7101b6696f8a702d69a593d7fc8d4c883f767b..b74b808383259a730d5ee47bdc8a93bb9abef47a 100644 --- a/bench/CRC/6/BigEndian/main.cpp +++ b/bench/CRC/6/BigEndian/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/CRC/6/LittleEndian/bench.py b/bench/CRC/6/LittleEndian/bench.py index 3777eb90e480e99b87eec19b1f7747d8e4b56fd6..7878f82c8dccd3d3c2d1ece0bdb252c1cf778265 100755 --- a/bench/CRC/6/LittleEndian/bench.py +++ b/bench/CRC/6/LittleEndian/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or 
its affiliates import json from pathlib import Path diff --git a/bench/CRC/6/LittleEndian/main.cpp b/bench/CRC/6/LittleEndian/main.cpp index f1eb3ce67d301618ed974773cdcd731aa7ae238e..8363eae8ab05689f78965ee47ea307d05a3f52be 100644 --- a/bench/CRC/6/LittleEndian/main.cpp +++ b/bench/CRC/6/LittleEndian/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/ConvCoding/Decoding/bench.py b/bench/ConvCoding/Decoding/bench.py index aa5dc8d5aae47d2a2bc818c6dbe199f336243266..16ebdb05111357834cc49305b3ccc4b4eff68ada 100755 --- a/bench/ConvCoding/Decoding/bench.py +++ b/bench/ConvCoding/Decoding/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/ConvCoding/Decoding/main.cpp b/bench/ConvCoding/Decoding/main.cpp index 1cb1cb4f904b60e4eb8af2ed69bbeaae39556e16..8bc34dda6ee2c143295332c49399c9bfb0c5d2d5 100644 --- a/bench/ConvCoding/Decoding/main.cpp +++ b/bench/ConvCoding/Decoding/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/ConvCoding/Encoding/bench.py b/bench/ConvCoding/Encoding/bench.py index 0615a12a0cd7feee8dbf03ce9e220d58ce4eb492..fca556c2f1b950f510686d716f2bd518453b4c6a 100755 --- a/bench/ConvCoding/Encoding/bench.py +++ b/bench/ConvCoding/Encoding/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/ConvCoding/Encoding/main.cpp b/bench/ConvCoding/Encoding/main.cpp index a3437602fbede6be8d81509c9150ea1ac2c0f51a..8221726656c6bb5cf4238cbafb0437e2c953f891 100644 --- a/bench/ConvCoding/Encoding/main.cpp +++ b/bench/ConvCoding/Encoding/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/Correlation/bench.py b/bench/Correlation/bench.py index cc9dfe4e7bcac86a8a78888becb16d7d047a38c9..9a36a392b8a09a019dc41643f90c4c8dfef277d1 100755 --- a/bench/Correlation/bench.py +++ b/bench/Correlation/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/Correlation/main.cpp b/bench/Correlation/main.cpp index 34384005d342e14053f02499d16cf9ad5d305d88..068172fcf58334024c260d066c9b6a3d76db64e1 100644 --- a/bench/Correlation/main.cpp +++ b/bench/Correlation/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/Demodulation/bench.py b/bench/Demodulation/bench.py index 1740b993baa68bb781db6527facd6f296272b357..051554d0168def88ca7b850107bc6745c12450d8 100755 --- a/bench/Demodulation/bench.py +++ b/bench/Demodulation/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm 
Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json import itertools diff --git a/bench/Demodulation/main.cpp b/bench/Demodulation/main.cpp index 03175d74fab820b3a419235ab8afb2e013b40657..d6b9c926484636062cbb04a0914de11dc2e59049 100644 --- a/bench/Demodulation/main.cpp +++ b/bench/Demodulation/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/ElemWiseVectorMult/VecMul16/bench.py b/bench/ElemWiseVectorMult/VecMul16/bench.py index 813aa84c0518416815de1499468f9d37a105431b..c8d40c39d751ad97f7469ba8ff529620a43d426a 100755 --- a/bench/ElemWiseVectorMult/VecMul16/bench.py +++ b/bench/ElemWiseVectorMult/VecMul16/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/ElemWiseVectorMult/VecMul16/main.cpp b/bench/ElemWiseVectorMult/VecMul16/main.cpp index a08b0fe204c70453cedd416a193cbb607cc9e574..3c40c29ced7d3a3014b00dba30f3409e9e684e92 100644 --- a/bench/ElemWiseVectorMult/VecMul16/main.cpp +++ b/bench/ElemWiseVectorMult/VecMul16/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/ElemWiseVectorMult/VecMul16_2/bench.py b/bench/ElemWiseVectorMult/VecMul16_2/bench.py index b64bfa3c1084e75bef0eff74bd5a69a545d33de6..ee9f806cc2d637b54e93d7412008cf9be5201acf 100755 --- a/bench/ElemWiseVectorMult/VecMul16_2/bench.py +++ b/bench/ElemWiseVectorMult/VecMul16_2/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/ElemWiseVectorMult/VecMul16_2/main.cpp b/bench/ElemWiseVectorMult/VecMul16_2/main.cpp index cb971799643a9f77ade23910d12967681e0b9e12..758eece8f5b8e640d541987b5bbab4b63d816090 100644 --- a/bench/ElemWiseVectorMult/VecMul16_2/main.cpp +++ b/bench/ElemWiseVectorMult/VecMul16_2/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/ElemWiseVectorMult/VecMul32/bench.py b/bench/ElemWiseVectorMult/VecMul32/bench.py index a220f0e62f295e0f1c56d608f9d457e0931ad9c3..b8007ef22849129b2e6e6f88ab54f76c47a5cb33 100755 --- a/bench/ElemWiseVectorMult/VecMul32/bench.py +++ b/bench/ElemWiseVectorMult/VecMul32/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/ElemWiseVectorMult/VecMul32/main.cpp b/bench/ElemWiseVectorMult/VecMul32/main.cpp index 433a2c9e2f6cda43767046ea62ad3d6946663b16..76c149aabb94b4246ada61dfa90230820fe3fdcb 100644 --- a/bench/ElemWiseVectorMult/VecMul32/main.cpp +++ b/bench/ElemWiseVectorMult/VecMul32/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git 
a/bench/ElemWiseVectorMult/VecMul32_2/bench.py b/bench/ElemWiseVectorMult/VecMul32_2/bench.py index 382356fe33d79ee2e9435e55d31ea9ced3c72b1a..ea08bf99ac1c039f367b62883ca72586ba212542 100755 --- a/bench/ElemWiseVectorMult/VecMul32_2/bench.py +++ b/bench/ElemWiseVectorMult/VecMul32_2/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/ElemWiseVectorMult/VecMul32_2/main.cpp b/bench/ElemWiseVectorMult/VecMul32_2/main.cpp index d9471385ecff354024b24dc2215d6c6f0aee4ded..ec1a2102cc3f3984248b3b64243cd9b5a0a1c4ab 100644 --- a/bench/ElemWiseVectorMult/VecMul32_2/main.cpp +++ b/bench/ElemWiseVectorMult/VecMul32_2/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/FFT/FFT16/bench.py b/bench/FFT/FFT16/bench.py index c7ee0ce18fced782eb925a6d72b5eff55eca28e0..f560e04ba74a151db2542990f9c41f7a84c5374f 100755 --- a/bench/FFT/FFT16/bench.py +++ b/bench/FFT/FFT16/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/FFT/FFT16/main.cpp b/bench/FFT/FFT16/main.cpp index 402b0a5cf696fce0e1cb13ff2c491d5f00732d59..2ce24b081f695a5f995bb048d08849b712f16909 100644 --- a/bench/FFT/FFT16/main.cpp +++ b/bench/FFT/FFT16/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/FFT/FFT32/bench.py b/bench/FFT/FFT32/bench.py index 3e1a30ef957e9848be071a6dc218e4ae9c113669..83a3e73190defb9bd331d2d313dcc99cca61117b 100755 --- a/bench/FFT/FFT32/bench.py +++ b/bench/FFT/FFT32/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/FFT/FFT32/main.cpp b/bench/FFT/FFT32/main.cpp index 1281790df6f2b5a7aa9f399172d4fae1b2405ab2..d20456b4fd43594fc02fc3c917e8ceb68eca1a6a 100644 --- a/bench/FFT/FFT32/main.cpp +++ b/bench/FFT/FFT32/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/FIR/FIR16/bench.py b/bench/FIR/FIR16/bench.py index 63fa35fbcd48c5575895d092c0d4dd0b6d19a507..f0b19e8c5665be9b5749efb1dedf097a6fba2e78 100755 --- a/bench/FIR/FIR16/bench.py +++ b/bench/FIR/FIR16/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/FIR/FIR16/main.cpp b/bench/FIR/FIR16/main.cpp index 26a02879fdf2be70483f8d088df9ed9d7cf4be10..58ee2c8b005047f87a8ea5826cd42ef3d97e7f6a 100644 --- a/bench/FIR/FIR16/main.cpp +++ b/bench/FIR/FIR16/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff 
--git a/bench/FIR/FIR16Decimate2/bench.py b/bench/FIR/FIR16Decimate2/bench.py index 716a98d99f476568a221e5c0961beb48a0190b7d..956ca7cd2f86e34f74c89716ee5fbc4be50dcacf 100755 --- a/bench/FIR/FIR16Decimate2/bench.py +++ b/bench/FIR/FIR16Decimate2/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/FIR/FIR16Decimate2/main.cpp b/bench/FIR/FIR16Decimate2/main.cpp index 8e2ec7b7636573e8468277eb882a6eadd94fe213..8b8265ae0349a0dc4fe18dda548ac4a14ad8a9ac 100644 --- a/bench/FIR/FIR16Decimate2/main.cpp +++ b/bench/FIR/FIR16Decimate2/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/FIR/FIR32/bench.py b/bench/FIR/FIR32/bench.py index 731f0be834b2ab5da7054b1a8ed5ba754f8b59cd..86757b6adb4187f94767ac44ce982520d5b34dae 100755 --- a/bench/FIR/FIR32/bench.py +++ b/bench/FIR/FIR32/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/FIR/FIR32/main.cpp b/bench/FIR/FIR32/main.cpp index 4c35278a5372d505dcfaa58d362f9ed5316bedb8..02e3b087c3a3d635082259f622316003747ad9ca 100644 --- a/bench/FIR/FIR32/main.cpp +++ b/bench/FIR/FIR32/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/FIR/FIR32Decimate2/bench.py b/bench/FIR/FIR32Decimate2/bench.py index 3561ceb2a68836cf4175f13011d97cb98b66b24c..41fc6c18739e031b4e7c4b0e57132e1bb5d78e6e 100755 --- a/bench/FIR/FIR32Decimate2/bench.py +++ b/bench/FIR/FIR32Decimate2/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/FIR/FIR32Decimate2/main.cpp b/bench/FIR/FIR32Decimate2/main.cpp index 83d7da76ede6b107feed0b45065d4fd90f517c9c..b663f1906e5021b2e82f0f34d220c00eede9e8d0 100644 --- a/bench/FIR/FIR32Decimate2/main.cpp +++ b/bench/FIR/FIR32Decimate2/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/LDPC/Decoding/bench.py b/bench/LDPC/Decoding/bench.py index 98e5ac01ce36d92eb271bae657ee9a5c33506e49..620ad1238b367ba0775a7a38cf0843622e06834b 100755 --- a/bench/LDPC/Decoding/bench.py +++ b/bench/LDPC/Decoding/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json import itertools diff --git a/bench/LDPC/Decoding/main.cpp b/bench/LDPC/Decoding/main.cpp index cab11b2edcc7f7e6e31d867699da348d39436da8..9d26974555fd994bdd9df11b40add1cc63d63885 100755 --- a/bench/LDPC/Decoding/main.cpp +++ b/bench/LDPC/Decoding/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include 
"armral.h" @@ -33,13 +33,13 @@ void run_ldpc_decoding_perf(armral_ldpc_graph_t bg, uint32_t z, std::vector buffer(buffer_size); for (uint32_t r = 0; r < num_reps; ++r) { buffer_bump_allocator allocator{buffer.data()}; - armral_ldpc::decode_block( + armral::ldpc::decode_block( llr_ptr, bg, z, crc_idx, num_its, out_ptr, allocator); } #else for (uint32_t r = 0; r < num_reps; ++r) { heap_allocator allocator{}; - armral_ldpc::decode_block( + armral::ldpc::decode_block( llr_ptr, bg, z, crc_idx, num_its, out_ptr, allocator); } #endif diff --git a/bench/LDPC/Encoding/bench.py b/bench/LDPC/Encoding/bench.py index 7bfc65b139da749611e610bbdbef6d2a183f3488..dd8f9d406d101c3b239df605e4e07e09668d54cd 100755 --- a/bench/LDPC/Encoding/bench.py +++ b/bench/LDPC/Encoding/bench.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates + import json import itertools from pathlib import Path @@ -11,10 +12,12 @@ def get_path(x): return x if Path(x).is_file() else os.path.join("armral", x) exe_name = get_path("bench_ldpc_encoding") + j = { "exe_name": exe_name, "cases": [] } + base_graphs = [1, 2] lifting_sizes = [2, 11, 16, 18, 30, 36, 52, 112, 160, 208, 384] len_filler_bits = [0, 0, 0, 76, 0, 0, 0, 72, 0, 0, 0] diff --git a/bench/LDPC/Encoding/main.cpp b/bench/LDPC/Encoding/main.cpp index 1c3c310f8c7cd4414783a6672f8d6a15de6dac3a..d7c075a26b5ce3b2fee0699da35ddb31ab3713dd 100644 --- a/bench/LDPC/Encoding/main.cpp +++ b/bench/LDPC/Encoding/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "int8_utils.hpp" @@ -50,7 +50,7 @@ int main(int argc, char **argv) { // This gets converted into the enum representing the base // graph 1 -> LDPC_BASE_GRAPH_1 2 -> LDPC_BASE_GRAPH_2 // lifting_size: The lifting size Z to use in the block encoding - // len_filler_bits: Length of filler bits As per section 5.2.2 of TS 38.212 + // len_filler_bits: Length of filler bits as per section 5.2.2 of TS 38.212 // num_reps: The number of times to repeat the encoding, so as to get a // stable performance number printf("Usage: %s base_graph lifting_size len_filler_bits num_reps\n", diff --git a/bench/LDPC/RateMatching/bench.py b/bench/LDPC/RateMatching/bench.py new file mode 100755 index 0000000000000000000000000000000000000000..5d752eca3e5aaa5797e6590031c139d591d13226 --- /dev/null +++ b/bench/LDPC/RateMatching/bench.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +# Arm RAN Acceleration Library +# Copyright 2023-2024 Arm Limited and/or its affiliates + +import json +from pathlib import Path +import os + + +def get_path(x): return x if Path(x).is_file() else os.path.join("armral", x) + + +exe_name = get_path("bench_ldpc_rate_matching") + +j = { + "exe_name": exe_name, + "cases": [] +} + +para_list = [ + (1, 2, 44, 25344, 0, 0), + (1, 11, 242, 25344, 0, 0), + (1, 112, 2464, 25344, 0, 0), + (1, 208, 4576, 25344, 0, 0), + (1, 384, 8448, 25344, 0, 0), + (2, 2, 22, 19200, 0, 0), + (2, 11, 112, 19200, 0, 0), + (2, 112, 1232, 19200, 0, 0), + (2, 208, 2288, 19200, 0, 0), + (2, 384, 4224, 19200, 0, 0) +] + +# We scale the number of reps according to the lifting size. 
+target_reps = 150000 + +for bg, z, e, nref, rv, mod in para_list: + case = { + "name": "ldpc_rate_matching_bg{}_z{}_e{}_nref{}_rv{}_mod{}".format( + bg, z, e, nref, rv, mod + ), + "args": "{} {} {} {} {} {}".format(bg, z, e, nref, rv, mod), + "reps": target_reps * 2 // z, + } + j["cases"].append(case) +print(json.dumps(j)) diff --git a/bench/LDPC/RateMatching/main.cpp b/bench/LDPC/RateMatching/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d99459abbabb743253e6330c12ff5931ecba43f6 --- /dev/null +++ b/bench/LDPC/RateMatching/main.cpp @@ -0,0 +1,79 @@ +/* + Arm RAN Acceleration Library + Copyright 2023-2024 Arm Limited and/or its affiliates +*/ +#include "armral.h" +#include "ldpc_coding.hpp" + +#include <cstdio> +#include <cstdlib> +#include <vector> + +namespace { + +void run_ldpc_rate_matching_perf(armral_ldpc_graph_t bg, uint32_t z, uint32_t e, + uint32_t nref, uint32_t rv, + armral_modulation_type mod, uint32_t reps) { + printf("[LDPC RATE MATCHING] bg = %u, z = %u, e = %u, nref = %u, rv = %u, " + "mod = %u, number of repetitions = %u\n", + (uint32_t)bg, z, e, nref, rv, (uint32_t)mod, reps); + + const auto *graph = armral_ldpc_get_base_graph(bg); + + uint32_t len_filler_bits = 0; + uint32_t in_size = graph->nmessage_bits * z; + uint32_t out_size = (graph->ncodeword_bits + 2) * z; + + std::vector<uint8_t> in((in_size + 7) / 8); + std::vector<uint8_t> out((out_size + 7) / 8); + const auto *in_ptr = in.data(); + auto *out_ptr = out.data(); +#ifdef ARMRAL_BENCH_NOALLOC + std::vector<uint8_t> buffer((2 * z * 66) + e); + auto *buffer_ptr = buffer.data(); + + for (uint32_t r = 0; r < reps; ++r) { + armral_ldpc_rate_matching_noalloc(bg, z, e, nref, len_filler_bits, in_size, + rv, mod, in_ptr, out_ptr, buffer_ptr); + } +#else + for (uint32_t r = 0; r < reps; ++r) { + armral_ldpc_rate_matching(bg, z, e, nref, len_filler_bits, in_size, rv, mod, + in_ptr, out_ptr); + } +#endif +} + +} // anonymous namespace + +int main(int argc, char **argv) { + if (argc != 8) { + // base_graph: Integer representing the base graph to use. + // This gets converted into the enum representing the base + // graph 1 -> LDPC_BASE_GRAPH_1 2 -> LDPC_BASE_GRAPH_2 + // lifting_size: The lifting size Z. + // e: The number of bits in the rate-matched message. + // This is assumed to be a multiple of the number of + // bits per modulation symbol. + // nref: The soft buffer size for limited buffer rate matching. + // rv: Redundancy version used in rate matching. + // Must be in the set {0, 1, 2, 3}. + // mod: The type of modulation to perform. + // num_reps: The number of times to repeat the function. 
+ printf("Usage: %s base_graph lifting_size e nref rv mod num_reps\n", + argv[0]); + exit(EXIT_FAILURE); + } + + auto bg = (armral_ldpc_graph_t)(atoi(argv[1]) - 1); + auto z = (uint32_t)atoi(argv[2]); + auto e = (uint32_t)atoi(argv[3]); + auto nref = (uint32_t)atoi(argv[4]); + auto rv = (uint32_t)atoi(argv[5]); + auto mod = (armral_modulation_type)atoi(argv[6]); + auto reps = (uint32_t)atoi(argv[7]); + + run_ldpc_rate_matching_perf(bg, z, e, nref, rv, mod, reps); + + return EXIT_SUCCESS; +} diff --git a/bench/LDPC/RateRecovery/bench.py b/bench/LDPC/RateRecovery/bench.py new file mode 100755 index 0000000000000000000000000000000000000000..02463ca68f0ca3a326a39df41ef032c1a0cdb459 --- /dev/null +++ b/bench/LDPC/RateRecovery/bench.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +# Arm RAN Acceleration Library +# Copyright 2023-2024 Arm Limited and/or its affiliates + +import json +from pathlib import Path +import os + + +def get_path(x): return x if Path(x).is_file() else os.path.join("armral", x) + + +exe_name = get_path("bench_ldpc_rate_recovery") + +j = { + "exe_name": exe_name, + "cases": [] +} + +para_list = [ + (1, 2, 44, 25344, 0, 0), + (1, 11, 242, 25344, 0, 0), + (1, 112, 2464, 25344, 0, 0), + (1, 208, 4576, 25344, 0, 0), + (1, 384, 8448, 25344, 0, 0), + (2, 2, 22, 19200, 0, 0), + (2, 11, 112, 19200, 0, 0), + (2, 112, 1232, 19200, 0, 0), + (2, 208, 2288, 19200, 0, 0), + (2, 384, 4224, 19200, 0, 0) +] + +# We scale the number of reps according to the lifting size. +target_reps = 150000 + +for bg, z, e, nref, rv, mod in para_list: + case = { + "name": "ldpc_rate_recovery_bg{}_z{}_e{}_nref{}_rv{}_mod{}".format( + bg, z, e, nref, rv, mod + ), + "args": "{} {} {} {} {} {}".format( + bg, z, e, nref, rv, mod), + "reps": target_reps * 2 // z, + } + j["cases"].append(case) +print(json.dumps(j)) diff --git a/bench/LDPC/RateRecovery/main.cpp b/bench/LDPC/RateRecovery/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..af9e0566e8eec31512552a8306435a66318e79cf --- /dev/null +++ b/bench/LDPC/RateRecovery/main.cpp @@ -0,0 +1,74 @@ +/* + Arm RAN Acceleration Library + Copyright 2023-2024 Arm Limited and/or its affiliates +*/ +#include "armral.h" +#include "ldpc_coding.hpp" + +#include +#include +#include + +namespace { + +void run_ldpc_rate_recovery_perf(armral_ldpc_graph_t bg, uint32_t z, uint32_t e, + uint32_t nref, uint32_t rv, + armral_modulation_type mod, uint32_t reps) { + printf("[LDPC RATE RECOVERY] bg = %u, z = %u, e = %u, nref = %u, rv = %u, " + "mod = %u, number of repetitions = %u\n", + (uint32_t)bg, z, e, nref, rv, (uint32_t)mod, reps); + + uint32_t len_filler_bits = 0; + uint32_t n = (bg == LDPC_BASE_GRAPH_2) ? 50 * z : 66 * z; + std::vector in(n); + std::vector out(n); + const auto *in_ptr = in.data(); + auto *out_ptr = out.data(); + +#ifdef ARMRAL_BENCH_NOALLOC + std::vector buffer((z * 66) + e); + auto *buffer_ptr = buffer.data(); + + for (uint32_t r = 0; r < reps; ++r) { + armral_ldpc_rate_recovery_noalloc(bg, z, e, nref, len_filler_bits, n, rv, + mod, in_ptr, out_ptr, buffer_ptr); + } +#else + for (uint32_t r = 0; r < reps; ++r) { + armral_ldpc_rate_recovery(bg, z, e, nref, len_filler_bits, n, rv, mod, + in_ptr, out_ptr); + } +#endif +} + +} // anonymous namespace + +int main(int argc, char **argv) { + if (argc != 8) { + // base_graph: Integer representing the base graph to use. + // This gets converted into the enum representing the base + // graph 1 -> LDPC_BASE_GRAPH_1 2 -> LDPC_BASE_GRAPH_2 + // lifting_size: The lifting size Z. 
+ // e: The number of LLRs in the demodulated message. + // nref: The soft buffer size for limited buffer rate recovery. + // rv: Redundancy version used in rate recovery. + // Must be in the set {0, 1, 2, 3}. + // mod: The type of modulation which was performed. + // num_reps: The number of times to repeat the function. + printf("Usage: %s base_graph lifting_size e nref rv mod num_reps\n", + argv[0]); + exit(EXIT_FAILURE); + } + + auto bg = (armral_ldpc_graph_t)(atoi(argv[1]) - 1); + auto z = (uint32_t)atoi(argv[2]); + auto e = (uint32_t)atoi(argv[3]); + auto nref = (uint32_t)atoi(argv[4]); + auto rv = (uint32_t)atoi(argv[5]); + auto mod = (armral_modulation_type)atoi(argv[6]); + auto reps = (uint32_t)atoi(argv[7]); + + run_ldpc_rate_recovery_perf(bg, z, e, nref, rv, mod, reps); + + return EXIT_SUCCESS; +} diff --git a/bench/MatrixInv/Batch/GeneralMatInv/NonPA/bench.py b/bench/MatrixInv/Batch/GeneralMatInv/NonPA/bench.py index dee58454c397fd4cf68738af8e163e957bb71988..74414bb4b8f58e48287a6755259912394a5236ef 100755 --- a/bench/MatrixInv/Batch/GeneralMatInv/NonPA/bench.py +++ b/bench/MatrixInv/Batch/GeneralMatInv/NonPA/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/MatrixInv/Batch/GeneralMatInv/NonPA/main.cpp b/bench/MatrixInv/Batch/GeneralMatInv/NonPA/main.cpp index 9c3505ff659455799b3c06b98f17967d87ffa88a..32847b488d83f24e9cb6a129565f2a6c7323e278 100644 --- a/bench/MatrixInv/Batch/GeneralMatInv/NonPA/main.cpp +++ b/bench/MatrixInv/Batch/GeneralMatInv/NonPA/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "matrix_utils.hpp" diff --git a/bench/MatrixInv/Batch/GeneralMatInv/PA/bench.py b/bench/MatrixInv/Batch/GeneralMatInv/PA/bench.py index e52b7fbb42b59ac908eca0df80a223fdc7689126..aeaf28cae123b91b35480dfeac2a467a8f04ca07 100755 --- a/bench/MatrixInv/Batch/GeneralMatInv/PA/bench.py +++ b/bench/MatrixInv/Batch/GeneralMatInv/PA/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/MatrixInv/Batch/GeneralMatInv/PA/main.cpp b/bench/MatrixInv/Batch/GeneralMatInv/PA/main.cpp index 83ffe65e797dd774fbede8f4cd87b4215b4dfec9..0f27c83735e74eb14de3ff5ba090f9879d937d67 100644 --- a/bench/MatrixInv/Batch/GeneralMatInv/PA/main.cpp +++ b/bench/MatrixInv/Batch/GeneralMatInv/PA/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "matrix_utils.hpp" diff --git a/bench/MatrixInv/Batch/HermitianMatInv/NonPA/bench.py b/bench/MatrixInv/Batch/HermitianMatInv/NonPA/bench.py index ee297baed59b44c99aaaf5472efcab033c55ea63..8710c18caa683a98e31d02d18b8c874559468d00 100755 --- a/bench/MatrixInv/Batch/HermitianMatInv/NonPA/bench.py +++ b/bench/MatrixInv/Batch/HermitianMatInv/NonPA/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git 
a/bench/MatrixInv/Batch/HermitianMatInv/NonPA/main.cpp b/bench/MatrixInv/Batch/HermitianMatInv/NonPA/main.cpp index 401f7674e705a3e4f7e037a77000cbc68c6246ec..8c66a87be3737bbaf2d176fb66b5f5668d5c969c 100644 --- a/bench/MatrixInv/Batch/HermitianMatInv/NonPA/main.cpp +++ b/bench/MatrixInv/Batch/HermitianMatInv/NonPA/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "matrix_utils.hpp" diff --git a/bench/MatrixInv/Batch/HermitianMatInv/PA/bench.py b/bench/MatrixInv/Batch/HermitianMatInv/PA/bench.py index 2994dbb2922602e5944462a5c506186999bcd8dd..eaf5b9baad7575f2aadcf34fb194099582784fb1 100755 --- a/bench/MatrixInv/Batch/HermitianMatInv/PA/bench.py +++ b/bench/MatrixInv/Batch/HermitianMatInv/PA/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/MatrixInv/Batch/HermitianMatInv/PA/main.cpp b/bench/MatrixInv/Batch/HermitianMatInv/PA/main.cpp index 8db0c70a8d5985ce8e733aa4a22297c283d47517..6536ea8615c61e3998787206b4c082e5a27bdd9e 100644 --- a/bench/MatrixInv/Batch/HermitianMatInv/PA/main.cpp +++ b/bench/MatrixInv/Batch/HermitianMatInv/PA/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "matrix_utils.hpp" diff --git a/bench/MatrixInv/Single/GeneralMatInv/bench.py b/bench/MatrixInv/Single/GeneralMatInv/bench.py index 88328f20bb160afb3b0f7fd51c77ab0f267b1a70..3903a1ba46d75ba8586c4600f1dfca95c4be3f17 100755 --- a/bench/MatrixInv/Single/GeneralMatInv/bench.py +++ b/bench/MatrixInv/Single/GeneralMatInv/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/MatrixInv/Single/GeneralMatInv/main.cpp b/bench/MatrixInv/Single/GeneralMatInv/main.cpp index 6847b079a266ffe7e56021d16adbce8ebb75d978..94e63e03c02f6244ea82601207f5b1c75d639728 100644 --- a/bench/MatrixInv/Single/GeneralMatInv/main.cpp +++ b/bench/MatrixInv/Single/GeneralMatInv/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "matrix_utils.hpp" diff --git a/bench/MatrixInv/Single/HermitianMatInv/bench.py b/bench/MatrixInv/Single/HermitianMatInv/bench.py index 482800f62d7674d9a24f41752d077cec06055b1b..f0d7e5b984adc201e291fb64f6bbd234d16c0e5d 100755 --- a/bench/MatrixInv/Single/HermitianMatInv/bench.py +++ b/bench/MatrixInv/Single/HermitianMatInv/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/MatrixInv/Single/HermitianMatInv/main.cpp b/bench/MatrixInv/Single/HermitianMatInv/main.cpp index 9d053591ba0848117bf6f18d40dafd304bc8ec2b..c9b708fb10089a1b9983b7e490bb6b82baea9786 100644 --- a/bench/MatrixInv/Single/HermitianMatInv/main.cpp +++ b/bench/MatrixInv/Single/HermitianMatInv/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration 
Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "matrix_utils.hpp" diff --git a/bench/MatrixMult/Batch/ArmSolve/1x2/bench.py b/bench/MatrixMult/Batch/ArmSolve/1x2/bench.py index d4a1e3915be607e8b7b1ca6fa14dca73154d9833..e7edca6043f31b7b778683ed6af9e3ba066f4d82 100755 --- a/bench/MatrixMult/Batch/ArmSolve/1x2/bench.py +++ b/bench/MatrixMult/Batch/ArmSolve/1x2/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json import itertools diff --git a/bench/MatrixMult/Batch/ArmSolve/1x2/main.cpp b/bench/MatrixMult/Batch/ArmSolve/1x2/main.cpp index 5f82b2823201efc4dd33e3d153ef30cdf115f397..9b0453fdd4972cbedbcd4b106839b303abc3c73e 100644 --- a/bench/MatrixMult/Batch/ArmSolve/1x2/main.cpp +++ b/bench/MatrixMult/Batch/ArmSolve/1x2/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cs16_utils.hpp" diff --git a/bench/MatrixMult/Batch/ArmSolve/1x4/bench.py b/bench/MatrixMult/Batch/ArmSolve/1x4/bench.py index cbc5866b2b032042124ac97022471ca1b7bdbf35..d190973878d057c6fada1a644a7ce1f647f5f356 100755 --- a/bench/MatrixMult/Batch/ArmSolve/1x4/bench.py +++ b/bench/MatrixMult/Batch/ArmSolve/1x4/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json import itertools diff --git a/bench/MatrixMult/Batch/ArmSolve/1x4/main.cpp b/bench/MatrixMult/Batch/ArmSolve/1x4/main.cpp index 135427a2c37c876ccdbac19d2cc3aa6f7ac48b0c..1b8bd8bb43cd42a643d257714c6309d466bf1704 100644 --- a/bench/MatrixMult/Batch/ArmSolve/1x4/main.cpp +++ b/bench/MatrixMult/Batch/ArmSolve/1x4/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cs16_utils.hpp" diff --git a/bench/MatrixMult/Batch/ArmSolve/2x2/bench.py b/bench/MatrixMult/Batch/ArmSolve/2x2/bench.py index b3f69b1952c6e2210715d6b6dbee86f40a3482a5..aee26677a907174467751aa07697427b1ed8cdc8 100755 --- a/bench/MatrixMult/Batch/ArmSolve/2x2/bench.py +++ b/bench/MatrixMult/Batch/ArmSolve/2x2/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json import itertools diff --git a/bench/MatrixMult/Batch/ArmSolve/2x2/main.cpp b/bench/MatrixMult/Batch/ArmSolve/2x2/main.cpp index d14a059db2faf1f52d76df4d3d6524db6e818e08..b554c1f6cc9afac83face5d645b1256e914d9161 100644 --- a/bench/MatrixMult/Batch/ArmSolve/2x2/main.cpp +++ b/bench/MatrixMult/Batch/ArmSolve/2x2/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cs16_utils.hpp" diff --git a/bench/MatrixMult/Batch/ArmSolve/2x4/bench.py b/bench/MatrixMult/Batch/ArmSolve/2x4/bench.py index 697939be3f6a53f0d9717f23df83e84ecb2656d2..9dd2030a9fb5f54dff4f33335a6316bc1bf03440 100755 --- a/bench/MatrixMult/Batch/ArmSolve/2x4/bench.py +++ b/bench/MatrixMult/Batch/ArmSolve/2x4/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN 
Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json import itertools diff --git a/bench/MatrixMult/Batch/ArmSolve/2x4/main.cpp b/bench/MatrixMult/Batch/ArmSolve/2x4/main.cpp index 80fa307a560a1325490eafbd09bbf04288f3fcad..7637055133e6d0609930518a556241af3d7f7d4b 100644 --- a/bench/MatrixMult/Batch/ArmSolve/2x4/main.cpp +++ b/bench/MatrixMult/Batch/ArmSolve/2x4/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cs16_utils.hpp" diff --git a/bench/MatrixMult/Batch/ArmSolve/4x4/bench.py b/bench/MatrixMult/Batch/ArmSolve/4x4/bench.py index 0973b259122b0b85eec58101c01fd2a6169deb81..ee929f25253f059b3c33c633b22713f766aa2b4a 100755 --- a/bench/MatrixMult/Batch/ArmSolve/4x4/bench.py +++ b/bench/MatrixMult/Batch/ArmSolve/4x4/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json import itertools diff --git a/bench/MatrixMult/Batch/ArmSolve/4x4/main.cpp b/bench/MatrixMult/Batch/ArmSolve/4x4/main.cpp index 5aa2b0f0ba6141966aeb5a5a6b594c3e9968dd7b..4082649fe53e702c312ff24587bfc7f06eba1859 100644 --- a/bench/MatrixMult/Batch/ArmSolve/4x4/main.cpp +++ b/bench/MatrixMult/Batch/ArmSolve/4x4/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cs16_utils.hpp" diff --git a/bench/MatrixMult/Batch/MatrixVectorMult16/32b/NonPA/bench.py b/bench/MatrixMult/Batch/MatrixVectorMult16/32b/NonPA/bench.py index 6b745e080f304a5b4c626b3e6290cf33a9c56824..8549eab3111fd62bb699d0265d1adf297903f4e2 100755 --- a/bench/MatrixMult/Batch/MatrixVectorMult16/32b/NonPA/bench.py +++ b/bench/MatrixMult/Batch/MatrixVectorMult16/32b/NonPA/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import itertools import json diff --git a/bench/MatrixMult/Batch/MatrixVectorMult16/32b/NonPA/main.cpp b/bench/MatrixMult/Batch/MatrixVectorMult16/32b/NonPA/main.cpp index 333fef3c7e4103395cbe77966a6f49604e6e1475..6d0006fb206cc3fe7e6e31efce17a0d0664a4757 100644 --- a/bench/MatrixMult/Batch/MatrixVectorMult16/32b/NonPA/main.cpp +++ b/bench/MatrixMult/Batch/MatrixVectorMult16/32b/NonPA/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/MatrixMult/Batch/MatrixVectorMult16/32b/PA/bench.py b/bench/MatrixMult/Batch/MatrixVectorMult16/32b/PA/bench.py index 99baf98be27cb973606b70c786fb235c7b514a97..4ecaa289625f6c62357924f1fc31400272347936 100755 --- a/bench/MatrixMult/Batch/MatrixVectorMult16/32b/PA/bench.py +++ b/bench/MatrixMult/Batch/MatrixVectorMult16/32b/PA/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import itertools import json diff --git a/bench/MatrixMult/Batch/MatrixVectorMult16/32b/PA/main.cpp b/bench/MatrixMult/Batch/MatrixVectorMult16/32b/PA/main.cpp index 
ab7f7608b5d6569882734da20dea2c6ce32bb1b7..85b3f96e50d6f9f4976af81ef813d0c7fa9b9df4 100644 --- a/bench/MatrixMult/Batch/MatrixVectorMult16/32b/PA/main.cpp +++ b/bench/MatrixMult/Batch/MatrixVectorMult16/32b/PA/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/MatrixMult/Batch/MatrixVectorMult16/64b/NonPA/bench.py b/bench/MatrixMult/Batch/MatrixVectorMult16/64b/NonPA/bench.py index 491754eb65bc1096fdd772db39ec324917f5306f..621a7a03f1548fff14dc46b0c6abc55af209b26b 100755 --- a/bench/MatrixMult/Batch/MatrixVectorMult16/64b/NonPA/bench.py +++ b/bench/MatrixMult/Batch/MatrixVectorMult16/64b/NonPA/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import itertools import json diff --git a/bench/MatrixMult/Batch/MatrixVectorMult16/64b/NonPA/main.cpp b/bench/MatrixMult/Batch/MatrixVectorMult16/64b/NonPA/main.cpp index f7cbfd79133563de97525f49875c79621dad83f9..63034a6edeeeccfd892cbf5ec1c42bedeaf6de8e 100644 --- a/bench/MatrixMult/Batch/MatrixVectorMult16/64b/NonPA/main.cpp +++ b/bench/MatrixMult/Batch/MatrixVectorMult16/64b/NonPA/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/MatrixMult/Batch/MatrixVectorMult16/64b/PA/bench.py b/bench/MatrixMult/Batch/MatrixVectorMult16/64b/PA/bench.py index cb8327ff1d1302461d4bd9d9e3cf25b49221af95..5e88789dc2c415900cff9638b1e80a7535b793e9 100755 --- a/bench/MatrixMult/Batch/MatrixVectorMult16/64b/PA/bench.py +++ b/bench/MatrixMult/Batch/MatrixVectorMult16/64b/PA/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import itertools import json diff --git a/bench/MatrixMult/Batch/MatrixVectorMult16/64b/PA/main.cpp b/bench/MatrixMult/Batch/MatrixVectorMult16/64b/PA/main.cpp index 74252bf9ff3f0a392f58a68c36027a201e8b6cc7..bdfbd196a6c8fe1d72e1a309477673510fef7f68 100644 --- a/bench/MatrixMult/Batch/MatrixVectorMult16/64b/PA/main.cpp +++ b/bench/MatrixMult/Batch/MatrixVectorMult16/64b/PA/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/MatrixMult/Batch/MatrixVectorMult32/NonPA/bench.py b/bench/MatrixMult/Batch/MatrixVectorMult32/NonPA/bench.py index 4e1dc7c2556f047b03d3290e0fbb96cb7206de0d..0cfde51964b54252cd73f43e374bc7abd2c771f9 100755 --- a/bench/MatrixMult/Batch/MatrixVectorMult32/NonPA/bench.py +++ b/bench/MatrixMult/Batch/MatrixVectorMult32/NonPA/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import itertools import json diff --git a/bench/MatrixMult/Batch/MatrixVectorMult32/NonPA/main.cpp b/bench/MatrixMult/Batch/MatrixVectorMult32/NonPA/main.cpp index 4002b90017f40b79a7358b211cb5e2b0390b717f..3344fe6f1d20d43a2168e1e09528081bf898b894 100644 --- a/bench/MatrixMult/Batch/MatrixVectorMult32/NonPA/main.cpp +++ b/bench/MatrixMult/Batch/MatrixVectorMult32/NonPA/main.cpp @@ 
-1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/MatrixMult/Batch/MatrixVectorMult32/PA/bench.py b/bench/MatrixMult/Batch/MatrixVectorMult32/PA/bench.py index a350fe327caa45d68768574139cb32723b488f4a..78f0bae758c3a12d07f56e3cabf51ca8c6631503 100755 --- a/bench/MatrixMult/Batch/MatrixVectorMult32/PA/bench.py +++ b/bench/MatrixMult/Batch/MatrixVectorMult32/PA/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import itertools import json diff --git a/bench/MatrixMult/Batch/MatrixVectorMult32/PA/main.cpp b/bench/MatrixMult/Batch/MatrixVectorMult32/PA/main.cpp index a42ddaa05affc1bcdf9b865b1815135b9b5684b1..0aa6934ccc53fadaa2f415b14a0d76bbf64a015b 100644 --- a/bench/MatrixMult/Batch/MatrixVectorMult32/PA/main.cpp +++ b/bench/MatrixMult/Batch/MatrixVectorMult32/PA/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/MatrixMult/Single/MatrixMult16/32b/bench.py b/bench/MatrixMult/Single/MatrixMult16/32b/bench.py index 0976b824fa958396c2b06daf7e6742ed0baf65eb..5e4312e3e1c5a32b28516a7628aa1a66376a89b8 100755 --- a/bench/MatrixMult/Single/MatrixMult16/32b/bench.py +++ b/bench/MatrixMult/Single/MatrixMult16/32b/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/MatrixMult/Single/MatrixMult16/32b/main.cpp b/bench/MatrixMult/Single/MatrixMult16/32b/main.cpp index 6309d53f1dc834d0010a76e85091ea90d787b0ca..3462e61a4eb4882dd96ce3153ecde1f3b1c5919a 100644 --- a/bench/MatrixMult/Single/MatrixMult16/32b/main.cpp +++ b/bench/MatrixMult/Single/MatrixMult16/32b/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/MatrixMult/Single/MatrixMult16/64b/bench.py b/bench/MatrixMult/Single/MatrixMult16/64b/bench.py index 2f8b3ea0c04e3b318b2c8b91cc19d19b46e177f5..ef4ad39039a7e0ff913ba0b51cf9599b98987978 100755 --- a/bench/MatrixMult/Single/MatrixMult16/64b/bench.py +++ b/bench/MatrixMult/Single/MatrixMult16/64b/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/MatrixMult/Single/MatrixMult16/64b/main.cpp b/bench/MatrixMult/Single/MatrixMult16/64b/main.cpp index b4a686a9ed35aca001512cd944861c993dbb7eaf..a1ebda8082cdd30c7807909ca0a1cbe5bc76a5c7 100644 --- a/bench/MatrixMult/Single/MatrixMult16/64b/main.cpp +++ b/bench/MatrixMult/Single/MatrixMult16/64b/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/MatrixMult/Single/MatrixMult32/2x2/IQ/bench.py b/bench/MatrixMult/Single/MatrixMult32/2x2/IQ/bench.py index 
b8be8948109b486ac89f882b949db3fd60a8dffa..6125bad8f7a505bbc0ba11a95240f6155fa4ac46 100755 --- a/bench/MatrixMult/Single/MatrixMult32/2x2/IQ/bench.py +++ b/bench/MatrixMult/Single/MatrixMult32/2x2/IQ/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/MatrixMult/Single/MatrixMult32/2x2/IQ/main.cpp b/bench/MatrixMult/Single/MatrixMult32/2x2/IQ/main.cpp index fdc8a19a776456e3eb0edc7d807a8ba6bd4bcd89..890a174cafc7d40af027669750de6aab22b6982e 100644 --- a/bench/MatrixMult/Single/MatrixMult32/2x2/IQ/main.cpp +++ b/bench/MatrixMult/Single/MatrixMult32/2x2/IQ/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cf32_utils.hpp" diff --git a/bench/MatrixMult/Single/MatrixMult32/2x2/NonIQ/bench.py b/bench/MatrixMult/Single/MatrixMult32/2x2/NonIQ/bench.py index cb0f7cf8fc10fc346c02fe352b73f8eb7245e38a..f58fc63622325c39723f06b4fe925ac62bc80947 100755 --- a/bench/MatrixMult/Single/MatrixMult32/2x2/NonIQ/bench.py +++ b/bench/MatrixMult/Single/MatrixMult32/2x2/NonIQ/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/MatrixMult/Single/MatrixMult32/2x2/NonIQ/main.cpp b/bench/MatrixMult/Single/MatrixMult32/2x2/NonIQ/main.cpp index 7df291ab1adeda6b31e9cf1d0ff7d6edbcd8123b..74ab2b7a34a11ab9b234c9efaa2976651ac39ab2 100644 --- a/bench/MatrixMult/Single/MatrixMult32/2x2/NonIQ/main.cpp +++ b/bench/MatrixMult/Single/MatrixMult32/2x2/NonIQ/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/MatrixMult/Single/MatrixMult32/4x4/IQ/bench.py b/bench/MatrixMult/Single/MatrixMult32/4x4/IQ/bench.py index 8787e701c502f053999c4be912fe173eea1b898e..c7dd1f99e72633660a44f196b6790f052e40cdfc 100755 --- a/bench/MatrixMult/Single/MatrixMult32/4x4/IQ/bench.py +++ b/bench/MatrixMult/Single/MatrixMult32/4x4/IQ/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/MatrixMult/Single/MatrixMult32/4x4/IQ/main.cpp b/bench/MatrixMult/Single/MatrixMult32/4x4/IQ/main.cpp index 3d5da966d8b3c881d1683a64954efdfca9df40f9..3ccd0ceac7851cd82c62e8b136132ec1dccc29c9 100644 --- a/bench/MatrixMult/Single/MatrixMult32/4x4/IQ/main.cpp +++ b/bench/MatrixMult/Single/MatrixMult32/4x4/IQ/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cf32_utils.hpp" diff --git a/bench/MatrixMult/Single/MatrixMult32/4x4/NonIQ/bench.py b/bench/MatrixMult/Single/MatrixMult32/4x4/NonIQ/bench.py index 23edd2b1a848a3b762a270feb2469af0736b3dde..626b61804265975d5350f0ae0f9b35b85c7f09af 100755 --- a/bench/MatrixMult/Single/MatrixMult32/4x4/NonIQ/bench.py +++ b/bench/MatrixMult/Single/MatrixMult32/4x4/NonIQ/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# 
Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/MatrixMult/Single/MatrixMult32/4x4/NonIQ/main.cpp b/bench/MatrixMult/Single/MatrixMult32/4x4/NonIQ/main.cpp index 115789ee21217d77240d439699b1db7f731a8462..d0eb8697ba70904b6d58f570ec845097ce1eb164 100644 --- a/bench/MatrixMult/Single/MatrixMult32/4x4/NonIQ/main.cpp +++ b/bench/MatrixMult/Single/MatrixMult32/4x4/NonIQ/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/MatrixMult/Single/MatrixMult32/general/bench.py b/bench/MatrixMult/Single/MatrixMult32/general/bench.py index 2ef90b6e2b7567ec8b464797712380c5013bb913..cae82ac76a655e1583f063665e260cfcdd154a51 100755 --- a/bench/MatrixMult/Single/MatrixMult32/general/bench.py +++ b/bench/MatrixMult/Single/MatrixMult32/general/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json import itertools diff --git a/bench/MatrixMult/Single/MatrixMult32/general/main.cpp b/bench/MatrixMult/Single/MatrixMult32/general/main.cpp index d15b3ee204c289419baac7c149b2b40ca73758bc..f37000d52e0bdf7be1a9f5a004ef2c376d8eb1f2 100644 --- a/bench/MatrixMult/Single/MatrixMult32/general/main.cpp +++ b/bench/MatrixMult/Single/MatrixMult32/general/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/MatrixMult/Single/MatrixMultAAH32/bench.py b/bench/MatrixMult/Single/MatrixMultAAH32/bench.py index 02344b0005b7509fb3c177775ffe124a38b34493..c911c26f9c15597c660fa2a1c85b5af8cd349cb5 100755 --- a/bench/MatrixMult/Single/MatrixMultAAH32/bench.py +++ b/bench/MatrixMult/Single/MatrixMultAAH32/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2023 Arm Limited and/or its affiliates +# Copyright 2023-2024 Arm Limited and/or its affiliates import json import itertools diff --git a/bench/MatrixMult/Single/MatrixMultAAH32/main.cpp b/bench/MatrixMult/Single/MatrixMultAAH32/main.cpp index fdee30afd1f3aec2e43c7bd64322e3b2e9eac4c0..5ebdcf156611b84d3eba5c7184b091a0d0080443 100644 --- a/bench/MatrixMult/Single/MatrixMultAAH32/main.cpp +++ b/bench/MatrixMult/Single/MatrixMultAAH32/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2023 Arm Limited and/or its affiliates + Copyright 2023-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/MatrixMult/Single/MatrixMultAHB32/bench.py b/bench/MatrixMult/Single/MatrixMultAHB32/bench.py index ffee1cbb76e4b81f86d4f10f57508ff6b2c9780e..9a58a3d056aca8e7ffffd538404db30630937225 100755 --- a/bench/MatrixMult/Single/MatrixMultAHB32/bench.py +++ b/bench/MatrixMult/Single/MatrixMultAHB32/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2023 Arm Limited and/or its affiliates +# Copyright 2023-2024 Arm Limited and/or its affiliates import json import itertools diff --git a/bench/MatrixMult/Single/MatrixMultAHB32/main.cpp b/bench/MatrixMult/Single/MatrixMultAHB32/main.cpp index cd7693bd6316da98c18cbdda41e071cf29a31db1..1fee8780fd2d2501b98e1eda2e4f2b0eaa36689e 100644 --- 
a/bench/MatrixMult/Single/MatrixMultAHB32/main.cpp +++ b/bench/MatrixMult/Single/MatrixMultAHB32/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2023 Arm Limited and/or its affiliates + Copyright 2023-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/MatrixMult/Single/MatrixVectorMult16/32bit/bench.py b/bench/MatrixMult/Single/MatrixVectorMult16/32bit/bench.py index e63f739cfe6cd273f26aa6661e6596e91d40a4ae..c15b3abf81f8669b4a7387f01e528ffcf857f052 100755 --- a/bench/MatrixMult/Single/MatrixVectorMult16/32bit/bench.py +++ b/bench/MatrixMult/Single/MatrixVectorMult16/32bit/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json import itertools diff --git a/bench/MatrixMult/Single/MatrixVectorMult16/32bit/main.cpp b/bench/MatrixMult/Single/MatrixVectorMult16/32bit/main.cpp index db470ad19c1b73ea8b0974c539c450d5ade89e5d..63e49a220167a9eb74df771157fd1a77c46a1fce 100644 --- a/bench/MatrixMult/Single/MatrixVectorMult16/32bit/main.cpp +++ b/bench/MatrixMult/Single/MatrixVectorMult16/32bit/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/MatrixMult/Single/MatrixVectorMult16/64bit/bench.py b/bench/MatrixMult/Single/MatrixVectorMult16/64bit/bench.py index 14faaa0f6c466efa72a0ff4a66ce1249f860de86..4ada38d39c87395814f706565351bc3a4b295685 100755 --- a/bench/MatrixMult/Single/MatrixVectorMult16/64bit/bench.py +++ b/bench/MatrixMult/Single/MatrixVectorMult16/64bit/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json import itertools diff --git a/bench/MatrixMult/Single/MatrixVectorMult16/64bit/main.cpp b/bench/MatrixMult/Single/MatrixVectorMult16/64bit/main.cpp index b9da2d4919e2b5c3792f6fc3ba24f51bd68d64bc..8c0c7b1eb45b3a5264af0942b422a51aa22f4637 100644 --- a/bench/MatrixMult/Single/MatrixVectorMult16/64bit/main.cpp +++ b/bench/MatrixMult/Single/MatrixVectorMult16/64bit/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/MatrixMult/Single/MatrixVectorMult32/bench.py b/bench/MatrixMult/Single/MatrixVectorMult32/bench.py index 52b224fc7320304bde95c499dd8b35683f5fe8ad..fb5e76256f9c1bf827a923fa3170041ea2a0780b 100755 --- a/bench/MatrixMult/Single/MatrixVectorMult32/bench.py +++ b/bench/MatrixMult/Single/MatrixVectorMult32/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json import itertools diff --git a/bench/MatrixMult/Single/MatrixVectorMult32/main.cpp b/bench/MatrixMult/Single/MatrixVectorMult32/main.cpp index 36f16aa657a1063bd48b6d065ed84667551ef902..9cc90ba5fedefe818bcb6f8734a85d6d398fdcf3 100644 --- a/bench/MatrixMult/Single/MatrixVectorMult32/main.cpp +++ b/bench/MatrixMult/Single/MatrixVectorMult32/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include 
"cf32_utils.hpp" diff --git a/bench/MatrixPseudoInv/Direct/bench.py b/bench/MatrixPseudoInv/Direct/bench.py index a8dbdd3d51ef0bd40e5ea8cab01237ddbaaedda2..fcfb46273a48787b92cdf2ab8c2eef65ab79b655 100755 --- a/bench/MatrixPseudoInv/Direct/bench.py +++ b/bench/MatrixPseudoInv/Direct/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2023 Arm Limited and/or its affiliates +# Copyright 2023-2024 Arm Limited and/or its affiliates import json import itertools @@ -20,10 +20,10 @@ j = { "cases": [] } +size1 = [2, 3, 4, 8, 16] +size2 = [32, 64, 128, 256] -rows = [2, 3, 4, 8, 16] -cols = [32, 64, 128, 256] -for (m, n) in itertools.product(rows, cols): +for (m, n) in itertools.chain(zip(size1, size2), zip(size2, size1)): case = { "name": "mat_pseudo_inv_direct_{}_{}".format(m, n), "args": "{} {}".format(m, n), diff --git a/bench/MatrixPseudoInv/Direct/main.cpp b/bench/MatrixPseudoInv/Direct/main.cpp index e5c24abe1377c2641e66fb0ef558c1ff6a291019..6339c24a29c508cdb819a9f5012b300231d4939b 100644 --- a/bench/MatrixPseudoInv/Direct/main.cpp +++ b/bench/MatrixPseudoInv/Direct/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2023 Arm Limited and/or its affiliates + Copyright 2023-2024 Arm Limited and/or its affiliates */ #include "armral.h" @@ -26,7 +26,8 @@ void run_mat_pinv_perf(uint32_t m, uint32_t n, uint32_t num_reps) { #ifdef ARMRAL_BENCH_NOALLOC // Benchmark only added for interest. This is not expected to show any major // performance difference. - std::vector buffer(m * m * sizeof(armral_cmplx_f32_t) + 3); + auto size = m > n ? n : m; + std::vector buffer(size * size * sizeof(armral_cmplx_f32_t) + 3); for (uint32_t i = 0; i < num_reps; ++i) { armral_cmplx_pseudo_inverse_direct_f32_noalloc(m, n, lambda, in_ptr, out_ptr, buffer.data()); @@ -52,7 +53,8 @@ int main(int argc, char **argv) { auto n = (uint32_t)atoi(argv[2]); auto num_reps = (uint32_t)atoi(argv[3]); - assert(m == 2 || m == 3 || m == 4 || m == 8 || m == 16); + [[maybe_unused]] auto size = m > n ? 
n : m; + assert(size == 2 || size == 3 || size == 4 || size == 8 || size == 16); run_mat_pinv_perf(m, n, num_reps); diff --git a/bench/Modulation/bench.py b/bench/Modulation/bench.py index 4718fb86573f52df558c11b43f55df0426d1416b..e6dcff60cbc8b5b21d0b59f7a7220c2ebf1ea01e 100755 --- a/bench/Modulation/bench.py +++ b/bench/Modulation/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json import itertools diff --git a/bench/Modulation/main.cpp b/bench/Modulation/main.cpp index 4e836ac7cca5879413d821bb97663deebe87cfda..5e0f7ba9848dba69a5121fc020b67f6c425621e4 100644 --- a/bench/Modulation/main.cpp +++ b/bench/Modulation/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/MuLaw/Compression/14bit/bench.py b/bench/MuLaw/Compression/14bit/bench.py index c222bf3a0068092d43d381e34523a738c7a2b11e..2b9ab708016d93ef77192d914fdbfe195b5ed65a 100755 --- a/bench/MuLaw/Compression/14bit/bench.py +++ b/bench/MuLaw/Compression/14bit/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/MuLaw/Compression/14bit/main.cpp b/bench/MuLaw/Compression/14bit/main.cpp index 3d60c3022121b85642629339af3f6530ece98be2..73b7699aab0b13cd623ab5b9406cc1cf75cee0a8 100644 --- a/bench/MuLaw/Compression/14bit/main.cpp +++ b/bench/MuLaw/Compression/14bit/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/MuLaw/Compression/8bit/bench.py b/bench/MuLaw/Compression/8bit/bench.py index a7aa8e00dbfa8547fa70833dc93e9cf51b0fb4a6..43cefd873b69ffa2750effaf40eb1cc3a92f9a88 100755 --- a/bench/MuLaw/Compression/8bit/bench.py +++ b/bench/MuLaw/Compression/8bit/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/MuLaw/Compression/8bit/main.cpp b/bench/MuLaw/Compression/8bit/main.cpp index fbf6c83b5b05611ce0c933df4eb64a7964b05482..2faa911946b91e517a7e4d8c4bc8423d939f539b 100644 --- a/bench/MuLaw/Compression/8bit/main.cpp +++ b/bench/MuLaw/Compression/8bit/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/MuLaw/Compression/9bit/bench.py b/bench/MuLaw/Compression/9bit/bench.py index f7f54febd78fcad70ec3431b1073f7400176ea6e..cc24e674f5583b2de0126ab3ef3e8722d640b175 100755 --- a/bench/MuLaw/Compression/9bit/bench.py +++ b/bench/MuLaw/Compression/9bit/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/MuLaw/Compression/9bit/main.cpp b/bench/MuLaw/Compression/9bit/main.cpp index 
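In the `bench/MatrixPseudoInv/Direct` hunks above, the noalloc scratch buffer shrinks from `m * m` to `size * size` complex elements with `size = min(m, n)`, and `bench.py` now pairs the small and large dimensions elementwise in both orders instead of taking their full product, so both wide and tall matrices are exercised. A plausible reading is that the direct pseudo-inverse only ever factorizes the smaller Gram matrix (`A * A^H` when `m <= n`, `A^H * A` otherwise), so the scratch requirement depends on `min(m, n)` alone. The sketch below restates that sizing rule under those assumptions; `cmplx_f32` stands in for `armral_cmplx_f32_t` and `pinv_direct_buffer_bytes` is a hypothetical name.

```cpp
// Sketch of the scratch sizing implied by the hunk above, under the
// stated assumptions; not library code.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Stand-in for armral_cmplx_f32_t: a complex value as two 32-bit floats.
struct cmplx_f32 {
  float re;
  float im;
};

// min(m, n) x min(m, n) complex entries, plus the same 3 bytes of slack
// the benchmark allocates.
static size_t pinv_direct_buffer_bytes(uint32_t m, uint32_t n) {
  size_t size = std::min(m, n);
  return size * size * sizeof(cmplx_f32) + 3;
}

int main() {
  // A 4x64 (wide) and a 64x4 (tall) matrix need identical scratch space.
  printf("%zu %zu\n", pinv_direct_buffer_bytes(4, 64),
         pinv_direct_buffer_bytes(64, 4));
  return 0;
}
```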
fd1641b7a941229326e913f26b9ef3ae308fac96..a2c11187070c29a5f8584a3e773272ef9d46265c 100644 --- a/bench/MuLaw/Compression/9bit/main.cpp +++ b/bench/MuLaw/Compression/9bit/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/MuLaw/Decompression/14bit/bench.py b/bench/MuLaw/Decompression/14bit/bench.py index 7b65e7f0fa60bc27da554bfcb304982ee4e02a00..8f6d2b1e18915ffc7e356ec1f7d7cb7e2c6162f2 100755 --- a/bench/MuLaw/Decompression/14bit/bench.py +++ b/bench/MuLaw/Decompression/14bit/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/MuLaw/Decompression/14bit/main.cpp b/bench/MuLaw/Decompression/14bit/main.cpp index d8f8cc1706890532b45b586b30a9f1cb53a1d1f6..a24bf218678907c961d44cb399cfbb9305bcd3db 100644 --- a/bench/MuLaw/Decompression/14bit/main.cpp +++ b/bench/MuLaw/Decompression/14bit/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/MuLaw/Decompression/8bit/bench.py b/bench/MuLaw/Decompression/8bit/bench.py index 00419a687f48213ede2928fbbd68205535413d28..f70ecafd1da4a1d0e6dce7cf8b112c9add3814b5 100755 --- a/bench/MuLaw/Decompression/8bit/bench.py +++ b/bench/MuLaw/Decompression/8bit/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/MuLaw/Decompression/8bit/main.cpp b/bench/MuLaw/Decompression/8bit/main.cpp index 0d9b6ca6d4fc85c2b3c11065c72fec355983b1df..c3a0f0a5a58d86e9c90c5cb8ffe166c31c0b4578 100644 --- a/bench/MuLaw/Decompression/8bit/main.cpp +++ b/bench/MuLaw/Decompression/8bit/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/MuLaw/Decompression/9bit/bench.py b/bench/MuLaw/Decompression/9bit/bench.py index cd0e93997e15025c52b63741358858a82ac0c67d..67512df561b404e5abdd731e5775ca3cc28f6afa 100755 --- a/bench/MuLaw/Decompression/9bit/bench.py +++ b/bench/MuLaw/Decompression/9bit/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/MuLaw/Decompression/9bit/main.cpp b/bench/MuLaw/Decompression/9bit/main.cpp index 0d182228c91fb35694ce89c6842484e2130ebb35..2bcde05b4f61d09d7ecc16d4e6b7471e70ef1fc9 100644 --- a/bench/MuLaw/Decompression/9bit/main.cpp +++ b/bench/MuLaw/Decompression/9bit/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/ORanBlockScaling/Compression/14bit/bench.py b/bench/ORanBlockScaling/Compression/14bit/bench.py index 845568cb43a7d1d83e8575614ee936d74fc1478b..e2b2f15b79c723eaea9174b1be4e48bde3dd3409 100755 --- a/bench/ORanBlockScaling/Compression/14bit/bench.py +++ 
b/bench/ORanBlockScaling/Compression/14bit/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/ORanBlockScaling/Compression/14bit/main.cpp b/bench/ORanBlockScaling/Compression/14bit/main.cpp index e1c7b68caac83a6839986c14e62e5aec16d0b9b5..37f8da1247d9f94b972c6524465921f47058c1f8 100644 --- a/bench/ORanBlockScaling/Compression/14bit/main.cpp +++ b/bench/ORanBlockScaling/Compression/14bit/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/ORanBlockScaling/Compression/8bit/bench.py b/bench/ORanBlockScaling/Compression/8bit/bench.py index 03f98d26ea23a04302d13e6525ab21648b73b9c2..65d55372bdc61a47ed7854067eadbdbc3c29e4b5 100755 --- a/bench/ORanBlockScaling/Compression/8bit/bench.py +++ b/bench/ORanBlockScaling/Compression/8bit/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/ORanBlockScaling/Compression/8bit/main.cpp b/bench/ORanBlockScaling/Compression/8bit/main.cpp index 706b4719941bbf4a30f51ab2fdd8c6acc044b742..43286ca2accaf71cbd6b788997ee7ec1f2a78be7 100644 --- a/bench/ORanBlockScaling/Compression/8bit/main.cpp +++ b/bench/ORanBlockScaling/Compression/8bit/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/ORanBlockScaling/Compression/9bit/bench.py b/bench/ORanBlockScaling/Compression/9bit/bench.py index ea6d874e1d3874925097c72e1b533a0a3d17d391..54f99318b9b3804b5443a693c4b42bce567b947e 100755 --- a/bench/ORanBlockScaling/Compression/9bit/bench.py +++ b/bench/ORanBlockScaling/Compression/9bit/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/ORanBlockScaling/Compression/9bit/main.cpp b/bench/ORanBlockScaling/Compression/9bit/main.cpp index f203d3b5757b0a8a1f7deb1c84bc16d8f2d9cc1e..7d66f4213df7a39a57c416fa2b01c532ceaec326 100644 --- a/bench/ORanBlockScaling/Compression/9bit/main.cpp +++ b/bench/ORanBlockScaling/Compression/9bit/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/ORanBlockScaling/Decompression/14bit/bench.py b/bench/ORanBlockScaling/Decompression/14bit/bench.py index 347eb5c71f02da521192dae7f4a6c21779657c90..cbb57d29cba049ff58e2b312dc634157ebd34a26 100755 --- a/bench/ORanBlockScaling/Decompression/14bit/bench.py +++ b/bench/ORanBlockScaling/Decompression/14bit/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/ORanBlockScaling/Decompression/14bit/main.cpp b/bench/ORanBlockScaling/Decompression/14bit/main.cpp index 
a2852e6244ac2dc3975bd6141b646f4cac3fc6b1..a9448f9012e0244fcbe186eb07b4260a1bdec92d 100644 --- a/bench/ORanBlockScaling/Decompression/14bit/main.cpp +++ b/bench/ORanBlockScaling/Decompression/14bit/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/ORanBlockScaling/Decompression/8bit/bench.py b/bench/ORanBlockScaling/Decompression/8bit/bench.py index fdcdc64a39205d8db4528cea2af623f2e48f15d6..2807325192f8838579b248d47e74135a040b8ae8 100755 --- a/bench/ORanBlockScaling/Decompression/8bit/bench.py +++ b/bench/ORanBlockScaling/Decompression/8bit/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/ORanBlockScaling/Decompression/8bit/main.cpp b/bench/ORanBlockScaling/Decompression/8bit/main.cpp index c60c504a711f7eec85b2aee737e1e6d195c31c66..60ffeec519bf26ae99e03dc6abb375e43ff157fc 100644 --- a/bench/ORanBlockScaling/Decompression/8bit/main.cpp +++ b/bench/ORanBlockScaling/Decompression/8bit/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/ORanBlockScaling/Decompression/9bit/bench.py b/bench/ORanBlockScaling/Decompression/9bit/bench.py index 5510639545d5b406626c0027a8e823c42dd08d62..f16d82dca3a9266e2e7788a5984ef250a63ccc16 100755 --- a/bench/ORanBlockScaling/Decompression/9bit/bench.py +++ b/bench/ORanBlockScaling/Decompression/9bit/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/ORanBlockScaling/Decompression/9bit/main.cpp b/bench/ORanBlockScaling/Decompression/9bit/main.cpp index 0ead48511930c45d10fae2b90e1684ce941621fd..8cbab20543795c711dc8483bea51d24640f73d7e 100644 --- a/bench/ORanBlockScaling/Decompression/9bit/main.cpp +++ b/bench/ORanBlockScaling/Decompression/9bit/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/Polar/Decoding/bench.py b/bench/Polar/Decoding/bench.py index b69cda691eaa5865b47067d9d2007c40837c2221..b9b3ad64cf9c913596c879f2781232af032e1543 100755 --- a/bench/Polar/Decoding/bench.py +++ b/bench/Polar/Decoding/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/Polar/Decoding/main.cpp b/bench/Polar/Decoding/main.cpp index fe12c2f29d7cb4b82a21e3c40787563ec3166b12..6da1928fc9c0ba8d85dbd0cd78f8982a79bba9ae 100644 --- a/bench/Polar/Decoding/main.cpp +++ b/bench/Polar/Decoding/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "rng.hpp" diff --git a/bench/Polar/Encoding/bench.py b/bench/Polar/Encoding/bench.py index 
8c801cb79678d652281ca782b8d5fcd0e48baa83..d01db8b628d638642ae360e595e8308ba6109bcc 100755 --- a/bench/Polar/Encoding/bench.py +++ b/bench/Polar/Encoding/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/Polar/Encoding/main.cpp b/bench/Polar/Encoding/main.cpp index e3b1a6976f449bf8586075d3a87fc903e56f7e46..86bd403b4ea42700bbb0346ba2dcf3722a298ba7 100644 --- a/bench/Polar/Encoding/main.cpp +++ b/bench/Polar/Encoding/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/Polar/Frozen/bench.py b/bench/Polar/Frozen/bench.py index cc3eab0418558e6b88d3d19b009adbb7d0389c78..c25d3c8d616184443521632692cba30a9558cf46 100755 --- a/bench/Polar/Frozen/bench.py +++ b/bench/Polar/Frozen/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/Polar/Frozen/main.cpp b/bench/Polar/Frozen/main.cpp index 6cd66bddbd73571d8e66f294053402a3b7b39754..8db346aafbe8462e2d3467403310f0a6cab052fd 100644 --- a/bench/Polar/Frozen/main.cpp +++ b/bench/Polar/Frozen/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/Polar/RateMatching/bench.py b/bench/Polar/RateMatching/bench.py index 8286ded69fe297054cccd116b5252a67597b81dc..fa5715ff2b32375cce9057c0b732856e65b757e4 100755 --- a/bench/Polar/RateMatching/bench.py +++ b/bench/Polar/RateMatching/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/Polar/RateMatching/main.cpp b/bench/Polar/RateMatching/main.cpp index 4ba48279a003c3bbc121b6030257dbb355ce2d62..af6a831645b67c36eebcf9211094d5ade41f804c 100644 --- a/bench/Polar/RateMatching/main.cpp +++ b/bench/Polar/RateMatching/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/Polar/RateRecovery/bench.py b/bench/Polar/RateRecovery/bench.py index 1427393c650826d5ea045e69eb29af3e4d9c5468..4687b6dfeb447b44dc007e0d6bcc3a8ff7fff0b2 100755 --- a/bench/Polar/RateRecovery/bench.py +++ b/bench/Polar/RateRecovery/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/Polar/RateRecovery/main.cpp b/bench/Polar/RateRecovery/main.cpp index 51c32b45076e1b04f9b4f2652b92b89a3c306df6..b687110b0a99d159e66f7e7296b6b1f15a3436d2 100644 --- a/bench/Polar/RateRecovery/main.cpp +++ b/bench/Polar/RateRecovery/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git 
a/bench/Polar/SubchannelDeinterleave/bench.py b/bench/Polar/SubchannelDeinterleave/bench.py index ff19ba9c78ed82a996c178d6cc0f6a7d85fd3157..d804d3bb03d333815421e994a9383e65b71db150 100755 --- a/bench/Polar/SubchannelDeinterleave/bench.py +++ b/bench/Polar/SubchannelDeinterleave/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/Polar/SubchannelDeinterleave/main.cpp b/bench/Polar/SubchannelDeinterleave/main.cpp index b992f58f343c4929205c8b5f07401c4595f1362b..54e910869df460f1e41978ab7cb1fa27c439b18b 100644 --- a/bench/Polar/SubchannelDeinterleave/main.cpp +++ b/bench/Polar/SubchannelDeinterleave/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/Polar/SubchannelInterleave/bench.py b/bench/Polar/SubchannelInterleave/bench.py index d3f752ea72d061046824745ab8d50286bb243d7f..8620391efd785cd4f07f90587da1af740bf7e3d8 100755 --- a/bench/Polar/SubchannelInterleave/bench.py +++ b/bench/Polar/SubchannelInterleave/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/Polar/SubchannelInterleave/main.cpp b/bench/Polar/SubchannelInterleave/main.cpp index 63777bc5e4a5ce12fa0a9339b4c804bf82ccf26a..c2623be63b33cb6912e371255ee4542e9dff1e49 100644 --- a/bench/Polar/SubchannelInterleave/main.cpp +++ b/bench/Polar/SubchannelInterleave/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/SVD/bench.py b/bench/SVD/bench.py index fef7e9470b57d0cb3acaaf567951f6f73cc26685..22a8591ac0276283eaf1d15c1033fdb34e35d0d5 100755 --- a/bench/SVD/bench.py +++ b/bench/SVD/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json import itertools diff --git a/bench/SVD/main.cpp b/bench/SVD/main.cpp index 67f50356758faa42f0af79baddf2d1895490793c..86cba9298b322b1fe844412743d49a98dd0cd29f 100644 --- a/bench/SVD/main.cpp +++ b/bench/SVD/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/Scrambling/bench.py b/bench/Scrambling/bench.py index e37a92f1d70a8b56932bd2ecf816b36f3d525d55..ae4e285acec471e2f7c10eec60ede7e7adb64178 100755 --- a/bench/Scrambling/bench.py +++ b/bench/Scrambling/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/Scrambling/main.cpp b/bench/Scrambling/main.cpp index 3802388e445201933ac6b47c147eb8a4cc9dd604..6d85a8f0ff266773297d47dfd533180eee219f59 100644 --- a/bench/Scrambling/main.cpp +++ b/bench/Scrambling/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + 
Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/SeqGenerator/bench.py b/bench/SeqGenerator/bench.py index a399b28a716044113862bf5c3b00c84010b0040b..7d8ae2756d5908d00d8bc712ce6eeb818af7cae5 100755 --- a/bench/SeqGenerator/bench.py +++ b/bench/SeqGenerator/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/SeqGenerator/main.cpp b/bench/SeqGenerator/main.cpp index 6e78e10aa35f7c192f88af3b3fe4850c97f0b1ee..49baa2aa1393737b15e9c5c171333ca54c018a24 100644 --- a/bench/SeqGenerator/main.cpp +++ b/bench/SeqGenerator/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cs16_utils.hpp" #include "int8_utils.hpp" diff --git a/bench/Turbo/Decoding/bench.py b/bench/Turbo/Decoding/bench.py index b2b41066ab6b0073845a33817f19b521d35f1e23..ebd3e3868c53c85f80cc942725dd0a1c09d9cc89 100755 --- a/bench/Turbo/Decoding/bench.py +++ b/bench/Turbo/Decoding/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json import itertools diff --git a/bench/Turbo/Decoding/main.cpp b/bench/Turbo/Decoding/main.cpp index d77b2f8f86cb898375e5c2705d2bca7b0d8d486d..b0e21bbd7bf5e2caff2d2e5f87cea24176a4ca14 100644 --- a/bench/Turbo/Decoding/main.cpp +++ b/bench/Turbo/Decoding/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "turbo_code.hpp" diff --git a/bench/Turbo/Encoding/bench.py b/bench/Turbo/Encoding/bench.py index c9a2575fc8b6f2dbaec98235f56eacba39289b34..5c1db10cd9b1482bd060a0d64f6517ef16fb379b 100755 --- a/bench/Turbo/Encoding/bench.py +++ b/bench/Turbo/Encoding/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json import itertools diff --git a/bench/Turbo/Encoding/main.cpp b/bench/Turbo/Encoding/main.cpp index 185d0ea51e576de63322bafc02d6ab466d325353..b79df85cb7918faed9f697dbf863fb575adcf112 100644 --- a/bench/Turbo/Encoding/main.cpp +++ b/bench/Turbo/Encoding/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/Turbo/RateMatching/bench.py b/bench/Turbo/RateMatching/bench.py index b539f7b53e0728be8ff041b6278c8075cbe42cc3..a36a1eab059a2a1c2d028c9e5fd303fecea3a439 100755 --- a/bench/Turbo/RateMatching/bench.py +++ b/bench/Turbo/RateMatching/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/Turbo/RateMatching/main.cpp b/bench/Turbo/RateMatching/main.cpp index ed622828547ac85c9fa427ffefc0ed263a7c9e2e..809bf148a0be7ef96bbb09fa655d42d45583b821 100644 --- a/bench/Turbo/RateMatching/main.cpp +++ b/bench/Turbo/RateMatching/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration 
Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/Turbo/RateRecovery/bench.py b/bench/Turbo/RateRecovery/bench.py index 76107bf7314a8f68401d74171fc5ca2ca525b76c..2cc54c28695c2e54b8923632b4bb60098aea21ea 100755 --- a/bench/Turbo/RateRecovery/bench.py +++ b/bench/Turbo/RateRecovery/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/Turbo/RateRecovery/main.cpp b/bench/Turbo/RateRecovery/main.cpp index 42974afaca9ddf455d5f38fe4de40146986f8ca4..61d0e780cda38dd51d6c56996f8bdb68670025d0 100644 --- a/bench/Turbo/RateRecovery/main.cpp +++ b/bench/Turbo/RateRecovery/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/VectorDotProd/VecDot16/bench.py b/bench/VectorDotProd/VecDot16/bench.py index 66928a5910f302e004bf71be47186b85297e0667..4c4bacd3b4d566bc4676ee8f30120b233e025938 100755 --- a/bench/VectorDotProd/VecDot16/bench.py +++ b/bench/VectorDotProd/VecDot16/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/VectorDotProd/VecDot16/main.cpp b/bench/VectorDotProd/VecDot16/main.cpp index a77e7acaf7aa5bbbea118c89cd953b8073ed8dc6..4d2179f3da195fbbe969917ab5544c4db7469ffd 100644 --- a/bench/VectorDotProd/VecDot16/main.cpp +++ b/bench/VectorDotProd/VecDot16/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/VectorDotProd/VecDot16_2/bench.py b/bench/VectorDotProd/VecDot16_2/bench.py index 56c75a287ec65b2b6ae15993b1622e774c576b12..18d099c07f3f56e28e568c4c526774f38d4c1c59 100755 --- a/bench/VectorDotProd/VecDot16_2/bench.py +++ b/bench/VectorDotProd/VecDot16_2/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/VectorDotProd/VecDot16_2/main.cpp b/bench/VectorDotProd/VecDot16_2/main.cpp index 31a2a53d35d9d70254230313986306bf1fece585..356bcfbf7e5d2e6cd1f08c4ad787e7d00edd86c1 100644 --- a/bench/VectorDotProd/VecDot16_2/main.cpp +++ b/bench/VectorDotProd/VecDot16_2/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/VectorDotProd/VecDot16_2_32bit/bench.py b/bench/VectorDotProd/VecDot16_2_32bit/bench.py index d016bf06470c1753b870f33cac8aebe898b6f054..3f1b23afe529ead4f50038ae997537cf43a115d4 100755 --- a/bench/VectorDotProd/VecDot16_2_32bit/bench.py +++ b/bench/VectorDotProd/VecDot16_2_32bit/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git 
a/bench/VectorDotProd/VecDot16_2_32bit/main.cpp b/bench/VectorDotProd/VecDot16_2_32bit/main.cpp index d942ae224fde3af21f9df7d6e7c992208f223d14..2e6377e7b269a68156a47713a2ab8fa900e7b6a9 100644 --- a/bench/VectorDotProd/VecDot16_2_32bit/main.cpp +++ b/bench/VectorDotProd/VecDot16_2_32bit/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/VectorDotProd/VecDot16_32bit/bench.py b/bench/VectorDotProd/VecDot16_32bit/bench.py index af15ac12eaf1f45f10270dcc72a06a15bf891a16..2dd7bdd9f72699c86943da383ed055ee524087c8 100755 --- a/bench/VectorDotProd/VecDot16_32bit/bench.py +++ b/bench/VectorDotProd/VecDot16_32bit/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/VectorDotProd/VecDot16_32bit/main.cpp b/bench/VectorDotProd/VecDot16_32bit/main.cpp index a2fb763df4178a7331912792e5536a428d0f68df..0a0f27dbfa654131fe811fd0f96b902498b5a52d 100644 --- a/bench/VectorDotProd/VecDot16_32bit/main.cpp +++ b/bench/VectorDotProd/VecDot16_32bit/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/VectorDotProd/VecDot32/bench.py b/bench/VectorDotProd/VecDot32/bench.py index 18a9f1fbf01a776840f6da9fd6450d1e4991c1e8..13764c41721512188cac1ee623fa35cb5604b714 100755 --- a/bench/VectorDotProd/VecDot32/bench.py +++ b/bench/VectorDotProd/VecDot32/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/VectorDotProd/VecDot32/main.cpp b/bench/VectorDotProd/VecDot32/main.cpp index 9de48e531d05ab46414d731a4000cd35e7fa67c8..5ecf2c1b2d68cb6be0930c9368d7edbd94dd925a 100644 --- a/bench/VectorDotProd/VecDot32/main.cpp +++ b/bench/VectorDotProd/VecDot32/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/VectorDotProd/VecDot32_2/bench.py b/bench/VectorDotProd/VecDot32_2/bench.py index 03a00de61ef260b08516efcbc200e1fc71efa177..c249222f5e4f3ef05855ea796e002ebc95ecace1 100755 --- a/bench/VectorDotProd/VecDot32_2/bench.py +++ b/bench/VectorDotProd/VecDot32_2/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/VectorDotProd/VecDot32_2/main.cpp b/bench/VectorDotProd/VecDot32_2/main.cpp index 5718396103994600d431a9ab9074a3371f9d2e8a..0365c3084fc443885af64516082bda7ccead6313 100644 --- a/bench/VectorDotProd/VecDot32_2/main.cpp +++ b/bench/VectorDotProd/VecDot32_2/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/XRanBlockFloat/Compression/12bit/bench.py b/bench/XRanBlockFloat/Compression/12bit/bench.py index 
c3c3d6940a61d4cb520c4da2acbbfdfb84a6953e..744bd01eeceae7921b394a09922c7206e23e9589 100755 --- a/bench/XRanBlockFloat/Compression/12bit/bench.py +++ b/bench/XRanBlockFloat/Compression/12bit/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/XRanBlockFloat/Compression/12bit/main.cpp b/bench/XRanBlockFloat/Compression/12bit/main.cpp index 7654ce9d37b78e89e8d800c2c657f8be726f6beb..ec36a751d3566f336b1062c34e83ccbb30891dc0 100644 --- a/bench/XRanBlockFloat/Compression/12bit/main.cpp +++ b/bench/XRanBlockFloat/Compression/12bit/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" @@ -34,7 +34,7 @@ int main(int argc, char **argv) { if (argc != 4) { // nprbs - The number of physical resource blocks // scale - Phase compensation term - // num_reps - The number of times to repeat the functio + // num_reps - The number of times to repeat the function fprintf(stderr, "usage: %s nprbs scale nreps\n", argv[0]); exit(EXIT_FAILURE); } diff --git a/bench/XRanBlockFloat/Compression/14bit/bench.py b/bench/XRanBlockFloat/Compression/14bit/bench.py index f3da37b5e0b87ff5bd8df3bd185e0b27e7c80438..10f2e16f31bfcb14c8407ec24f72b070f03eea8f 100755 --- a/bench/XRanBlockFloat/Compression/14bit/bench.py +++ b/bench/XRanBlockFloat/Compression/14bit/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/XRanBlockFloat/Compression/14bit/main.cpp b/bench/XRanBlockFloat/Compression/14bit/main.cpp index adba31a6bf7e8237bacd16616b5f05fa7bdc50a7..eff869855731e9db8b862dd4fef6e408c8738a62 100644 --- a/bench/XRanBlockFloat/Compression/14bit/main.cpp +++ b/bench/XRanBlockFloat/Compression/14bit/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" @@ -34,7 +34,7 @@ int main(int argc, char **argv) { if (argc != 4) { // nprbs - The number of physical resource blocks // scale - Phase compensation term - // num_reps - The number of times to repeat the functio + // num_reps - The number of times to repeat the function fprintf(stderr, "usage: %s nprbs scale nreps\n", argv[0]); exit(EXIT_FAILURE); } diff --git a/bench/XRanBlockFloat/Compression/8bit/bench.py b/bench/XRanBlockFloat/Compression/8bit/bench.py index 7f8208d03229d839b24b11cb211520cab52c6105..3e5f2f324f2482ab5700b3cbad461ab6f81cc348 100755 --- a/bench/XRanBlockFloat/Compression/8bit/bench.py +++ b/bench/XRanBlockFloat/Compression/8bit/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/XRanBlockFloat/Compression/8bit/main.cpp b/bench/XRanBlockFloat/Compression/8bit/main.cpp index eebb33507604f8cc970eadc97e5afe90e5f0f837..1aa7d2c5fd33e0d05fb7e034edfde57096d26774 100644 --- a/bench/XRanBlockFloat/Compression/8bit/main.cpp +++ b/bench/XRanBlockFloat/Compression/8bit/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - 
Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" @@ -34,7 +34,7 @@ int main(int argc, char **argv) { if (argc != 4) { // nprbs - The number of physical resource blocks // scale - Phase compensation term - // num_reps - The number of times to repeat the functio + // num_reps - The number of times to repeat the function fprintf(stderr, "usage: %s nprbs scale nreps\n", argv[0]); exit(EXIT_FAILURE); } diff --git a/bench/XRanBlockFloat/Compression/9bit/bench.py b/bench/XRanBlockFloat/Compression/9bit/bench.py index eed68252617e648f2f66b2a5e462c50b218234a0..73391e2e5e9b12eca53c62c1db9ab071c12246e7 100755 --- a/bench/XRanBlockFloat/Compression/9bit/bench.py +++ b/bench/XRanBlockFloat/Compression/9bit/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/XRanBlockFloat/Compression/9bit/main.cpp b/bench/XRanBlockFloat/Compression/9bit/main.cpp index 7d9865e3c246b4961581289662d7579b53127b8c..6a96d355bde791d29ec1c6d06a34af2127b4e0fd 100644 --- a/bench/XRanBlockFloat/Compression/9bit/main.cpp +++ b/bench/XRanBlockFloat/Compression/9bit/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" @@ -34,7 +34,7 @@ int main(int argc, char **argv) { if (argc != 4) { // nprbs - The number of physical resource blocks // scale - Phase compensation term - // num_reps - The number of times to repeat the functio + // num_reps - The number of times to repeat the function fprintf(stderr, "usage: %s nprbs scale nreps\n", argv[0]); exit(EXIT_FAILURE); } diff --git a/bench/XRanBlockFloat/Decompression/12bit/bench.py b/bench/XRanBlockFloat/Decompression/12bit/bench.py index 5f251872e6fbf2d436cac682ba5938e05f718d80..f9ec6f833a8f9a602668aa509259614332a71af8 100755 --- a/bench/XRanBlockFloat/Decompression/12bit/bench.py +++ b/bench/XRanBlockFloat/Decompression/12bit/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/XRanBlockFloat/Decompression/12bit/main.cpp b/bench/XRanBlockFloat/Decompression/12bit/main.cpp index bd864a47b380e0c46da24c134735ffd4c6a1cbd1..9816ac1759f9d8acfe55b9fa84cd70622e4ef7c2 100644 --- a/bench/XRanBlockFloat/Decompression/12bit/main.cpp +++ b/bench/XRanBlockFloat/Decompression/12bit/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/XRanBlockFloat/Decompression/14bit/bench.py b/bench/XRanBlockFloat/Decompression/14bit/bench.py index bb29e7e65b4befa787cd589c9d6de2b0908ec419..1f08f502878812af51b09776f185b3bb8cbfdc2b 100755 --- a/bench/XRanBlockFloat/Decompression/14bit/bench.py +++ b/bench/XRanBlockFloat/Decompression/14bit/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/XRanBlockFloat/Decompression/14bit/main.cpp 
b/bench/XRanBlockFloat/Decompression/14bit/main.cpp index 2488e0a36f02a3f765cb039e3b85734f92ab4d6a..52226a5c1a564c53c782e90340d3b45725ab2a9e 100644 --- a/bench/XRanBlockFloat/Decompression/14bit/main.cpp +++ b/bench/XRanBlockFloat/Decompression/14bit/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/XRanBlockFloat/Decompression/8bit/bench.py b/bench/XRanBlockFloat/Decompression/8bit/bench.py index 7c7dc3b301aede016cc2332ad7c7ea7a2f367c28..f20eb2b1eaa0886917e3aca46e5e2f71c813fdea 100755 --- a/bench/XRanBlockFloat/Decompression/8bit/bench.py +++ b/bench/XRanBlockFloat/Decompression/8bit/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/XRanBlockFloat/Decompression/8bit/main.cpp b/bench/XRanBlockFloat/Decompression/8bit/main.cpp index 1e79256bb3ef73000146af7ad6e1db68c58e7c3f..7734d1207c3a971d67d3dcfbbad810910924c6e5 100644 --- a/bench/XRanBlockFloat/Decompression/8bit/main.cpp +++ b/bench/XRanBlockFloat/Decompression/8bit/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/XRanBlockFloat/Decompression/9bit/bench.py b/bench/XRanBlockFloat/Decompression/9bit/bench.py index 2a35fd3cf94d7c0bb21a88e3b6a960e7f41d8838..5cf57a71a0322043fe233d4eac530e90e3ac6926 100755 --- a/bench/XRanBlockFloat/Decompression/9bit/bench.py +++ b/bench/XRanBlockFloat/Decompression/9bit/bench.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json from pathlib import Path diff --git a/bench/XRanBlockFloat/Decompression/9bit/main.cpp b/bench/XRanBlockFloat/Decompression/9bit/main.cpp index 0ce037d94aae1d12ae1ea2a727ff9a3606a7089f..1e868ffca297b32471ad9e9226da586056c88feb 100644 --- a/bench/XRanBlockFloat/Decompression/9bit/main.cpp +++ b/bench/XRanBlockFloat/Decompression/9bit/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/bench/benchmarker.py b/bench/benchmarker.py index 17a67abe8979bbc800e85ee3862fe6410e0eba03..d3c2d6acc139f78bfa7ae641c8e83bcd8b7a9574 100755 --- a/bench/benchmarker.py +++ b/bench/benchmarker.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates # This program is for benchmarking the performance of armral functions. 
diff --git a/bench/benchmarker_utils.py b/bench/benchmarker_utils.py index 06a701b23825827d0b11e5d2453f0bb1e848898a..c369eb20a1c3746182e7059bcc71192070e2b079 100755 --- a/bench/benchmarker_utils.py +++ b/bench/benchmarker_utils.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import collections import os import subprocess diff --git a/bench/default_runner.py b/bench/default_runner.py index bec13d03f7a8c19fadac571adc3e33fcf79a680c..e5cb3ca87938f3cdf83d73c41eb5e784ba08483c 100755 --- a/bench/default_runner.py +++ b/bench/default_runner.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates import json import argparse diff --git a/docs/doxywrapper/arm_footer.html b/docs/doxywrapper/arm_footer.html index 1851bd04514c5c8a7e70a4b3506f3718ab9cdef3..93fae82a8d40a24c5501732fabc7d661c0d1985a 100644 --- a/docs/doxywrapper/arm_footer.html +++ b/docs/doxywrapper/arm_footer.html @@ -4,14 +4,14 @@ diff --git a/docs/doxywrapper/proprietary_notice.html b/docs/doxywrapper/proprietary_notice.html index d91418dcf25894b99f76af1626a375d09ead0bcd..931d1028ff762d9bdd7b413fd1776787dca419a0 100644 --- a/docs/doxywrapper/proprietary_notice.html +++ b/docs/doxywrapper/proprietary_notice.html @@ -47,7 +47,7 @@ document may be the trademarks of their respective owners. Please follow Arm's trademark usage guidelines at https://www.arm.com/company/policies/trademarks.

- Copyright © 2020-2023 Arm Limited (or its affiliates). All rights reserved.
+ Copyright © 2020-2024 Arm Limited (or its affiliates). All rights reserved.
Arm Limited. Company 02557590 registered in England.
110 Fulbourn Road, Cambridge, England CB1 9NJ.
(LES-PRE-20349)

diff --git a/docs/examples.md b/docs/examples.md index 04db467e5990c6b0d61cde590562bf6ed78f3cde..ebfeff5855d9ce515557f04f1eb9a89a2af86705 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -17,9 +17,9 @@ Acceleration Library (ArmRAL). To build the library, use: - tar zxvf ral-armral-23.10.tar.gz - mkdir ral-armral-23.10/build - cd ral-armral-23.10/build + tar zxvf ral-armral-24.01.tar.gz + mkdir ral-armral-24.01/build + cd ral-armral-24.01/build cmake .. make -j diff --git a/docs/frontmatter.md b/docs/frontmatter.md index aa4cfdd29414e87b1db7379d6f1e5a8dce4f61e4..c5fd9bfe45a9a286bc0d31e9619da2c05b1fd5fc 100644 --- a/docs/frontmatter.md +++ b/docs/frontmatter.md @@ -1,6 +1,6 @@ # Arm RAN Acceleration Library (ArmRAL) Reference Guide -Copyright © 2020-2023 Arm Limited (or its affiliates). All rights reserved. +Copyright © 2020-2024 Arm Limited (or its affiliates). All rights reserved. # About this book @@ -39,7 +39,7 @@ supplier and give: If you have any comments on content, send an e-mail to errata@arm.com. Give: * The title Arm RAN Acceleration Library Reference Guide. -* The number 102249_2310_00_en. +* The number 102249_2401_00_en. * If applicable, the relevant page number(s) to which your comments refer. * A concise explanation of your comments. @@ -95,7 +95,7 @@ rights reserved. Other brands and names mentioned in this document may be the trademarks of their respective owners. Please follow Arm's trademark usage guidelines at https://www.arm.com/company/policies/trademarks. -Copyright © 2020-2023 Arm Limited (or its affiliates). All rights reserved. +Copyright © 2020-2024 Arm Limited (or its affiliates). All rights reserved. Arm Limited. Company 02557590 registered in England. @@ -148,3 +148,4 @@ Issue | Date | Confidentiality | Change 2304-00 | 21 April 2023 | Non-Confidential | Update for Arm RAN Acceleration Library v23.04 2307-00 | 07 July 2023 | Non-Confidential | Update for Arm RAN Acceleration Library v23.07 2310-00 | 06 October 2023 | Non-Confidential | Update for Arm RAN Acceleration Library v23.10 +2401-00 | 19 January 2024 | Non-Confidential | Update for Arm RAN Acceleration Library v24.01 diff --git a/examples/block_float_9b_example.c b/examples/block_float_9b_example.c index 8cab3fa400aa24a9df5faa1a055dbd6b3b5c5fbb..8abe390708dbefbfdd954b074a402d6dc44f552f 100644 --- a/examples/block_float_9b_example.c +++ b/examples/block_float_9b_example.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/examples/fft_cf32_example.c b/examples/fft_cf32_example.c index 8a29db7d0535a78a8a9ca973e535c2fc9d56419c..690d876d3b9f0d17d56371c754e9098c7ba9a87d 100644 --- a/examples/fft_cf32_example.c +++ b/examples/fft_cf32_example.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/examples/modulation_example.c b/examples/modulation_example.c index 791ef4cdbfc8e0af825cbf744314ee8513f015c4..3ee95d6ef229a402573e9ae7f1d81da027c3392d 100644 --- a/examples/modulation_example.c +++ b/examples/modulation_example.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/examples/polar_example.cpp b/examples/polar_example.cpp index 
ca0c50ecccbd1471f775b63b34411d8dddde49d7..5648f308dc9990b73d03872b07e4f9e44aaa6e9d 100644 --- a/examples/polar_example.cpp +++ b/examples/polar_example.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/include/.clang-tidy b/include/.clang-tidy index 3d1092ed5c14caf217d8451a247a5fd06c21d689..d8f91b4d9cd6a6c3f4c69c51686c7deeecabf927 100644 --- a/include/.clang-tidy +++ b/include/.clang-tidy @@ -2,7 +2,6 @@ Checks: '' WarningsAsErrors: '' HeaderFilterRegex: '' -AnalyzeTemporaryDtors: false FormatStyle: file InheritParentConfig: true CheckOptions: diff --git a/include/armral.h b/include/armral.h index 7e77bf545bc68209a1d990f5766ea19e3bad0201..2f247eb1f08ef444ec9b687c98200e1a298800f1 100644 --- a/include/armral.h +++ b/include/armral.h @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once @@ -1626,23 +1626,35 @@ armral_cmplx_mat_inverse_batch_f32_pa(uint32_t num_mats, uint32_t size, */ /** * Computes the regularized pseudo-inverse of a single matrix. The `N-by-M` - * regularized pseudo-inverse `C` of an `M-by-N` matrix `A` with `M <= N` is - * defined as: + * regularized pseudo-inverse `C` of an `M-by-N` matrix `A` is defined as: * *
  *   C = A^H * (A * A^H + λ * I)^-1
  * 
* + * for `M <= N`, and is defined as: + * + *
+ *   C = (A^H * A + λ * I)^-1 * A^H
+ * 
+ * + * for `M > N`. + * * This function performs numerical matrix inversion using the Schur complement - * to compute the regularized pseudo-inverse of `A` directly from this - * expression. + * to compute the regularized pseudo-inverse of `A` directly from the + * appropriate expression. * * \warning This method is numerically unstable for matrices that are not very * well conditioned. * * The input matrix `p_src` and output matrix `p_dst` are stored contiguously - * in memory, in row-major order. The number of rows `m` in the input matrix - * must be 2, 3, 4, 8 or 16 and `m <= n`. + * in memory, in row-major order. + * + * \note + * - If `m <= n` the number of rows `m` in the input matrix must be 2, 3, 4, + * 8 or 16. + * - If `m > n` the number of columns `n` in the input matrix must be 2, 3, + * 4, 8 or 16. * * @param[in] m The number of rows in input matrix `A`. * @param[in] n The number of columns in input matrix `A`. @@ -1660,29 +1672,45 @@ armral_cmplx_pseudo_inverse_direct_f32(uint16_t m, uint16_t n, float32_t lambda, * Non-allocating variant of \link armral_cmplx_pseudo_inverse_direct_f32 * \endlink. * - * This function computes the regularized pseudo-inverse of a single matrix. - * The `N-by-M` regularized pseudo-inverse `C` of an `M-by-N` matrix `A` - * with `M <= N` is defined as: + * Computes the regularized pseudo-inverse of a single matrix. The `N-by-M` + * regularized pseudo-inverse `C` of an `M-by-N` matrix `A` is defined as: * *
  *   C = A^H * (A * A^H + λ * I)^-1
  * 
* + * for `M <= N`, and is defined as: + * + *
+ *   C = (A^H * A + λ * I)^-1 * A^H
+ * 
+ * + * for `M > N`. + * * This function performs numerical matrix inversion using the Schur complement - * to compute the regularized pseudo-inverse of `A` directly from this - * expression. + * to compute the regularized pseudo-inverse of `A` directly from the + * appropriate expression. * * \warning This method is numerically unstable for matrices that are not very * well conditioned. * * The input matrix `p_src` and output matrix `p_dst` are stored contiguously - * in memory, in row-major order. The number of rows `m` in the input matrix - * must be 2, 3, 4, 8 or 16 and `m <= n`. + * in memory, in row-major order. + * + * \note + * - If `m <= n` the number of rows `m` in the input matrix must be 2, 3, 4, + * 8 or 16. + * - If `m > n` the number of columns `n` in the input matrix must be 2, 3, + * 4, 8 or 16. * * This function takes a pre-allocated buffer (`buffer`) to use internally. * This variant will not call any system memory allocators. * - * The buffer must be at least `m * m * sizeof(armral_cmplx_f32_t) + 3` bytes. + * \note + * - If `m <= n` the buffer must be at least + * `m * m * sizeof(armral_cmplx_f32_t) + 3` bytes. + * - If `m > n` the buffer must be at least + * `n * n * sizeof(armral_cmplx_f32_t) + 3` bytes. * * @param[in] m The number of rows in input matrix `A`. * @param[in] n The number of columns in input matrix `A`. @@ -2557,7 +2585,7 @@ armral_status armral_crc16_le(uint32_t size, const uint64_t *input, * * @param[in] size The number of bytes of the given buffer. * @param[in] input Points to the input byte sequence. - * @param[out] crc16 The computed CRC on 16 bit. + * @param[out] crc16 The computed 16-bit CRC result. * @return An `armral_status` value that indicates success or failure. */ armral_status armral_crc16_be(uint32_t size, const uint64_t *input, @@ -2581,7 +2609,7 @@ armral_status armral_crc11_le(uint32_t size, const uint64_t *input, * * @param[in] size The number of bytes of the given buffer. * @param[in] input Points to the input byte sequence. - * @param[out] crc11 The computed CRC on 11 bit. + * @param[out] crc11 The computed 11-bit CRC result. * @return An `armral_status` value that indicates success or failure. */ armral_status armral_crc11_be(uint32_t size, const uint64_t *input, @@ -2605,7 +2633,7 @@ armral_status armral_crc6_le(uint32_t size, const uint64_t *input, * * @param[in] size The number of bytes of the given buffer. * @param[in] input Points to the input byte sequence. - * @param[out] crc6 The computed CRC on 6 bit. + * @param[out] crc6 The computed 6-bit CRC result. * @return An `armral_status` value that indicates success or failure. */ armral_status armral_crc6_be(uint32_t size, const uint64_t *input, @@ -2674,9 +2702,12 @@ armral_status armral_crc6_be(uint32_t size, const uint64_t *input, * @param[in] n_pc The number of parity bits in the encoded message. * @param[in] n_pc_wm The number of row-weight-selected parity bits in the * encoded message. Must be either zero or one. - * @param[out] frozen The output `frozen` mask, length `n` bytes. Elements - * corresponding to `frozen` bits are set to all ones, - * everything else set to zero. + * @param[out] frozen The output `frozen` mask, length `n` bytes. As described + * by `armral_polar_frozen_bit_type`, elements + * corresponding to `frozen` bits are set to `0xFF`, + * elements corresponding to parity bits are set to `0x01`, + * and elements corresponding to information bits are set + * to `0x00`. * @return An `armral_status` value that indicates success or failure. 
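With the `frozen` mask encoding now spelled out (`0xFF` for frozen bits, `0x01` for parity bits, `0x00` for information bits, per `armral_polar_frozen_bit_type`), a caller can partition bit positions directly from the mask. A minimal sketch, assuming a mask of length `n` bytes already produced by `armral_polar_frozen_mask`; the helper below is illustrative, not a library API:

```c
#include <stdint.h>

// Count information-bit positions in a frozen mask of length n bytes.
// Encoding per the documentation above: 0xFF = frozen, 0x01 = parity,
// 0x00 = information.
static uint32_t count_information_bits(const uint8_t *frozen, uint32_t n) {
  uint32_t k = 0;
  for (uint32_t i = 0; i < n; i++) {
    if (frozen[i] == 0x00) { // information bit
      k++;
    }
  }
  return k;
}
```

For a well-formed mask this returns the same `k` that was passed to `armral_polar_frozen_mask`.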
*/ armral_status armral_polar_frozen_mask(uint32_t n, uint32_t e, uint32_t k, @@ -2782,8 +2813,8 @@ armral_status armral_polar_decode_block(uint32_t n, const uint8_t *frozen, /** * Matches the rate of the Polar encoded code block to the rate of the channel - * using sub-block interleaving, bit selection, and channel interleaving based on - * Downlink or Uplink direction. This is as described in the 3GPP Technical + * using sub-block interleaving, bit selection, and channel interleaving based + * on Downlink or Uplink direction. This is as described in the 3GPP Technical * Specification (TS) 38.212 section 5.4.1. * * The code rate of the code block is defined by the ratio of the rate-matched @@ -2795,7 +2826,8 @@ armral_status armral_polar_decode_block(uint32_t n, const uint8_t *frozen, * @param[in] n The number of bits in the code block. * @param[in] e The number of bits in the rate-matched message. * @param[in] k The number of information bits in the code block. - * @param[in] i_bil Flag to enable/disable the interleaving of coded bits. + * @param[in] i_bil Flag to enable/disable the interleaving of coded + * bits. * @param[in] p_d_seq_in Points to `n` bits representing the Polar encoded * message. * @param[out] p_f_seq_out Points to `e` bits representing the rate-matched @@ -2810,10 +2842,10 @@ armral_status armral_polar_rate_matching(uint32_t n, uint32_t e, uint32_t k, /** * Non-allocating variant of \link armral_polar_rate_matching \endlink. * - * This function matches the rate of the Polar encoded code block to the rate - * of the channel using sub-block interleaving, bit selection, and channel - * interleaving. This is as described in the 3GPP Technical Specification (TS) - * 38.212 section 5.4.1. + * Matches the rate of the Polar encoded code block to the rate of the channel + * using sub-block interleaving, bit selection, and channel interleaving based + * on Downlink or Uplink direction. This is as described in the 3GPP Technical + * Specification (TS) 38.212 section 5.4.1. * * The code rate of the code block is defined by the ratio of the rate-matched * length `e` to the number of information bits in the message `k`. It is @@ -2832,7 +2864,8 @@ armral_status armral_polar_rate_matching(uint32_t n, uint32_t e, uint32_t k, * @param[in] n The number of bits in the code block. * @param[in] e The number of bits in the rate-matched message. * @param[in] k The number of information bits in the code block. - * @param[in] i_bil Flag to enable/disable the interleaving of coded bits. + * @param[in] i_bil Flag to enable/disable the interleaving of coded + * bits. * @param[in] p_d_seq_in Points to `n` bits representing the Polar encoded * message. * @param[out] p_f_seq_out Points to `e` bits representing the rate-matched @@ -3541,7 +3574,7 @@ uint32_t armral_ldpc_decode_block_noalloc_buffer_size(armral_ldpc_graph_t bg, * section 5.2.2, filler bits insertion is needed to * ensure that the code block segments have a valid * length and are a multiple of the lifting size. - * @param[in] k codeblock size, the number of bits to encode as + * @param[in] k Codeblock size, the number of bits to encode as * per section 5.3.2 of TS 38.212. * @param[in] rv Redundancy version used in rate matching. Must be * in the set `{0, 1, 2, 3}`. The effect of choosing @@ -3603,7 +3636,7 @@ armral_status armral_ldpc_rate_matching(armral_ldpc_graph_t bg, uint32_t z, * section 5.2.2, filler bits insertion is needed to * ensure that the code block segments have a valid * length and are a multiple of the lifting size. 
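To make the filler-bit requirement concrete: when the payload length is not a multiple of the lifting size, filler bits pad the code block segment up to the next multiple. A simplified sketch under that assumption; the library's actual computation follows TS 38.212 section 5.2.2 in full, including the base-graph-dependent segment length:

```c
#include <stdint.h>

// Simplified illustration: the number of filler bits needed to pad
// len_in bits up to the next multiple of the lifting size z.
static inline uint32_t filler_bits_for(uint32_t len_in, uint32_t z) {
  uint32_t rem = len_in % z;
  return rem == 0 ? 0 : z - rem;
}
// Example: filler_bits_for(100, 24) == 20, so a 100-bit segment is
// padded to 120 bits (5 * 24) before encoding.
```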
- * @param[in] k codeblock size, the number of bits to encode as + * @param[in] k Codeblock size, the number of bits to encode as * per section 5.3.2 of TS 38.212. * @param[in] rv Redundancy version used in rate matching. Must be * in the set `{0, 1, 2, 3}`. The effect of choosing @@ -3666,7 +3699,7 @@ armral_status armral_ldpc_rate_matching_noalloc( * section 5.2.2, filler bits insertion is needed to * ensure that the code block segments have a valid * length and are a multiple of the lifting size. - * @param[in] k codeblock size, the number of bits to encode as + * @param[in] k Codeblock size, the number of bits to encode as * per section 5.3.2 of TS 38.212. * @param[in] rv Redundancy version used in rate matching. Must be * in the set `{0, 1, 2, 3}`. The effect of choosing @@ -3737,7 +3770,7 @@ armral_status armral_ldpc_rate_recovery(armral_ldpc_graph_t bg, uint32_t z, * section 5.2.2, filler bits insertion is needed to * ensure that the code block segments have a valid * length and are a multiple of the lifting size. - * @param[in] k codeblock size, the number of bits to encode as + * @param[in] k Codeblock size, the number of bits to encode as * per section 5.3.2 of TS 38.212. * @param[in] rv Redundancy version used in rate matching. Must be * in the set `{0, 1, 2, 3}`. The effect of choosing diff --git a/license_terms/BSD-3-Clause.txt b/license_terms/BSD-3-Clause.txt index b225ef5df50d6e6c8f6d9f2ebac661acd63c54eb..10ce6d47a47a0a67137b55ce616bd64256f4856d 100644 --- a/license_terms/BSD-3-Clause.txt +++ b/license_terms/BSD-3-Clause.txt @@ -1,4 +1,4 @@ -Copyright 2020-2023 Arm Limited and/or its affiliates +Copyright 2020-2024 Arm Limited and/or its affiliates Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/simulation/README.md b/simulation/README.md index 8540d800c3fc0c1c3a8db527238adeabf535b4b2..77829a405eadd841eed22520b8d15f494b21df56 100644 --- a/simulation/README.md +++ b/simulation/README.md @@ -260,7 +260,7 @@ The JSON record contains the following fields: "Eb/N0": , "snr": , "ulp": , - "len_filler_bits": + "len_filler_bits": , "bler": , "ber": } diff --git a/simulation/awgn/awgn.cpp b/simulation/awgn/awgn.cpp index 8e41e484e6159f811b5be20cbb79fd7eea26319c..6ae035c19b73e4ad16a91ec481d6a787b9ee55db 100644 --- a/simulation/awgn/awgn.cpp +++ b/simulation/awgn/awgn.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "awgn.h" #include "rng.hpp" diff --git a/simulation/awgn/awgn.h b/simulation/awgn/awgn.h index a49a14bfdea891e231611295a526899df565d339..ad8c8fe397fda55260d459326c415bbeb521353f 100644 --- a/simulation/awgn/awgn.h +++ b/simulation/awgn/awgn.h @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/simulation/capacity/capacity.py b/simulation/capacity/capacity.py index b7801b7c32e838564b1df9c2315a614b158ebcd9..4b331ad5394788908ee210582e3adac946e5687c 100755 --- a/simulation/capacity/capacity.py +++ b/simulation/capacity/capacity.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates from argparse import ArgumentParser from math import sqrt, exp, pi, log diff --git 
a/simulation/convolutional_awgn/CMakeLists.txt b/simulation/convolutional_awgn/CMakeLists.txt index c074e6381a553839f5c13d3949af975869b5f600..14faf79fac024d101910d71acf49da3bfd64c8d8 100644 --- a/simulation/convolutional_awgn/CMakeLists.txt +++ b/simulation/convolutional_awgn/CMakeLists.txt @@ -30,5 +30,6 @@ if(BUILD_TESTING AND NOT DEFINED ARMRAL_TEST_RUNNER) # a set of valid inputs. We do not check the validity of the output. # We also only run this if we are not using a test running wrapper. add_test(NAME convolutional_awgn COMMAND ${CMAKE_CURRENT_BINARY_DIR}/convolutional_awgn -k 8 -m 0 -u 128) + set_tests_properties(convolutional_awgn PROPERTIES TIMEOUT 3000) add_dependencies(check convolutional_awgn) endif() diff --git a/simulation/convolutional_awgn/convolutional_awgn.cpp b/simulation/convolutional_awgn/convolutional_awgn.cpp index ee13270b61866d0e3bf22d0c5dc99cb8ddc84342..294b5b30d0de0cdf07412bb82498d90e4c920694 100644 --- a/simulation/convolutional_awgn/convolutional_awgn.cpp +++ b/simulation/convolutional_awgn/convolutional_awgn.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "awgn.h" diff --git a/simulation/convolutional_awgn/convolutional_error_rate.py b/simulation/convolutional_awgn/convolutional_error_rate.py index e3f9ba5d46e4ceb4b7614e8200bfa644039e8e81..1bd7e713311b1b1984b004ab2c48530acb1a2276 100755 --- a/simulation/convolutional_awgn/convolutional_error_rate.py +++ b/simulation/convolutional_awgn/convolutional_error_rate.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates from argparse import ArgumentParser from dataclasses import dataclass diff --git a/simulation/include/simulation_common.hpp b/simulation/include/simulation_common.hpp index 927dddae4c7ab81dc85db35c6ff54913559a417a..ecced32b5c346f671d64f9903f3f44fabb76ffbd 100644 --- a/simulation/include/simulation_common.hpp +++ b/simulation/include/simulation_common.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/simulation/include/simulation_common.py b/simulation/include/simulation_common.py index 036990bdab8fd8d46f5a4222e10d32b2868c6a3d..f062f40ea97b363385cd3bbc917c61796de4b509 100755 --- a/simulation/include/simulation_common.py +++ b/simulation/include/simulation_common.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates from dataclasses import dataclass from datetime import datetime diff --git a/simulation/ldpc_awgn/CMakeLists.txt b/simulation/ldpc_awgn/CMakeLists.txt index 637ab3c2ddea9efae0934ef6e096f7b333472821..bfba1437d22f00a32ec3b162b4c6a911d2f4fd34 100644 --- a/simulation/ldpc_awgn/CMakeLists.txt +++ b/simulation/ldpc_awgn/CMakeLists.txt @@ -30,5 +30,6 @@ if(BUILD_TESTING AND NOT DEFINED ARMRAL_TEST_RUNNER) # a set of valid inputs. We do not check the validity of the output. # We also only run this if we are not using a test running wrapper. 
add_test(NAME ldpc_awgn COMMAND ${CMAKE_CURRENT_BINARY_DIR}/ldpc_awgn -z 3 -b 1 -m 0 -r 0 -u 128) + set_tests_properties(ldpc_awgn PROPERTIES TIMEOUT 3000) add_dependencies(check ldpc_awgn) endif() diff --git a/simulation/ldpc_awgn/ldpc_awgn.cpp b/simulation/ldpc_awgn/ldpc_awgn.cpp index 59324880a213717caf61f0ee05dc9c03136b7750..9df77ae94b85c3fcae456d9e42afb8756da7eb9c 100644 --- a/simulation/ldpc_awgn/ldpc_awgn.cpp +++ b/simulation/ldpc_awgn/ldpc_awgn.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "awgn.h" @@ -102,9 +102,9 @@ void usage(const char *exe_name) { << " the symbol amplitudes are multiplied by a\n" << " scaling factor of 0x1p15/.\n" << " Default value is 128.\n" - << " Filler bits length used to simulate case\n" - << " where transport block length is not multiple\n" - << " of Lifting size .\n" + << " Number of filler bits to use when simulating\n" + << " cases where the transport block length is\n" + << " not a multiple of the lifting size.\n" << " Default length is 0.\n" << std::endl; } @@ -279,7 +279,7 @@ struct sim_result { s << "{\"n\": " << n << ", \"bg\": " << bg << ", \"mod_type\": \"" << mod_type << "\", \"rv\": " << rv << ", \"Eb/N0\": " << ebn0 << ", \"snr\": " << snr << ", \"ulp\": " << ulp - << ", \"len_filler_bits\": " << len_filler_bits << ",\"bler\": " << bler + << ", \"len_filler_bits\": " << len_filler_bits << ",\"bler\": " << bler << ", \"ber\": " << ber << "}"; return std::move(s).str(); } diff --git a/simulation/ldpc_awgn/ldpc_error_rate.py b/simulation/ldpc_awgn/ldpc_error_rate.py index ac7a47679b2caeb13139e1280c4c0e1f75664f5a..32e75e3c50a46595edc01cdd3ccc934193f1c9ab 100755 --- a/simulation/ldpc_awgn/ldpc_error_rate.py +++ b/simulation/ldpc_awgn/ldpc_error_rate.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates from argparse import ArgumentParser from dataclasses import dataclass diff --git a/simulation/modulation_awgn/CMakeLists.txt b/simulation/modulation_awgn/CMakeLists.txt index eecf4024399d4da225cc5eea297a85b07ce59272..c30886adef14827a1b97aac820c7195ed0b345b1 100644 --- a/simulation/modulation_awgn/CMakeLists.txt +++ b/simulation/modulation_awgn/CMakeLists.txt @@ -30,5 +30,6 @@ if(BUILD_TESTING AND NOT DEFINED ARMRAL_TEST_RUNNER) # a set of valid inputs. We do not check the validity of the output. # We also only run this if we are not using a test running wrapper. 
add_test(NAME modulation_awgn COMMAND ${CMAKE_CURRENT_BINARY_DIR}/modulation_awgn -k 32 -m 0 -u 128) + set_tests_properties(modulation_awgn PROPERTIES TIMEOUT 3000) add_dependencies(check modulation_awgn) endif() diff --git a/simulation/modulation_awgn/modulation_awgn.cpp b/simulation/modulation_awgn/modulation_awgn.cpp index 32ea7b90c48af5775a6b9fe81a63f751f1e38dc3..426c90e10994ad86c1fedb61e3ce3ae08731df17 100644 --- a/simulation/modulation_awgn/modulation_awgn.cpp +++ b/simulation/modulation_awgn/modulation_awgn.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "awgn.h" #include "bit_utils.hpp" diff --git a/simulation/modulation_awgn/modulation_error_rate.py b/simulation/modulation_awgn/modulation_error_rate.py index 53628cb42e97fc98505aec9e7e32d1c5c75c2f3e..c5b72ea749de781fe3123dbeaed539e8cf87547b 100755 --- a/simulation/modulation_awgn/modulation_error_rate.py +++ b/simulation/modulation_awgn/modulation_error_rate.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates from argparse import ArgumentParser import pandas as pd diff --git a/simulation/polar_awgn/CMakeLists.txt b/simulation/polar_awgn/CMakeLists.txt index 52ef1cc4178c9ec7d90c6b1aff038b1471f632c6..249b4bc7200dd411a909612b8c28ebb47615bafc 100644 --- a/simulation/polar_awgn/CMakeLists.txt +++ b/simulation/polar_awgn/CMakeLists.txt @@ -30,5 +30,6 @@ if(BUILD_TESTING AND NOT DEFINED ARMRAL_TEST_RUNNER) # a set of valid inputs. We do not check the validity of the output. # We also only run this if we are not using a test running wrapper. 
add_test(NAME polar_awgn COMMAND ${CMAKE_CURRENT_BINARY_DIR}/polar_awgn -k 32 -e 32 -l 1 -m 0 -i 0 -u 128) + set_tests_properties(polar_awgn PROPERTIES TIMEOUT 3000) add_dependencies(check polar_awgn) endif() diff --git a/simulation/polar_awgn/polar_awgn.cpp b/simulation/polar_awgn/polar_awgn.cpp index 704963d78c9a8692bd81344b6d0f3f56bf8ed593..76241f0d2c5b677129c5ecdb524be4f37a4a4596 100644 --- a/simulation/polar_awgn/polar_awgn.cpp +++ b/simulation/polar_awgn/polar_awgn.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "awgn.h" @@ -105,7 +105,6 @@ struct polar_example_data { uint32_t len_out; uint32_t bits_per_mod_symbol; uint32_t num_mod_symbols; - uint32_t num_mod_symbols_matched; uint8_t *data_in; uint8_t *frozen_mask; uint8_t *data_interleave; diff --git a/simulation/polar_awgn/polar_error_rate.py b/simulation/polar_awgn/polar_error_rate.py index fd0016183341199b173e0e7dd4156710f3d3e9a2..5cd42341afe8cde5e049fc6032c2e98b4f76325a 100755 --- a/simulation/polar_awgn/polar_error_rate.py +++ b/simulation/polar_awgn/polar_error_rate.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates from argparse import ArgumentParser from dataclasses import dataclass diff --git a/simulation/turbo_awgn/CMakeLists.txt b/simulation/turbo_awgn/CMakeLists.txt index afdc6a404c407f7cd7cd99152663541731d020b4..0f6389a025f8132fa6b9be906671a3a4d274fe7b 100644 --- a/simulation/turbo_awgn/CMakeLists.txt +++ b/simulation/turbo_awgn/CMakeLists.txt @@ -30,5 +30,6 @@ if(BUILD_TESTING AND NOT DEFINED ARMRAL_TEST_RUNNER) # a set of valid inputs. We do not check the validity of the output. # We also only run this if we are not using a test running wrapper. 
add_test(NAME turbo_awgn COMMAND ${CMAKE_CURRENT_BINARY_DIR}/turbo_awgn -k 40 -m 0 -i 1 -e 60) + set_tests_properties(turbo_awgn PROPERTIES TIMEOUT 3000) add_dependencies(check turbo_awgn) endif() diff --git a/simulation/turbo_awgn/turbo_awgn.cpp b/simulation/turbo_awgn/turbo_awgn.cpp index d751f22634292cdd6b56de983c37d4ad989fc3cc..29c8bdb40c92f75ff245a0526507e51792342302 100644 --- a/simulation/turbo_awgn/turbo_awgn.cpp +++ b/simulation/turbo_awgn/turbo_awgn.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "awgn.h" diff --git a/simulation/turbo_awgn/turbo_error_rate.py b/simulation/turbo_awgn/turbo_error_rate.py index 33511294cdae5c10abeca148943b6a0a506a7eee..6725cdf09f8f6d7d8c46aecc59db1d7a70ec3329 100755 --- a/simulation/turbo_awgn/turbo_error_rate.py +++ b/simulation/turbo_awgn/turbo_error_rate.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # Arm RAN Acceleration Library -# Copyright 2020-2023 Arm Limited and/or its affiliates +# Copyright 2020-2024 Arm Limited and/or its affiliates from argparse import ArgumentParser from dataclasses import dataclass diff --git a/src/BasicMathFun/MatrixInv/arm_cmplx_hermitian_mat_inversion_f32.cpp b/src/BasicMathFun/MatrixInv/arm_cmplx_hermitian_mat_inversion_f32.cpp index 14992ba9e002731299948ded92ed755af4e831a3..5c889e12542a41f1965dfb83e42d9e57c5256a21 100644 --- a/src/BasicMathFun/MatrixInv/arm_cmplx_hermitian_mat_inversion_f32.cpp +++ b/src/BasicMathFun/MatrixInv/arm_cmplx_hermitian_mat_inversion_f32.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/src/BasicMathFun/MatrixInv/arm_cmplx_mat_inversion_f32.cpp b/src/BasicMathFun/MatrixInv/arm_cmplx_mat_inversion_f32.cpp index 6246a8a59065b4970b8f71251b2fb770bd96234d..fccc80a6fa37031f62d10789d7fea3b772f7994d 100644 --- a/src/BasicMathFun/MatrixInv/arm_cmplx_mat_inversion_f32.cpp +++ b/src/BasicMathFun/MatrixInv/arm_cmplx_mat_inversion_f32.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/src/BasicMathFun/MatrixInv/cmplx_hermitian_mat_inversion_f32.hpp b/src/BasicMathFun/MatrixInv/cmplx_hermitian_mat_inversion_f32.hpp index c3d0d683cf465d0411b829cd8562d30b9b72865f..cb47a3ed62ab934d97f21810085af4f8108476cf 100644 --- a/src/BasicMathFun/MatrixInv/cmplx_hermitian_mat_inversion_f32.hpp +++ b/src/BasicMathFun/MatrixInv/cmplx_hermitian_mat_inversion_f32.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2023 Arm Limited and/or its affiliates + Copyright 2023-2024 Arm Limited and/or its affiliates */ namespace armral::cmplx_herm_mat_inv { diff --git a/src/BasicMathFun/MatrixMult/arm_cmplx_mat_mult_aah_f32.cpp b/src/BasicMathFun/MatrixMult/arm_cmplx_mat_mult_aah_f32.cpp index 45a05c70ffbe67618d2b53823f3adb2f99d1b3ca..29d168396c7040d61fb3e6b7079516c1402e0465 100644 --- a/src/BasicMathFun/MatrixMult/arm_cmplx_mat_mult_aah_f32.cpp +++ b/src/BasicMathFun/MatrixMult/arm_cmplx_mat_mult_aah_f32.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2023 Arm Limited and/or its affiliates + Copyright 2023-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/src/BasicMathFun/MatrixMult/arm_cmplx_mat_mult_ahb_f32.c 
b/src/BasicMathFun/MatrixMult/arm_cmplx_mat_mult_ahb_f32.c index 81907e8d180d8cdaa84b559a276339c154973e53..ddad389b10aa580d9ff12368b2de0ea315d7960c 100644 --- a/src/BasicMathFun/MatrixMult/arm_cmplx_mat_mult_ahb_f32.c +++ b/src/BasicMathFun/MatrixMult/arm_cmplx_mat_mult_ahb_f32.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2023 Arm Limited and/or its affiliates + Copyright 2023-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "intrinsics.h" diff --git a/src/BasicMathFun/MatrixMult/arm_cmplx_mat_mult_f32.c b/src/BasicMathFun/MatrixMult/arm_cmplx_mat_mult_f32.c index 1179efd7e56893991bd6aae139bb6ec558ce7833..4712548218b0c65941223535073c1d25f3baacaa 100644 --- a/src/BasicMathFun/MatrixMult/arm_cmplx_mat_mult_f32.c +++ b/src/BasicMathFun/MatrixMult/arm_cmplx_mat_mult_f32.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/src/BasicMathFun/MatrixMult/arm_cmplx_mat_mult_i16.c b/src/BasicMathFun/MatrixMult/arm_cmplx_mat_mult_i16.c index 9ebad6c0086782530dcdd94639ee4ef8fd8d2e2d..f1b33411668af96e551912f4142897475610ba41 100644 --- a/src/BasicMathFun/MatrixMult/arm_cmplx_mat_mult_i16.c +++ b/src/BasicMathFun/MatrixMult/arm_cmplx_mat_mult_i16.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "intrinsics.h" diff --git a/src/BasicMathFun/MatrixMult/arm_cmplx_mat_mult_i16_32bit.c b/src/BasicMathFun/MatrixMult/arm_cmplx_mat_mult_i16_32bit.c index 7a7863e3482ccf9155e126144c69e80520c7b5af..fdfe709dac1f4dc6a7f596e31e3ad1ed0c90dabb 100644 --- a/src/BasicMathFun/MatrixMult/arm_cmplx_mat_mult_i16_32bit.c +++ b/src/BasicMathFun/MatrixMult/arm_cmplx_mat_mult_i16_32bit.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/src/BasicMathFun/MatrixMult/arm_cmplx_mat_vec_mult_f32.c b/src/BasicMathFun/MatrixMult/arm_cmplx_mat_vec_mult_f32.c index e0eeffed8066775073b5d5e4022fddfddc42e9e5..631c0b18c5315209bda20a5ebee8c33d6eff2163 100644 --- a/src/BasicMathFun/MatrixMult/arm_cmplx_mat_vec_mult_f32.c +++ b/src/BasicMathFun/MatrixMult/arm_cmplx_mat_vec_mult_f32.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "intrinsics.h" diff --git a/src/BasicMathFun/MatrixMult/arm_cmplx_mat_vec_mult_i16.c b/src/BasicMathFun/MatrixMult/arm_cmplx_mat_vec_mult_i16.c index 13317c0f3ba25b5847d8fd12e9bcd0690197040e..1f2ba56b952477bcb8c7bd4b8b7fe7227a5741c7 100644 --- a/src/BasicMathFun/MatrixMult/arm_cmplx_mat_vec_mult_i16.c +++ b/src/BasicMathFun/MatrixMult/arm_cmplx_mat_vec_mult_i16.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "intrinsics.h" diff --git a/src/BasicMathFun/MatrixMult/arm_cmplx_mat_vec_mult_i16_32bit.c b/src/BasicMathFun/MatrixMult/arm_cmplx_mat_vec_mult_i16_32bit.c index 998abdfa52126ccd55a19e1afc44ef5bcb8d0369..5f0c082855ba5d202e599dd611140c6e82f439c5 100644 --- a/src/BasicMathFun/MatrixMult/arm_cmplx_mat_vec_mult_i16_32bit.c +++ 
b/src/BasicMathFun/MatrixMult/arm_cmplx_mat_vec_mult_i16_32bit.c @@ -1,17 +1,12 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "intrinsics.h" #include -typedef struct { - int32_t re; - int32_t im; -} cmplx_int32_t; - static int16x4_t vld1s_s16(const armral_cmplx_int16_t *p) { // there is no intrinsic for only loading 32-bits into an ACLE vector. int16x4_t ret; diff --git a/src/BasicMathFun/MatrixMult/arm_solve_1sc.c b/src/BasicMathFun/MatrixMult/arm_solve_1sc.c index 18519b7a38d9ab636e89c290dda3c94b221087c6..6717b10ecfd9ddcce0a1d72b226958a0cddbd71a 100644 --- a/src/BasicMathFun/MatrixMult/arm_solve_1sc.c +++ b/src/BasicMathFun/MatrixMult/arm_solve_1sc.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "arm_solve_1sc.h" #include "arm_solve_convert.h" diff --git a/src/BasicMathFun/MatrixMult/arm_solve_1sc.h b/src/BasicMathFun/MatrixMult/arm_solve_1sc.h index 18c807e7335cd50c177ea3e5b843a1bc62541362..b0f9c8ab97ef9ce41aef4d8becc8f0b22a28fd2b 100644 --- a/src/BasicMathFun/MatrixMult/arm_solve_1sc.h +++ b/src/BasicMathFun/MatrixMult/arm_solve_1sc.h @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/src/BasicMathFun/MatrixMult/arm_solve_4sc.c b/src/BasicMathFun/MatrixMult/arm_solve_4sc.c index f5f7761ed46547509312ce123bda45c05dfd1aad..56bd0d8dc47269acf670311d076022e5ffb1bdad 100644 --- a/src/BasicMathFun/MatrixMult/arm_solve_4sc.c +++ b/src/BasicMathFun/MatrixMult/arm_solve_4sc.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "arm_solve_4sc.h" #include "arm_solve_convert.h" diff --git a/src/BasicMathFun/MatrixMult/arm_solve_4sc.h b/src/BasicMathFun/MatrixMult/arm_solve_4sc.h index 7bb8a6f0e80128bcb1584f9ba0650937ebf4861b..f2963b814e354985faa3bd46fc1c64332250de9c 100644 --- a/src/BasicMathFun/MatrixMult/arm_solve_4sc.h +++ b/src/BasicMathFun/MatrixMult/arm_solve_4sc.h @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/src/BasicMathFun/MatrixMult/arm_solve_6sc.c b/src/BasicMathFun/MatrixMult/arm_solve_6sc.c index b7ff8cc51245145c7b409848c9fc93ce1a5b2a54..edadced8061defad601ec118ed44cad8802bb982 100644 --- a/src/BasicMathFun/MatrixMult/arm_solve_6sc.c +++ b/src/BasicMathFun/MatrixMult/arm_solve_6sc.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "arm_solve_6sc.h" #include "arm_solve_convert.h" diff --git a/src/BasicMathFun/MatrixMult/arm_solve_6sc.h b/src/BasicMathFun/MatrixMult/arm_solve_6sc.h index cd4d641ea03af959e77e8a3ddceedd7536354a71..52bcf9da786e3ab4878e8b200158468b6c043418 100644 --- a/src/BasicMathFun/MatrixMult/arm_solve_6sc.h +++ b/src/BasicMathFun/MatrixMult/arm_solve_6sc.h @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/src/BasicMathFun/MatrixMult/arm_solve_convert.h 
b/src/BasicMathFun/MatrixMult/arm_solve_convert.h index 3352d751a4e4c490155269080abd782d46b59911..acc88428d4c507f25fe86f00ba73aa6f7a29f348 100644 --- a/src/BasicMathFun/MatrixMult/arm_solve_convert.h +++ b/src/BasicMathFun/MatrixMult/arm_solve_convert.h @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/src/BasicMathFun/MatrixMult/arm_solve_f32.c b/src/BasicMathFun/MatrixMult/arm_solve_f32.c index 3afd242fd9dc547a079229e5cf5b9279015718a5..75dfecd78e6854c094acd007e9e18bfbd59f982d 100644 --- a/src/BasicMathFun/MatrixMult/arm_solve_f32.c +++ b/src/BasicMathFun/MatrixMult/arm_solve_f32.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "arm_solve_1sc.h" #include "arm_solve_4sc.h" diff --git a/src/BasicMathFun/MatrixPseudoInv/arm_cmplx_pseudo_inverse_direct_f32.cpp b/src/BasicMathFun/MatrixPseudoInv/arm_cmplx_pseudo_inverse_direct_f32.cpp index 657c4637571a45ae69033a437129439e9bc40781..999db43406e52bd538fa3f73885964e2c420d746 100644 --- a/src/BasicMathFun/MatrixPseudoInv/arm_cmplx_pseudo_inverse_direct_f32.cpp +++ b/src/BasicMathFun/MatrixPseudoInv/arm_cmplx_pseudo_inverse_direct_f32.cpp @@ -1,29 +1,53 @@ /* Arm RAN Acceleration Library - Copyright 2023 Arm Limited and/or its affiliates + Copyright 2023-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "utils/allocators.hpp" #include "../MatrixInv/cmplx_hermitian_mat_inversion_f32.hpp" +#include "cmplx_mat_pseudo_inverse.hpp" #include namespace { +template +void left_pseudo_inverse(uint16_t m, const float32_t lambda, + const armral_cmplx_f32_t *__restrict p_src, + armral_cmplx_f32_t *p_dst, Allocator &allocator) { + + // Compute C = A^H * A + // We can use p_dst as an intermediate N-by-N array since it has size N-by-M, + // and N < M + auto *mat_aha = p_dst; + armral_cmplx_mat_mult_ahb_f32(m, n, n, p_src, p_src, mat_aha); + + // Compute C += lambda * I + armral::cmplx_mat_pseudo_inv::add_lambda(lambda, p_dst); + + // Compute B = C^(-1) + auto mat_inv = allocate_uninitialized(allocator, n * n); + armral::cmplx_herm_mat_inv::invert_hermitian_matrix(mat_aha, + mat_inv.get()); + + // Compute B * A^H + armral::cmplx_mat_pseudo_inv::mat_mult_bah_f32(m, n, p_src, mat_inv.get(), + p_dst); +} + template -void pseudo_inverse(uint16_t n, const float32_t lambda, - const armral_cmplx_f32_t *__restrict p_src, - armral_cmplx_f32_t *p_dst, Allocator &allocator) { +void right_pseudo_inverse(uint16_t n, const float32_t lambda, + const armral_cmplx_f32_t *__restrict p_src, + armral_cmplx_f32_t *p_dst, Allocator &allocator) { // Compute C = A * A^H // We can use p_dst as an intermediate M-by-M array since it has size N-by-M, // and N >= M auto *mat_aah = p_dst; armral_cmplx_mat_mult_aah_f32(m, n, p_src, mat_aah); - for (uint16_t i = 0; i < m; i++) { - mat_aah[i * (m + 1)].re += lambda; - } + // Compute C += lambda * I + armral::cmplx_mat_pseudo_inv::add_lambda(lambda, mat_aah); // Compute B = C^(-1) auto mat_inv = allocate_uninitialized(allocator, m * m); @@ -40,35 +64,63 @@ cmplx_pseudo_inverse_direct(uint16_t m, uint16_t n, const float32_t lambda, const armral_cmplx_f32_t *__restrict p_src, armral_cmplx_f32_t *p_dst, Allocator &allocator) { - // The number of rows must be less than or equal to the number of columns to - // allow A * A^H to be invertible - if (m > n) { - return 
ARMRAL_ARGUMENT_ERROR; - } - // This routine uses the Hermitian matrix inversion routine defined in the // library (armral_cmplx_hermitian_max_inverse_f32) which is only valid for // particular matrix sizes. This places a restriction on the number of rows // that the input matrix A can have here. + + // If the number of rows in the input matrix is larger than the number of + // columns then use the left pseudo-inverse + if (m > n) { + switch (n) { + case 2: { + left_pseudo_inverse<2>(m, lambda, p_src, p_dst, allocator); + break; + } + case 3: { + left_pseudo_inverse<3>(m, lambda, p_src, p_dst, allocator); + break; + } + case 4: { + left_pseudo_inverse<4>(m, lambda, p_src, p_dst, allocator); + break; + } + case 8: { + left_pseudo_inverse<8>(m, lambda, p_src, p_dst, allocator); + break; + } + case 16: { + left_pseudo_inverse<16>(m, lambda, p_src, p_dst, allocator); + break; + } + default: + return ARMRAL_ARGUMENT_ERROR; + } + + return ARMRAL_SUCCESS; + } + + // If the number of rows in the input matrix is less than or equal to the number + // of columns then use the right pseudo-inverse switch (m) { case 2: { - pseudo_inverse<2>(n, lambda, p_src, p_dst, allocator); + right_pseudo_inverse<2>(n, lambda, p_src, p_dst, allocator); break; } case 3: { - pseudo_inverse<3>(n, lambda, p_src, p_dst, allocator); + right_pseudo_inverse<3>(n, lambda, p_src, p_dst, allocator); break; } case 4: { - pseudo_inverse<4>(n, lambda, p_src, p_dst, allocator); + right_pseudo_inverse<4>(n, lambda, p_src, p_dst, allocator); break; } case 8: { - pseudo_inverse<8>(n, lambda, p_src, p_dst, allocator); + right_pseudo_inverse<8>(n, lambda, p_src, p_dst, allocator); break; } case 16: { - pseudo_inverse<16>(n, lambda, p_src, p_dst, allocator); + right_pseudo_inverse<16>(n, lambda, p_src, p_dst, allocator); break; } default: diff --git a/src/BasicMathFun/MatrixPseudoInv/cmplx_mat_pseudo_inverse.hpp b/src/BasicMathFun/MatrixPseudoInv/cmplx_mat_pseudo_inverse.hpp new file mode 100644 index 0000000000000000000000000000000000000000..04525b9a22c781c19707c0eb4bfbac680d6b3136 --- /dev/null +++ b/src/BasicMathFun/MatrixPseudoInv/cmplx_mat_pseudo_inverse.hpp @@ -0,0 +1,69 @@ +/* + Arm RAN Acceleration Library + Copyright 2023-2024 Arm Limited and/or its affiliates +*/ + +namespace armral::cmplx_mat_pseudo_inv { + +void mat_mult_bah_f32(uint16_t m, uint16_t n, + const armral_cmplx_f32_t *__restrict p_src_a, + const armral_cmplx_f32_t *__restrict p_src_b, + armral_cmplx_f32_t *p_dst) { + // For input matrices A and B, computes B * A^H + for (uint16_t i = 0; i < n; i++) { + for (uint16_t j = 0; j < m; j++) { + float32_t re = 0.0; + float32_t im = 0.0; + float32x4x2_t p_out = {{vdupq_n_f32(0.F), vdupq_n_f32(0.F)}}; + + uint16_t k = 0; + for (; k + 3 < n; k += 4) { + uint32_t b_idx = i * n + k; + uint32_t ah_idx = j * n + k; + + float32x4_t p_in_b[] = { + vld1q_f32((const float32_t *)&p_src_b[b_idx]), + vld1q_f32((const float32_t *)&p_src_b[b_idx + 2])}; + float32x4_t p_in_ah[] = { + vld1q_f32((const float32_t *)&p_src_a[ah_idx]), + vld1q_f32((const float32_t *)&p_src_a[ah_idx + 2])}; + + // c.re = a.re * ah.re + a.im * ah.im + p_out.val[0] = vfmaq_f32(p_out.val[0], p_in_b[0], p_in_ah[0]); + p_out.val[0] = vfmaq_f32(p_out.val[0], p_in_b[1], p_in_ah[1]); + + // c.im = a.im * ah.re - a.re * ah.im + p_in_ah[0] = vrev64q_f32(p_in_ah[0]); + p_in_ah[1] = vrev64q_f32(p_in_ah[1]); + p_out.val[1] = vfmsq_f32(p_out.val[1], p_in_b[0], p_in_ah[0]); + p_out.val[1] = vfmsq_f32(p_out.val[1], p_in_b[1], p_in_ah[1]); + } + re = vaddvq_f32(p_out.val[0]); 
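For readers of the new NEON kernel: `mat_mult_bah_f32` computes `p_dst = B * A^H` for a row-major `n`-by-`n` matrix `B` and `m`-by-`n` matrix `A`, folding the conjugation of `A` into the lane arithmetic (the `vrev64q_f32` swap followed by the `f64` reinterpret-and-negate flips the sign of exactly the odd lanes). A scalar reference of the same computation, shown for clarity only and not part of the patch:

```c
#include "armral.h"

// Scalar reference for mat_mult_bah_f32: computes D = B * A^H, i.e.
// D[i][j] = sum_k B[i][k] * conj(A[j][k]), with D stored n-by-m
// row-major, matching the vectorized kernel's p_dst[i * m + j].
static void mat_mult_bah_ref(uint16_t m, uint16_t n,
                             const armral_cmplx_f32_t *a,
                             const armral_cmplx_f32_t *b,
                             armral_cmplx_f32_t *d) {
  for (uint16_t i = 0; i < n; i++) {
    for (uint16_t j = 0; j < m; j++) {
      float32_t re = 0.0f;
      float32_t im = 0.0f;
      for (uint16_t k = 0; k < n; k++) {
        armral_cmplx_f32_t bv = b[i * n + k];
        armral_cmplx_f32_t av = a[j * n + k]; // used as conj(av)
        re += bv.re * av.re + bv.im * av.im;
        im += bv.im * av.re - bv.re * av.im;
      }
      d[i * m + j] = (armral_cmplx_f32_t){re, im};
    }
  }
}
```

This is the product that turns the inverted Hermitian system `(A^H * A + λ * I)^-1` into the left pseudo-inverse `(A^H * A + λ * I)^-1 * A^H` used above.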
+ p_out.val[1] = + vreinterpretq_f32_f64(vnegq_f64(vreinterpretq_f64_f32(p_out.val[1]))); + im = vaddvq_f32(p_out.val[1]); + + if (n % 4 != 0) { + for (; k < n; k++) { + uint32_t b_idx = i * n + k; + uint32_t ah_idx = j * n + k; + re += p_src_b[b_idx].re * p_src_a[ah_idx].re + + p_src_b[b_idx].im * p_src_a[ah_idx].im; + im += p_src_b[b_idx].im * p_src_a[ah_idx].re - + p_src_b[b_idx].re * p_src_a[ah_idx].im; + } + } + p_dst[i * m + j] = armral_cmplx_f32_t{re, im}; + } + } +} + +template +void add_lambda(float32_t lambda, armral_cmplx_f32_t *p_dst) { + // Adds lambda to the diagonals of a dim-by-dim matrix + for (uint16_t i = 0; i < dim; i++) { + p_dst[i * (dim + 1)].re += lambda; + } +} + +} // namespace armral::cmplx_mat_pseudo_inv diff --git a/src/BasicMathFun/VectorDotProd/arm_cmplx_vecdot_f32.c b/src/BasicMathFun/VectorDotProd/arm_cmplx_vecdot_f32.c index ba61e9892ccf578fddab6c5744907f283fce9176..98134622d1b3c9223fd60f35d7a147603fd804d6 100644 --- a/src/BasicMathFun/VectorDotProd/arm_cmplx_vecdot_f32.c +++ b/src/BasicMathFun/VectorDotProd/arm_cmplx_vecdot_f32.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/src/BasicMathFun/VectorDotProd/arm_cmplx_vecdot_f32_2.c b/src/BasicMathFun/VectorDotProd/arm_cmplx_vecdot_f32_2.c index 129fed3bb54b6b0492a1efb2d1198636a9a8e55d..4549d4a31537822ff5fb32052f589316f483ad2a 100644 --- a/src/BasicMathFun/VectorDotProd/arm_cmplx_vecdot_f32_2.c +++ b/src/BasicMathFun/VectorDotProd/arm_cmplx_vecdot_f32_2.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #ifdef ARMRAL_ARCH_SVE @@ -50,12 +50,12 @@ armral_status armral_cmplx_vecdot_f32_2(int32_t n, svfloat32_t vec_b_imag = svld1_f32(tail_pg, p_src_b_im); /* Re{C} = Re{A}*Re{B} - Im{A}*Im{B} */ - acc_real = svmla_f32_x(tail_pg, acc_real, vec_a_real, vec_b_real); - acc_real = svmls_f32_x(tail_pg, acc_real, vec_a_imag, vec_b_imag); + acc_real = svmla_f32_m(tail_pg, acc_real, vec_a_real, vec_b_real); + acc_real = svmls_f32_m(tail_pg, acc_real, vec_a_imag, vec_b_imag); /* Im{C} = Re{A}*Im{B} + Im{A}*Re{B} */ - acc_imag = svmla_f32_x(tail_pg, acc_imag, vec_a_real, vec_b_imag); - acc_imag = svmla_f32_x(tail_pg, acc_imag, vec_a_imag, vec_b_real); + acc_imag = svmla_f32_m(tail_pg, acc_imag, vec_a_real, vec_b_imag); + acc_imag = svmla_f32_m(tail_pg, acc_imag, vec_a_imag, vec_b_real); } *p_src_c_re = svaddv_f32(pg, acc_real); diff --git a/src/BasicMathFun/VectorDotProd/arm_cmplx_vecdot_i16.c b/src/BasicMathFun/VectorDotProd/arm_cmplx_vecdot_i16.c index 29629a0ca840041ac884eb8204f97ccd2c0c80ca..82ff5c4137d5285b00e371ac4845857f843bd22b 100644 --- a/src/BasicMathFun/VectorDotProd/arm_cmplx_vecdot_i16.c +++ b/src/BasicMathFun/VectorDotProd/arm_cmplx_vecdot_i16.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "intrinsics.h" diff --git a/src/BasicMathFun/VectorDotProd/arm_cmplx_vecdot_i16_2.c b/src/BasicMathFun/VectorDotProd/arm_cmplx_vecdot_i16_2.c index cb03f06b1767e1cfc8ce61da3b30c2e5d502cf30..36d404383097d09fa75f88c0146560ed69c5102e 100644 --- a/src/BasicMathFun/VectorDotProd/arm_cmplx_vecdot_i16_2.c +++ b/src/BasicMathFun/VectorDotProd/arm_cmplx_vecdot_i16_2.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration 
Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "intrinsics.h" diff --git a/src/BasicMathFun/VectorDotProd/arm_cmplx_vecdot_i16_2_32bit.c b/src/BasicMathFun/VectorDotProd/arm_cmplx_vecdot_i16_2_32bit.c index 7957e468a89c893d253358ae53583528c01c30e5..1bceca56b3748472961fbf26cd8ee2217ec0dbef 100644 --- a/src/BasicMathFun/VectorDotProd/arm_cmplx_vecdot_i16_2_32bit.c +++ b/src/BasicMathFun/VectorDotProd/arm_cmplx_vecdot_i16_2_32bit.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "intrinsics.h" diff --git a/src/BasicMathFun/VectorDotProd/arm_cmplx_vecdot_i16_32bit.c b/src/BasicMathFun/VectorDotProd/arm_cmplx_vecdot_i16_32bit.c index 1d3d9a2e241b5f7a961dc1bc654dd31fd900f7a7..9a40c00668fd86cde7c403cef05c170199b7fc82 100644 --- a/src/BasicMathFun/VectorDotProd/arm_cmplx_vecdot_i16_32bit.c +++ b/src/BasicMathFun/VectorDotProd/arm_cmplx_vecdot_i16_32bit.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "intrinsics.h" diff --git a/src/BasicMathFun/VectorMult/arm_cmplx_vecmul_f32.c b/src/BasicMathFun/VectorMult/arm_cmplx_vecmul_f32.c index 613ffd4ea317ec4ed6a1a533dac23d33a4c8845f..a6660f20fe3e780bc6877ea7ca26413c6a87ca84 100644 --- a/src/BasicMathFun/VectorMult/arm_cmplx_vecmul_f32.c +++ b/src/BasicMathFun/VectorMult/arm_cmplx_vecmul_f32.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #ifdef ARMRAL_ARCH_SVE diff --git a/src/BasicMathFun/VectorMult/arm_cmplx_vecmul_f32_2.c b/src/BasicMathFun/VectorMult/arm_cmplx_vecmul_f32_2.c index da4a5768e1559faa5c1c30833c3200a5a4b359d0..ac068af2d05b2bb96a0399d59c761ad874dba4e3 100644 --- a/src/BasicMathFun/VectorMult/arm_cmplx_vecmul_f32_2.c +++ b/src/BasicMathFun/VectorMult/arm_cmplx_vecmul_f32_2.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #ifdef ARMRAL_ARCH_SVE diff --git a/src/BasicMathFun/VectorMult/arm_cmplx_vecmul_i16.cpp b/src/BasicMathFun/VectorMult/arm_cmplx_vecmul_i16.cpp index 9cce4ad0e6fec816761267a690fcb0ba3927e1a7..617ddbafd7560c78cff01194a1ec41e8ee3c7bf2 100644 --- a/src/BasicMathFun/VectorMult/arm_cmplx_vecmul_i16.cpp +++ b/src/BasicMathFun/VectorMult/arm_cmplx_vecmul_i16.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "intrinsics.h" diff --git a/src/BasicMathFun/VectorMult/arm_cmplx_vecmul_i16_2.c b/src/BasicMathFun/VectorMult/arm_cmplx_vecmul_i16_2.c index 05fef2fb8c145192f521d9d2bd00a19559c7a8ef..322e0cdf46238ada687eaf9a6a1cf9d8c8326b14 100644 --- a/src/BasicMathFun/VectorMult/arm_cmplx_vecmul_i16_2.c +++ b/src/BasicMathFun/VectorMult/arm_cmplx_vecmul_i16_2.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "intrinsics.h" diff --git a/src/DuRuInterface/MuLawCompression/arm_mu_law_compression.cpp 
b/src/DuRuInterface/MuLawCompression/arm_mu_law_compression.cpp index 53d28e5578bc77af46b867ff04151565bb2238cc..34e0c2df3efa3fe9127c664b9f72356afb71209a 100644 --- a/src/DuRuInterface/MuLawCompression/arm_mu_law_compression.cpp +++ b/src/DuRuInterface/MuLawCompression/arm_mu_law_compression.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #ifdef ARMRAL_ARCH_SVE diff --git a/src/DuRuInterface/MuLawCompression/arm_mu_law_decompression.cpp b/src/DuRuInterface/MuLawCompression/arm_mu_law_decompression.cpp index 3bfaa21effa56eda7440e81c6926d7e3b03e46f5..582de2fd7870b782a5ec21d6e88c38e341231812 100644 --- a/src/DuRuInterface/MuLawCompression/arm_mu_law_decompression.cpp +++ b/src/DuRuInterface/MuLawCompression/arm_mu_law_decompression.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "utils/vec_mul.hpp" diff --git a/src/DuRuInterface/ORanBlockFloat/arm_block_float_compression.cpp b/src/DuRuInterface/ORanBlockFloat/arm_block_float_compression.cpp index ea7c8e518bc09a24865e7fa5ca42e36f4545a2f4..310c0170555043051bc885151229324b04e9b146 100644 --- a/src/DuRuInterface/ORanBlockFloat/arm_block_float_compression.cpp +++ b/src/DuRuInterface/ORanBlockFloat/arm_block_float_compression.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #if ARMRAL_ARCH_SVE >= 2 diff --git a/src/DuRuInterface/ORanBlockFloat/arm_block_float_decompression.cpp b/src/DuRuInterface/ORanBlockFloat/arm_block_float_decompression.cpp index 31242b28b9e7e97ce14ba8e86beaa735326be7b4..7d41c34f37cd6e5b9c4de446b3fcd8ba87a938ae 100644 --- a/src/DuRuInterface/ORanBlockFloat/arm_block_float_decompression.cpp +++ b/src/DuRuInterface/ORanBlockFloat/arm_block_float_decompression.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #if ARMRAL_ARCH_SVE >= 2 diff --git a/src/DuRuInterface/ORanBlockScaling/arm_block_scaling_compression.cpp b/src/DuRuInterface/ORanBlockScaling/arm_block_scaling_compression.cpp index c458999145b1df17c1f4da35c7286783bc10d4e6..7d96bf406fb935057215f6ac15dd37cf4e3bc57c 100644 --- a/src/DuRuInterface/ORanBlockScaling/arm_block_scaling_compression.cpp +++ b/src/DuRuInterface/ORanBlockScaling/arm_block_scaling_compression.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #if ARMRAL_ARCH_SVE >= 2 diff --git a/src/DuRuInterface/ORanBlockScaling/arm_block_scaling_decompression.cpp b/src/DuRuInterface/ORanBlockScaling/arm_block_scaling_decompression.cpp index 403d35639dba4cfc22f1b749529cf63ca30c5ac0..c8fb82a4edd6850db067abda6a948b80fbe9d329 100644 --- a/src/DuRuInterface/ORanBlockScaling/arm_block_scaling_decompression.cpp +++ b/src/DuRuInterface/ORanBlockScaling/arm_block_scaling_decompression.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "intrinsics.h" diff --git 
a/src/DuRuInterface/bit_packing_common.hpp b/src/DuRuInterface/bit_packing_common.hpp index be0b4c8bb3d8edc12c2008a38322cfdff6131c57..2c7af3b4a26fcb6484da28e5de84bd903d9ebfe3 100644 --- a/src/DuRuInterface/bit_packing_common.hpp +++ b/src/DuRuInterface/bit_packing_common.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/src/LowerPHY/Correlation/arm_correlation.c b/src/LowerPHY/Correlation/arm_correlation.c index 8fee9b72113c21e2ec2abf01b7e67fd1ec9997fa..71dce495d3c67c6f8e14e064cf37373c75289586 100644 --- a/src/LowerPHY/Correlation/arm_correlation.c +++ b/src/LowerPHY/Correlation/arm_correlation.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "intrinsics.h" diff --git a/src/LowerPHY/FFT/fft_cf32.cpp b/src/LowerPHY/FFT/fft_cf32.cpp index 58d2850ab9615204067df46c246ef48a512c2d9b..830bb0239b1e2d402047253689f4fab04847fc9f 100644 --- a/src/LowerPHY/FFT/fft_cf32.cpp +++ b/src/LowerPHY/FFT/fft_cf32.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "fft_execute.hpp" #include "fft_plan.hpp" diff --git a/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ab_t_gs.c b/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ab_t_gs.c index 247de42b7ee64ef6f59bd9fef5c3de10d1c1e191..bf5176b8c123ebbee9351f2be7261d627222ba22 100644 --- a/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ab_t_gs.c +++ b/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ab_t_gs.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "fft_cf32_cf32_cf32_ab_t_gs.h" @@ -2622,12 +2622,12 @@ void armral_fft_cf32_cf32_cf32_ab_t_gs11(const armral_cmplx_f32_t *restrict x, float v416 = 5.5486073394528512e-01F; float v419 = 1.2412944743900585e+00F; float v420 = -1.2412944743900585e+00F; - float v426 = 2.0897833842005753e-01F; - float v427 = -2.0897833842005753e-01F; - float v433 = 3.7415717312460806e-01F; - float v434 = -3.7415717312460806e-01F; - float v440 = 4.9929922194110354e-02F; - float v441 = -4.9929922194110354e-02F; + float v426 = 2.0897833842005756e-01F; + float v427 = -2.0897833842005756e-01F; + float v433 = 3.7415717312460811e-01F; + float v434 = -3.7415717312460811e-01F; + float v440 = 4.9929922194110327e-02F; + float v441 = -4.9929922194110327e-02F; float v447 = 6.5815896284539266e-01F; float v448 = -6.5815896284539266e-01F; float v454 = 6.3306543373877577e-01F; @@ -2923,9 +2923,9 @@ void armral_fft_cf32_cf32_cf32_ab_t_gs11(const armral_cmplx_f32_t *restrict x, float v325 = 1.0702757469471715e+00F; float v330 = 5.5486073394528512e-01F; float v335 = -1.2412944743900585e+00F; - float v342 = -2.0897833842005753e-01F; - float v349 = -3.7415717312460806e-01F; - float v356 = -4.9929922194110354e-02F; + float v342 = -2.0897833842005756e-01F; + float v349 = -3.7415717312460811e-01F; + float v356 = -4.9929922194110327e-02F; float v363 = -6.5815896284539266e-01F; float v370 = -6.3306543373877577e-01F; float v377 = -1.0822460581641109e+00F; @@ -12769,12 +12769,12 @@ void armral_fft_cf32_cf32_cf32_ab_t_gs22(const armral_cmplx_f32_t *restrict x, float v977 = 5.5486073394528512e-01F; float v980 = 1.2412944743900585e+00F; float v981 = -1.2412944743900585e+00F; - float v987 = 
2.0897833842005753e-01F; - float v988 = -2.0897833842005753e-01F; - float v994 = 3.7415717312460806e-01F; - float v995 = -3.7415717312460806e-01F; - float v1001 = 4.9929922194110354e-02F; - float v1002 = -4.9929922194110354e-02F; + float v987 = 2.0897833842005756e-01F; + float v988 = -2.0897833842005756e-01F; + float v994 = 3.7415717312460811e-01F; + float v995 = -3.7415717312460811e-01F; + float v1001 = 4.9929922194110327e-02F; + float v1002 = -4.9929922194110327e-02F; float v1008 = 6.5815896284539266e-01F; float v1009 = -6.5815896284539266e-01F; float v1015 = 6.3306543373877577e-01F; @@ -13316,9 +13316,9 @@ void armral_fft_cf32_cf32_cf32_ab_t_gs22(const armral_cmplx_f32_t *restrict x, float v787 = 1.0702757469471715e+00F; float v792 = 5.5486073394528512e-01F; float v797 = -1.2412944743900585e+00F; - float v804 = -2.0897833842005753e-01F; - float v811 = -3.7415717312460806e-01F; - float v818 = -4.9929922194110354e-02F; + float v804 = -2.0897833842005756e-01F; + float v811 = -3.7415717312460811e-01F; + float v818 = -4.9929922194110327e-02F; float v825 = -6.5815896284539266e-01F; float v832 = -6.3306543373877577e-01F; float v839 = -1.0822460581641109e+00F; diff --git a/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ab_t_gs.h b/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ab_t_gs.h index 64be0bf97c3295f80235954c0bb18ca8bb32ed36..98033b7e4988b00db4b80038909edacb9bf0d6b9 100644 --- a/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ab_t_gs.h +++ b/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ab_t_gs.h @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ab_t_gu.c b/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ab_t_gu.c index 1468fa558edcae4d169a8313d176a04d13141d77..23c5797c849a16d2a992de454f55802a99662166 100644 --- a/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ab_t_gu.c +++ b/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ab_t_gu.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "fft_cf32_cf32_cf32_ab_t_gu.h" @@ -2531,12 +2531,12 @@ void armral_fft_cf32_cf32_cf32_ab_t_gu11(const armral_cmplx_f32_t *restrict x, float v416 = 5.5486073394528512e-01F; float v419 = 1.2412944743900585e+00F; float v420 = -1.2412944743900585e+00F; - float v426 = 2.0897833842005753e-01F; - float v427 = -2.0897833842005753e-01F; - float v433 = 3.7415717312460806e-01F; - float v434 = -3.7415717312460806e-01F; - float v440 = 4.9929922194110354e-02F; - float v441 = -4.9929922194110354e-02F; + float v426 = 2.0897833842005756e-01F; + float v427 = -2.0897833842005756e-01F; + float v433 = 3.7415717312460811e-01F; + float v434 = -3.7415717312460811e-01F; + float v440 = 4.9929922194110327e-02F; + float v441 = -4.9929922194110327e-02F; float v447 = 6.5815896284539266e-01F; float v448 = -6.5815896284539266e-01F; float v454 = 6.3306543373877577e-01F; @@ -2830,9 +2830,9 @@ void armral_fft_cf32_cf32_cf32_ab_t_gu11(const armral_cmplx_f32_t *restrict x, float v325 = 1.0702757469471715e+00F; float v330 = 5.5486073394528512e-01F; float v335 = -1.2412944743900585e+00F; - float v342 = -2.0897833842005753e-01F; - float v349 = -3.7415717312460806e-01F; - float v356 = -4.9929922194110354e-02F; + float v342 = -2.0897833842005756e-01F; + float v349 = -3.7415717312460811e-01F; + float v356 = -4.9929922194110327e-02F; float v363 = -6.5815896284539266e-01F; float v370 = -6.3306543373877577e-01F; float v377 = 
-1.0822460581641109e+00F; @@ -12458,12 +12458,12 @@ void armral_fft_cf32_cf32_cf32_ab_t_gu22(const armral_cmplx_f32_t *restrict x, float v977 = 5.5486073394528512e-01F; float v980 = 1.2412944743900585e+00F; float v981 = -1.2412944743900585e+00F; - float v987 = 2.0897833842005753e-01F; - float v988 = -2.0897833842005753e-01F; - float v994 = 3.7415717312460806e-01F; - float v995 = -3.7415717312460806e-01F; - float v1001 = 4.9929922194110354e-02F; - float v1002 = -4.9929922194110354e-02F; + float v987 = 2.0897833842005756e-01F; + float v988 = -2.0897833842005756e-01F; + float v994 = 3.7415717312460811e-01F; + float v995 = -3.7415717312460811e-01F; + float v1001 = 4.9929922194110327e-02F; + float v1002 = -4.9929922194110327e-02F; float v1008 = 6.5815896284539266e-01F; float v1009 = -6.5815896284539266e-01F; float v1015 = 6.3306543373877577e-01F; @@ -13003,9 +13003,9 @@ void armral_fft_cf32_cf32_cf32_ab_t_gu22(const armral_cmplx_f32_t *restrict x, float v787 = 1.0702757469471715e+00F; float v792 = 5.5486073394528512e-01F; float v797 = -1.2412944743900585e+00F; - float v804 = -2.0897833842005753e-01F; - float v811 = -3.7415717312460806e-01F; - float v818 = -4.9929922194110354e-02F; + float v804 = -2.0897833842005756e-01F; + float v811 = -3.7415717312460811e-01F; + float v818 = -4.9929922194110327e-02F; float v825 = -6.5815896284539266e-01F; float v832 = -6.3306543373877577e-01F; float v839 = -1.0822460581641109e+00F; diff --git a/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ab_t_gu.h b/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ab_t_gu.h index fe06a56fb0f639cf9722be9c5b2b06a2509af850..8edbe4603f4ed6c016be3f84c48777b466a8d290 100644 --- a/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ab_t_gu.h +++ b/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ab_t_gu.h @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ac_n_gu.c b/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ac_n_gu.c index d2b63f7e6ed227855f11a72157c99cfd151240a2..a61ff10b7e39fcb31192749fdc4dc4723364decb 100644 --- a/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ac_n_gu.c +++ b/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ac_n_gu.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "fft_cf32_cf32_cf32_ac_n_gu.h" @@ -1747,12 +1747,12 @@ void armral_fft_cf32_cf32_cf32_ac_n_gu11(const armral_cmplx_f32_t *restrict x, float v156 = 5.5486073394528512e-01F; float v159 = 1.2412944743900585e+00F; float v160 = -1.2412944743900585e+00F; - float v166 = 2.0897833842005753e-01F; - float v167 = -2.0897833842005753e-01F; - float v173 = 3.7415717312460806e-01F; - float v174 = -3.7415717312460806e-01F; - float v180 = 4.9929922194110354e-02F; - float v181 = -4.9929922194110354e-02F; + float v166 = 2.0897833842005756e-01F; + float v167 = -2.0897833842005756e-01F; + float v173 = 3.7415717312460811e-01F; + float v174 = -3.7415717312460811e-01F; + float v180 = 4.9929922194110327e-02F; + float v181 = -4.9929922194110327e-02F; float v187 = 6.5815896284539266e-01F; float v188 = -6.5815896284539266e-01F; float v194 = 6.3306543373877577e-01F; @@ -1964,9 +1964,9 @@ void armral_fft_cf32_cf32_cf32_ac_n_gu11(const armral_cmplx_f32_t *restrict x, float v185 = 1.0702757469471715e+00F; float v190 = 5.5486073394528512e-01F; float v195 = -1.2412944743900585e+00F; - float v202 = -2.0897833842005753e-01F; - float v209 = -3.7415717312460806e-01F; - float 
v216 = -4.9929922194110354e-02F; + float v202 = -2.0897833842005756e-01F; + float v209 = -3.7415717312460811e-01F; + float v216 = -4.9929922194110327e-02F; float v223 = -6.5815896284539266e-01F; float v230 = -6.3306543373877577e-01F; float v237 = -1.0822460581641109e+00F; @@ -8899,12 +8899,12 @@ void armral_fft_cf32_cf32_cf32_ac_n_gu22(const armral_cmplx_f32_t *restrict x, float v431 = 5.5486073394528512e-01F; float v434 = 1.2412944743900585e+00F; float v435 = -1.2412944743900585e+00F; - float v441 = 2.0897833842005753e-01F; - float v442 = -2.0897833842005753e-01F; - float v448 = 3.7415717312460806e-01F; - float v449 = -3.7415717312460806e-01F; - float v455 = 4.9929922194110354e-02F; - float v456 = -4.9929922194110354e-02F; + float v441 = 2.0897833842005756e-01F; + float v442 = -2.0897833842005756e-01F; + float v448 = 3.7415717312460811e-01F; + float v449 = -3.7415717312460811e-01F; + float v455 = 4.9929922194110327e-02F; + float v456 = -4.9929922194110327e-02F; float v462 = 6.5815896284539266e-01F; float v463 = -6.5815896284539266e-01F; float v469 = 6.3306543373877577e-01F; @@ -9274,9 +9274,9 @@ void armral_fft_cf32_cf32_cf32_ac_n_gu22(const armral_cmplx_f32_t *restrict x, float v493 = 1.0702757469471715e+00F; float v498 = 5.5486073394528512e-01F; float v503 = -1.2412944743900585e+00F; - float v510 = -2.0897833842005753e-01F; - float v517 = -3.7415717312460806e-01F; - float v524 = -4.9929922194110354e-02F; + float v510 = -2.0897833842005756e-01F; + float v517 = -3.7415717312460811e-01F; + float v524 = -4.9929922194110327e-02F; float v531 = -6.5815896284539266e-01F; float v538 = -6.3306543373877577e-01F; float v545 = -1.0822460581641109e+00F; diff --git a/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ac_n_gu.h b/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ac_n_gu.h index 557619fc187b5cf889706bf392477f441a22bb47..57014ea8e384b7a651f83a447b6fd9e1ae4c24dd 100644 --- a/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ac_n_gu.h +++ b/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ac_n_gu.h @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ac_n_uu.c b/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ac_n_uu.c index e4aed1b15887e4a773680bc73d6fb527d1251f89..cd7b9b1cc3965bfec80083f4ed4038c8cf35e7c0 100644 --- a/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ac_n_uu.c +++ b/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ac_n_uu.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "fft_cf32_cf32_cf32_ac_n_uu.h" @@ -2421,12 +2421,12 @@ void armral_fft_cf32_cf32_cf32_ac_n_uu11(const armral_cmplx_f32_t *restrict x, float v194 = 5.5486073394528512e-01F; float v198 = 1.2412944743900585e+00F; float v199 = -1.2412944743900585e+00F; - float v206 = 2.0897833842005753e-01F; - float v207 = -2.0897833842005753e-01F; - float v214 = 3.7415717312460806e-01F; - float v215 = -3.7415717312460806e-01F; - float v222 = 4.9929922194110354e-02F; - float v223 = -4.9929922194110354e-02F; + float v206 = 2.0897833842005756e-01F; + float v207 = -2.0897833842005756e-01F; + float v214 = 3.7415717312460811e-01F; + float v215 = -3.7415717312460811e-01F; + float v222 = 4.9929922194110327e-02F; + float v223 = -4.9929922194110327e-02F; float v230 = 6.5815896284539266e-01F; float v231 = -6.5815896284539266e-01F; float v238 = 6.3306543373877577e-01F; @@ -2665,12 +2665,12 @@ void armral_fft_cf32_cf32_cf32_ac_n_uu11(const 
armral_cmplx_f32_t *restrict x, float v542 = 5.5486073394528512e-01F; float v545 = 1.2412944743900585e+00F; float v546 = -1.2412944743900585e+00F; - float v552 = 2.0897833842005753e-01F; - float v553 = -2.0897833842005753e-01F; - float v559 = 3.7415717312460806e-01F; - float v560 = -3.7415717312460806e-01F; - float v566 = 4.9929922194110354e-02F; - float v567 = -4.9929922194110354e-02F; + float v552 = 2.0897833842005756e-01F; + float v553 = -2.0897833842005756e-01F; + float v559 = 3.7415717312460811e-01F; + float v560 = -3.7415717312460811e-01F; + float v566 = 4.9929922194110327e-02F; + float v567 = -4.9929922194110327e-02F; float v573 = 6.5815896284539266e-01F; float v574 = -6.5815896284539266e-01F; float v580 = 6.3306543373877577e-01F; @@ -2881,9 +2881,9 @@ void armral_fft_cf32_cf32_cf32_ac_n_uu11(const armral_cmplx_f32_t *restrict x, float v185 = 1.0702757469471715e+00F; float v190 = 5.5486073394528512e-01F; float v195 = -1.2412944743900585e+00F; - float v202 = -2.0897833842005753e-01F; - float v209 = -3.7415717312460806e-01F; - float v216 = -4.9929922194110354e-02F; + float v202 = -2.0897833842005756e-01F; + float v209 = -3.7415717312460811e-01F; + float v216 = -4.9929922194110327e-02F; float v223 = -6.5815896284539266e-01F; float v230 = -6.3306543373877577e-01F; float v237 = -1.0822460581641109e+00F; @@ -12672,12 +12672,12 @@ void armral_fft_cf32_cf32_cf32_ac_n_uu22(const armral_cmplx_f32_t *restrict x, float v512 = 5.5486073394528512e-01F; float v516 = 1.2412944743900585e+00F; float v517 = -1.2412944743900585e+00F; - float v524 = 2.0897833842005753e-01F; - float v525 = -2.0897833842005753e-01F; - float v532 = 3.7415717312460806e-01F; - float v533 = -3.7415717312460806e-01F; - float v540 = 4.9929922194110354e-02F; - float v541 = -4.9929922194110354e-02F; + float v524 = 2.0897833842005756e-01F; + float v525 = -2.0897833842005756e-01F; + float v532 = 3.7415717312460811e-01F; + float v533 = -3.7415717312460811e-01F; + float v540 = 4.9929922194110327e-02F; + float v541 = -4.9929922194110327e-02F; float v548 = 6.5815896284539266e-01F; float v549 = -6.5815896284539266e-01F; float v556 = 6.3306543373877577e-01F; @@ -13096,12 +13096,12 @@ void armral_fft_cf32_cf32_cf32_ac_n_uu22(const armral_cmplx_f32_t *restrict x, float v1212 = 5.5486073394528512e-01F; float v1215 = 1.2412944743900585e+00F; float v1216 = -1.2412944743900585e+00F; - float v1222 = 2.0897833842005753e-01F; - float v1223 = -2.0897833842005753e-01F; - float v1229 = 3.7415717312460806e-01F; - float v1230 = -3.7415717312460806e-01F; - float v1236 = 4.9929922194110354e-02F; - float v1237 = -4.9929922194110354e-02F; + float v1222 = 2.0897833842005756e-01F; + float v1223 = -2.0897833842005756e-01F; + float v1229 = 3.7415717312460811e-01F; + float v1230 = -3.7415717312460811e-01F; + float v1236 = 4.9929922194110327e-02F; + float v1237 = -4.9929922194110327e-02F; float v1243 = 6.5815896284539266e-01F; float v1244 = -6.5815896284539266e-01F; float v1250 = 6.3306543373877577e-01F; @@ -13470,9 +13470,9 @@ void armral_fft_cf32_cf32_cf32_ac_n_uu22(const armral_cmplx_f32_t *restrict x, float v493 = 1.0702757469471715e+00F; float v498 = 5.5486073394528512e-01F; float v503 = -1.2412944743900585e+00F; - float v510 = -2.0897833842005753e-01F; - float v517 = -3.7415717312460806e-01F; - float v524 = -4.9929922194110354e-02F; + float v510 = -2.0897833842005756e-01F; + float v517 = -3.7415717312460811e-01F; + float v524 = -4.9929922194110327e-02F; float v531 = -6.5815896284539266e-01F; float v538 = -6.3306543373877577e-01F; float v545 = 
-1.0822460581641109e+00F; diff --git a/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ac_n_uu.h b/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ac_n_uu.h index 00c743b3bf6d151ab6f9e63e7cf973125f15b475..9b78818cb0a752c60b3a344c140d72b267d11e86 100644 --- a/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ac_n_uu.h +++ b/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ac_n_uu.h @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ac_t_uu.c b/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ac_t_uu.c index bab6391bcdca68f601cc1872f59bee3ef43978d2..53ef2836df77de1ff664d0aeadd949f7cf1a9286 100644 --- a/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ac_t_uu.c +++ b/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ac_t_uu.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "fft_cf32_cf32_cf32_ac_t_uu.h" @@ -3449,12 +3449,12 @@ void armral_fft_cf32_cf32_cf32_ac_t_uu11(const armral_cmplx_f32_t *restrict x, float v434 = 5.5486073394528512e-01F; float v438 = 1.2412944743900585e+00F; float v439 = -1.2412944743900585e+00F; - float v446 = 2.0897833842005753e-01F; - float v447 = -2.0897833842005753e-01F; - float v454 = 3.7415717312460806e-01F; - float v455 = -3.7415717312460806e-01F; - float v462 = 4.9929922194110354e-02F; - float v463 = -4.9929922194110354e-02F; + float v446 = 2.0897833842005756e-01F; + float v447 = -2.0897833842005756e-01F; + float v454 = 3.7415717312460811e-01F; + float v455 = -3.7415717312460811e-01F; + float v462 = 4.9929922194110327e-02F; + float v463 = -4.9929922194110327e-02F; float v470 = 6.5815896284539266e-01F; float v471 = -6.5815896284539266e-01F; float v478 = 6.3306543373877577e-01F; @@ -3773,12 +3773,12 @@ void armral_fft_cf32_cf32_cf32_ac_t_uu11(const armral_cmplx_f32_t *restrict x, float v982 = 5.5486073394528512e-01F; float v985 = 1.2412944743900585e+00F; float v986 = -1.2412944743900585e+00F; - float v992 = 2.0897833842005753e-01F; - float v993 = -2.0897833842005753e-01F; - float v999 = 3.7415717312460806e-01F; - float v1000 = -3.7415717312460806e-01F; - float v1006 = 4.9929922194110354e-02F; - float v1007 = -4.9929922194110354e-02F; + float v992 = 2.0897833842005756e-01F; + float v993 = -2.0897833842005756e-01F; + float v999 = 3.7415717312460811e-01F; + float v1000 = -3.7415717312460811e-01F; + float v1006 = 4.9929922194110327e-02F; + float v1007 = -4.9929922194110327e-02F; float v1013 = 6.5815896284539266e-01F; float v1014 = -6.5815896284539266e-01F; float v1020 = 6.3306543373877577e-01F; @@ -4051,9 +4051,9 @@ void armral_fft_cf32_cf32_cf32_ac_t_uu11(const armral_cmplx_f32_t *restrict x, float v265 = 1.0702757469471715e+00F; float v270 = 5.5486073394528512e-01F; float v275 = -1.2412944743900585e+00F; - float v282 = -2.0897833842005753e-01F; - float v289 = -3.7415717312460806e-01F; - float v296 = -4.9929922194110354e-02F; + float v282 = -2.0897833842005756e-01F; + float v289 = -3.7415717312460811e-01F; + float v296 = -4.9929922194110327e-02F; float v303 = -6.5815896284539266e-01F; float v310 = -6.3306543373877577e-01F; float v317 = -1.0822460581641109e+00F; @@ -17320,12 +17320,12 @@ void armral_fft_cf32_cf32_cf32_ac_t_uu22(const armral_cmplx_f32_t *restrict x, float v1016 = 5.5486073394528512e-01F; float v1020 = 1.2412944743900585e+00F; float v1021 = -1.2412944743900585e+00F; - float v1028 = 2.0897833842005753e-01F; - float v1029 = -2.0897833842005753e-01F; - 
float v1036 = 3.7415717312460806e-01F; - float v1037 = -3.7415717312460806e-01F; - float v1044 = 4.9929922194110354e-02F; - float v1045 = -4.9929922194110354e-02F; + float v1028 = 2.0897833842005756e-01F; + float v1029 = -2.0897833842005756e-01F; + float v1036 = 3.7415717312460811e-01F; + float v1037 = -3.7415717312460811e-01F; + float v1044 = 4.9929922194110327e-02F; + float v1045 = -4.9929922194110327e-02F; float v1052 = 6.5815896284539266e-01F; float v1053 = -6.5815896284539266e-01F; float v1060 = 6.3306543373877577e-01F; @@ -17912,12 +17912,12 @@ void armral_fft_cf32_cf32_cf32_ac_t_uu22(const armral_cmplx_f32_t *restrict x, float v2136 = 5.5486073394528512e-01F; float v2139 = 1.2412944743900585e+00F; float v2140 = -1.2412944743900585e+00F; - float v2146 = 2.0897833842005753e-01F; - float v2147 = -2.0897833842005753e-01F; - float v2153 = 3.7415717312460806e-01F; - float v2154 = -3.7415717312460806e-01F; - float v2160 = 4.9929922194110354e-02F; - float v2161 = -4.9929922194110354e-02F; + float v2146 = 2.0897833842005756e-01F; + float v2147 = -2.0897833842005756e-01F; + float v2153 = 3.7415717312460811e-01F; + float v2154 = -3.7415717312460811e-01F; + float v2160 = 4.9929922194110327e-02F; + float v2161 = -4.9929922194110327e-02F; float v2167 = 6.5815896284539266e-01F; float v2168 = -6.5815896284539266e-01F; float v2174 = 6.3306543373877577e-01F; @@ -18414,9 +18414,9 @@ void armral_fft_cf32_cf32_cf32_ac_t_uu22(const armral_cmplx_f32_t *restrict x, float v661 = 1.0702757469471715e+00F; float v666 = 5.5486073394528512e-01F; float v671 = -1.2412944743900585e+00F; - float v678 = -2.0897833842005753e-01F; - float v685 = -3.7415717312460806e-01F; - float v692 = -4.9929922194110354e-02F; + float v678 = -2.0897833842005756e-01F; + float v685 = -3.7415717312460811e-01F; + float v692 = -4.9929922194110327e-02F; float v699 = -6.5815896284539266e-01F; float v706 = -6.3306543373877577e-01F; float v713 = -1.0822460581641109e+00F; diff --git a/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ac_t_uu.h b/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ac_t_uu.h index 6dc44856f5f8d723a9c94378ff9a5ea1e8bde023..37e39fdb6452b37a9f4f6dd896470ba6248f0acf 100644 --- a/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ac_t_uu.h +++ b/src/LowerPHY/FFT/fft_cf32_cf32_cf32_ac_t_uu.h @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/src/LowerPHY/FFT/fft_cf32_cf32_cs16_ab_t_gu.c b/src/LowerPHY/FFT/fft_cf32_cf32_cs16_ab_t_gu.c index 51274540f71bfd85c373d583fe347f3dd8fd6b9e..86d9544f013d9258314ffb61ec46f0f862ae5675 100644 --- a/src/LowerPHY/FFT/fft_cf32_cf32_cs16_ab_t_gu.c +++ b/src/LowerPHY/FFT/fft_cf32_cf32_cs16_ab_t_gu.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "fft_cf32_cf32_cs16_ab_t_gu.h" @@ -2909,12 +2909,12 @@ void armral_fft_cf32_cf32_cs16_ab_t_gu11(const armral_cmplx_f32_t *restrict x, float v416 = 5.5486073394528512e-01F; float v419 = 1.2412944743900585e+00F; float v420 = -1.2412944743900585e+00F; - float v426 = 2.0897833842005753e-01F; - float v427 = -2.0897833842005753e-01F; - float v433 = 3.7415717312460806e-01F; - float v434 = -3.7415717312460806e-01F; - float v440 = 4.9929922194110354e-02F; - float v441 = -4.9929922194110354e-02F; + float v426 = 2.0897833842005756e-01F; + float v427 = -2.0897833842005756e-01F; + float v433 = 3.7415717312460811e-01F; + float v434 = 
-3.7415717312460811e-01F; + float v440 = 4.9929922194110327e-02F; + float v441 = -4.9929922194110327e-02F; float v447 = 6.5815896284539266e-01F; float v448 = -6.5815896284539266e-01F; float v454 = 6.3306543373877577e-01F; @@ -3230,9 +3230,9 @@ void armral_fft_cf32_cf32_cs16_ab_t_gu11(const armral_cmplx_f32_t *restrict x, float v325 = 1.0702757469471715e+00F; float v330 = 5.5486073394528512e-01F; float v335 = -1.2412944743900585e+00F; - float v342 = -2.0897833842005753e-01F; - float v349 = -3.7415717312460806e-01F; - float v356 = -4.9929922194110354e-02F; + float v342 = -2.0897833842005756e-01F; + float v349 = -3.7415717312460811e-01F; + float v356 = -4.9929922194110327e-02F; float v363 = -6.5815896284539266e-01F; float v370 = -6.3306543373877577e-01F; float v377 = -1.0822460581641109e+00F; @@ -14068,12 +14068,12 @@ void armral_fft_cf32_cf32_cs16_ab_t_gu22(const armral_cmplx_f32_t *restrict x, float v977 = 5.5486073394528512e-01F; float v980 = 1.2412944743900585e+00F; float v981 = -1.2412944743900585e+00F; - float v987 = 2.0897833842005753e-01F; - float v988 = -2.0897833842005753e-01F; - float v994 = 3.7415717312460806e-01F; - float v995 = -3.7415717312460806e-01F; - float v1001 = 4.9929922194110354e-02F; - float v1002 = -4.9929922194110354e-02F; + float v987 = 2.0897833842005756e-01F; + float v988 = -2.0897833842005756e-01F; + float v994 = 3.7415717312460811e-01F; + float v995 = -3.7415717312460811e-01F; + float v1001 = 4.9929922194110327e-02F; + float v1002 = -4.9929922194110327e-02F; float v1008 = 6.5815896284539266e-01F; float v1009 = -6.5815896284539266e-01F; float v1015 = 6.3306543373877577e-01F; @@ -14657,9 +14657,9 @@ void armral_fft_cf32_cf32_cs16_ab_t_gu22(const armral_cmplx_f32_t *restrict x, float v787 = 1.0702757469471715e+00F; float v792 = 5.5486073394528512e-01F; float v797 = -1.2412944743900585e+00F; - float v804 = -2.0897833842005753e-01F; - float v811 = -3.7415717312460806e-01F; - float v818 = -4.9929922194110354e-02F; + float v804 = -2.0897833842005756e-01F; + float v811 = -3.7415717312460811e-01F; + float v818 = -4.9929922194110327e-02F; float v825 = -6.5815896284539266e-01F; float v832 = -6.3306543373877577e-01F; float v839 = -1.0822460581641109e+00F; diff --git a/src/LowerPHY/FFT/fft_cf32_cf32_cs16_ab_t_gu.h b/src/LowerPHY/FFT/fft_cf32_cf32_cs16_ab_t_gu.h index a540a90aebe5340cfd1c30e17ff183a166c44751..aaba87482ad180df6e4b640329b45304d13459c6 100644 --- a/src/LowerPHY/FFT/fft_cf32_cf32_cs16_ab_t_gu.h +++ b/src/LowerPHY/FFT/fft_cf32_cf32_cs16_ab_t_gu.h @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/src/LowerPHY/FFT/fft_cf32_cf32_cs16_ac_n_uu.c b/src/LowerPHY/FFT/fft_cf32_cf32_cs16_ac_n_uu.c index fede57276ae9d658ab46443f90b6df28680109c1..33d7282ad7d9d2584cc737d7db2e2df3f362990a 100644 --- a/src/LowerPHY/FFT/fft_cf32_cf32_cs16_ac_n_uu.c +++ b/src/LowerPHY/FFT/fft_cf32_cf32_cs16_ac_n_uu.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "fft_cf32_cf32_cs16_ac_n_uu.h" @@ -2853,12 +2853,12 @@ void armral_fft_cf32_cf32_cs16_ac_n_uu11(const armral_cmplx_f32_t *restrict x, float v194 = 5.5486073394528512e-01F; float v198 = 1.2412944743900585e+00F; float v199 = -1.2412944743900585e+00F; - float v206 = 2.0897833842005753e-01F; - float v207 = -2.0897833842005753e-01F; - float v214 = 3.7415717312460806e-01F; - float v215 
= -3.7415717312460806e-01F; - float v222 = 4.9929922194110354e-02F; - float v223 = -4.9929922194110354e-02F; + float v206 = 2.0897833842005756e-01F; + float v207 = -2.0897833842005756e-01F; + float v214 = 3.7415717312460811e-01F; + float v215 = -3.7415717312460811e-01F; + float v222 = 4.9929922194110327e-02F; + float v223 = -4.9929922194110327e-02F; float v230 = 6.5815896284539266e-01F; float v231 = -6.5815896284539266e-01F; float v238 = 6.3306543373877577e-01F; @@ -3108,12 +3108,12 @@ void armral_fft_cf32_cf32_cs16_ac_n_uu11(const armral_cmplx_f32_t *restrict x, float v553 = 5.5486073394528512e-01F; float v556 = 1.2412944743900585e+00F; float v557 = -1.2412944743900585e+00F; - float v563 = 2.0897833842005753e-01F; - float v564 = -2.0897833842005753e-01F; - float v570 = 3.7415717312460806e-01F; - float v571 = -3.7415717312460806e-01F; - float v577 = 4.9929922194110354e-02F; - float v578 = -4.9929922194110354e-02F; + float v563 = 2.0897833842005756e-01F; + float v564 = -2.0897833842005756e-01F; + float v570 = 3.7415717312460811e-01F; + float v571 = -3.7415717312460811e-01F; + float v577 = 4.9929922194110327e-02F; + float v578 = -4.9929922194110327e-02F; float v584 = 6.5815896284539266e-01F; float v585 = -6.5815896284539266e-01F; float v591 = 6.3306543373877577e-01F; @@ -3346,9 +3346,9 @@ void armral_fft_cf32_cf32_cs16_ac_n_uu11(const armral_cmplx_f32_t *restrict x, float v185 = 1.0702757469471715e+00F; float v190 = 5.5486073394528512e-01F; float v195 = -1.2412944743900585e+00F; - float v202 = -2.0897833842005753e-01F; - float v209 = -3.7415717312460806e-01F; - float v216 = -4.9929922194110354e-02F; + float v202 = -2.0897833842005756e-01F; + float v209 = -3.7415717312460811e-01F; + float v216 = -4.9929922194110327e-02F; float v223 = -6.5815896284539266e-01F; float v230 = -6.3306543373877577e-01F; float v237 = -1.0822460581641109e+00F; @@ -14512,12 +14512,12 @@ void armral_fft_cf32_cf32_cs16_ac_n_uu22(const armral_cmplx_f32_t *restrict x, float v512 = 5.5486073394528512e-01F; float v516 = 1.2412944743900585e+00F; float v517 = -1.2412944743900585e+00F; - float v524 = 2.0897833842005753e-01F; - float v525 = -2.0897833842005753e-01F; - float v532 = 3.7415717312460806e-01F; - float v533 = -3.7415717312460806e-01F; - float v540 = 4.9929922194110354e-02F; - float v541 = -4.9929922194110354e-02F; + float v524 = 2.0897833842005756e-01F; + float v525 = -2.0897833842005756e-01F; + float v532 = 3.7415717312460811e-01F; + float v533 = -3.7415717312460811e-01F; + float v540 = 4.9929922194110327e-02F; + float v541 = -4.9929922194110327e-02F; float v548 = 6.5815896284539266e-01F; float v549 = -6.5815896284539266e-01F; float v556 = 6.3306543373877577e-01F; @@ -14958,12 +14958,12 @@ void armral_fft_cf32_cf32_cs16_ac_n_uu22(const armral_cmplx_f32_t *restrict x, float v1234 = 5.5486073394528512e-01F; float v1237 = 1.2412944743900585e+00F; float v1238 = -1.2412944743900585e+00F; - float v1244 = 2.0897833842005753e-01F; - float v1245 = -2.0897833842005753e-01F; - float v1251 = 3.7415717312460806e-01F; - float v1252 = -3.7415717312460806e-01F; - float v1258 = 4.9929922194110354e-02F; - float v1259 = -4.9929922194110354e-02F; + float v1244 = 2.0897833842005756e-01F; + float v1245 = -2.0897833842005756e-01F; + float v1251 = 3.7415717312460811e-01F; + float v1252 = -3.7415717312460811e-01F; + float v1258 = 4.9929922194110327e-02F; + float v1259 = -4.9929922194110327e-02F; float v1265 = 6.5815896284539266e-01F; float v1266 = -6.5815896284539266e-01F; float v1272 = 6.3306543373877577e-01F; @@ -15376,9 +15376,9 @@ void 
armral_fft_cf32_cf32_cs16_ac_n_uu22(const armral_cmplx_f32_t *restrict x, float v493 = 1.0702757469471715e+00F; float v498 = 5.5486073394528512e-01F; float v503 = -1.2412944743900585e+00F; - float v510 = -2.0897833842005753e-01F; - float v517 = -3.7415717312460806e-01F; - float v524 = -4.9929922194110354e-02F; + float v510 = -2.0897833842005756e-01F; + float v517 = -3.7415717312460811e-01F; + float v524 = -4.9929922194110327e-02F; float v531 = -6.5815896284539266e-01F; float v538 = -6.3306543373877577e-01F; float v545 = -1.0822460581641109e+00F; diff --git a/src/LowerPHY/FFT/fft_cf32_cf32_cs16_ac_n_uu.h b/src/LowerPHY/FFT/fft_cf32_cf32_cs16_ac_n_uu.h index 9dfd66c512ec07d0c547c559fc3b822203753b1d..8bbb2dee041ca4ce1c8f8cd28abb2a14eb2009a5 100644 --- a/src/LowerPHY/FFT/fft_cf32_cf32_cs16_ac_n_uu.h +++ b/src/LowerPHY/FFT/fft_cf32_cf32_cs16_ac_n_uu.h @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/src/LowerPHY/FFT/fft_cf32_kernel_lookup.c b/src/LowerPHY/FFT/fft_cf32_kernel_lookup.c index 531eba9a5b1ed11df46524f2c0b50e3cd59d9add..cac45e521448cc54c9def99fd943e868f7f3eb8b 100644 --- a/src/LowerPHY/FFT/fft_cf32_kernel_lookup.c +++ b/src/LowerPHY/FFT/fft_cf32_kernel_lookup.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "fft_cf32_kernel_lookup.h" diff --git a/src/LowerPHY/FFT/fft_cf32_kernel_lookup.h b/src/LowerPHY/FFT/fft_cf32_kernel_lookup.h index b0a10f3d4d919b1e15ffdf95af7ffad177ac5c89..9f0f294bb7306a1914f66681128cbb8a8831e973 100644 --- a/src/LowerPHY/FFT/fft_cf32_kernel_lookup.h +++ b/src/LowerPHY/FFT/fft_cf32_kernel_lookup.h @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/src/LowerPHY/FFT/fft_cs16.cpp b/src/LowerPHY/FFT/fft_cs16.cpp index 50ac067b341bf0d1f5b571ed617118111b713a44..2da312ac658911449b215da9769f31be92b4b683 100644 --- a/src/LowerPHY/FFT/fft_cs16.cpp +++ b/src/LowerPHY/FFT/fft_cs16.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "fft_execute.hpp" diff --git a/src/LowerPHY/FFT/fft_cs16_cf32_cf32_ac_n_uu.c b/src/LowerPHY/FFT/fft_cs16_cf32_cf32_ac_n_uu.c index e8a2aa01f508d25164ecb3a7f0d498fc1f5685f7..ecb656613d38d799e19a64538511ae1dd39b6561 100644 --- a/src/LowerPHY/FFT/fft_cs16_cf32_cf32_ac_n_uu.c +++ b/src/LowerPHY/FFT/fft_cs16_cf32_cf32_ac_n_uu.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "fft_cs16_cf32_cf32_ac_n_uu.h" @@ -955,12 +955,12 @@ void armral_fft_cs16_cf32_cf32_ac_n_uu11(const armral_cmplx_int16_t *restrict x, float v205 = 5.5486073394528512e-01F; float v209 = 1.2412944743900585e+00F; float v210 = -1.2412944743900585e+00F; - float v217 = 2.0897833842005753e-01F; - float v218 = -2.0897833842005753e-01F; - float v225 = 3.7415717312460806e-01F; - float v226 = -3.7415717312460806e-01F; - float v233 = 4.9929922194110354e-02F; - float v234 = -4.9929922194110354e-02F; + float v217 = 2.0897833842005756e-01F; + float v218 = -2.0897833842005756e-01F; + float v225 = 3.7415717312460811e-01F; + float v226 = 
-3.7415717312460811e-01F; + float v233 = 4.9929922194110327e-02F; + float v234 = -4.9929922194110327e-02F; float v241 = 6.5815896284539266e-01F; float v242 = -6.5815896284539266e-01F; float v249 = 6.3306543373877577e-01F; @@ -1210,12 +1210,12 @@ void armral_fft_cs16_cf32_cf32_ac_n_uu11(const armral_cmplx_int16_t *restrict x, float v564 = 5.5486073394528512e-01F; float v567 = 1.2412944743900585e+00F; float v568 = -1.2412944743900585e+00F; - float v574 = 2.0897833842005753e-01F; - float v575 = -2.0897833842005753e-01F; - float v581 = 3.7415717312460806e-01F; - float v582 = -3.7415717312460806e-01F; - float v588 = 4.9929922194110354e-02F; - float v589 = -4.9929922194110354e-02F; + float v574 = 2.0897833842005756e-01F; + float v575 = -2.0897833842005756e-01F; + float v581 = 3.7415717312460811e-01F; + float v582 = -3.7415717312460811e-01F; + float v588 = 4.9929922194110327e-02F; + float v589 = -4.9929922194110327e-02F; float v595 = 6.5815896284539266e-01F; float v596 = -6.5815896284539266e-01F; float v602 = 6.3306543373877577e-01F; @@ -1437,9 +1437,9 @@ void armral_fft_cs16_cf32_cf32_ac_n_uu11(const armral_cmplx_int16_t *restrict x, float v196 = 1.0702757469471715e+00F; float v201 = 5.5486073394528512e-01F; float v206 = -1.2412944743900585e+00F; - float v213 = -2.0897833842005753e-01F; - float v220 = -3.7415717312460806e-01F; - float v227 = -4.9929922194110354e-02F; + float v213 = -2.0897833842005756e-01F; + float v220 = -3.7415717312460811e-01F; + float v227 = -4.9929922194110327e-02F; float v234 = -6.5815896284539266e-01F; float v241 = -6.3306543373877577e-01F; float v248 = -1.0822460581641109e+00F; @@ -11555,12 +11555,12 @@ void armral_fft_cs16_cf32_cf32_ac_n_uu22(const armral_cmplx_int16_t *restrict x, float v534 = 5.5486073394528512e-01F; float v538 = 1.2412944743900585e+00F; float v539 = -1.2412944743900585e+00F; - float v546 = 2.0897833842005753e-01F; - float v547 = -2.0897833842005753e-01F; - float v554 = 3.7415717312460806e-01F; - float v555 = -3.7415717312460806e-01F; - float v562 = 4.9929922194110354e-02F; - float v563 = -4.9929922194110354e-02F; + float v546 = 2.0897833842005756e-01F; + float v547 = -2.0897833842005756e-01F; + float v554 = 3.7415717312460811e-01F; + float v555 = -3.7415717312460811e-01F; + float v562 = 4.9929922194110327e-02F; + float v563 = -4.9929922194110327e-02F; float v570 = 6.5815896284539266e-01F; float v571 = -6.5815896284539266e-01F; float v578 = 6.3306543373877577e-01F; @@ -12001,12 +12001,12 @@ void armral_fft_cs16_cf32_cf32_ac_n_uu22(const armral_cmplx_int16_t *restrict x, float v1256 = 5.5486073394528512e-01F; float v1259 = 1.2412944743900585e+00F; float v1260 = -1.2412944743900585e+00F; - float v1266 = 2.0897833842005753e-01F; - float v1267 = -2.0897833842005753e-01F; - float v1273 = 3.7415717312460806e-01F; - float v1274 = -3.7415717312460806e-01F; - float v1280 = 4.9929922194110354e-02F; - float v1281 = -4.9929922194110354e-02F; + float v1266 = 2.0897833842005756e-01F; + float v1267 = -2.0897833842005756e-01F; + float v1273 = 3.7415717312460811e-01F; + float v1274 = -3.7415717312460811e-01F; + float v1280 = 4.9929922194110327e-02F; + float v1281 = -4.9929922194110327e-02F; float v1287 = 6.5815896284539266e-01F; float v1288 = -6.5815896284539266e-01F; float v1294 = 6.3306543373877577e-01F; @@ -12397,9 +12397,9 @@ void armral_fft_cs16_cf32_cf32_ac_n_uu22(const armral_cmplx_int16_t *restrict x, float v515 = 1.0702757469471715e+00F; float v520 = 5.5486073394528512e-01F; float v525 = -1.2412944743900585e+00F; - float v532 = -2.0897833842005753e-01F; - 
float v539 = -3.7415717312460806e-01F; - float v546 = -4.9929922194110354e-02F; + float v532 = -2.0897833842005756e-01F; + float v539 = -3.7415717312460811e-01F; + float v546 = -4.9929922194110327e-02F; float v553 = -6.5815896284539266e-01F; float v560 = -6.3306543373877577e-01F; float v567 = -1.0822460581641109e+00F; diff --git a/src/LowerPHY/FFT/fft_cs16_cf32_cf32_ac_n_uu.h b/src/LowerPHY/FFT/fft_cs16_cf32_cf32_ac_n_uu.h index d4ff9291acdfdb3e3f036e0482a77d15c24195d3..fe8b7508f48c0d880a18e9ab55a6e1dc88f23d60 100644 --- a/src/LowerPHY/FFT/fft_cs16_cf32_cf32_ac_n_uu.h +++ b/src/LowerPHY/FFT/fft_cs16_cf32_cf32_ac_n_uu.h @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/src/LowerPHY/FFT/fft_cs16_cf32_cs16_ac_n_uu.c b/src/LowerPHY/FFT/fft_cs16_cf32_cs16_ac_n_uu.c index e07c745cd862339472a6b9bf98a10eef5ceaeee8..609bf1df3712a817bc8d2527054742fba3e70839 100644 --- a/src/LowerPHY/FFT/fft_cs16_cf32_cs16_ac_n_uu.c +++ b/src/LowerPHY/FFT/fft_cs16_cf32_cs16_ac_n_uu.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "fft_cs16_cf32_cs16_ac_n_uu.h" @@ -3123,12 +3123,12 @@ void armral_fft_cs16_cf32_cs16_ac_n_uu11(const armral_cmplx_int16_t *restrict x, float v205 = 5.5486073394528512e-01F; float v209 = 1.2412944743900585e+00F; float v210 = -1.2412944743900585e+00F; - float v217 = 2.0897833842005753e-01F; - float v218 = -2.0897833842005753e-01F; - float v225 = 3.7415717312460806e-01F; - float v226 = -3.7415717312460806e-01F; - float v233 = 4.9929922194110354e-02F; - float v234 = -4.9929922194110354e-02F; + float v217 = 2.0897833842005756e-01F; + float v218 = -2.0897833842005756e-01F; + float v225 = 3.7415717312460811e-01F; + float v226 = -3.7415717312460811e-01F; + float v233 = 4.9929922194110327e-02F; + float v234 = -4.9929922194110327e-02F; float v241 = 6.5815896284539266e-01F; float v242 = -6.5815896284539266e-01F; float v249 = 6.3306543373877577e-01F; @@ -3389,12 +3389,12 @@ void armral_fft_cs16_cf32_cs16_ac_n_uu11(const armral_cmplx_int16_t *restrict x, float v575 = 5.5486073394528512e-01F; float v578 = 1.2412944743900585e+00F; float v579 = -1.2412944743900585e+00F; - float v585 = 2.0897833842005753e-01F; - float v586 = -2.0897833842005753e-01F; - float v592 = 3.7415717312460806e-01F; - float v593 = -3.7415717312460806e-01F; - float v599 = 4.9929922194110354e-02F; - float v600 = -4.9929922194110354e-02F; + float v585 = 2.0897833842005756e-01F; + float v586 = -2.0897833842005756e-01F; + float v592 = 3.7415717312460811e-01F; + float v593 = -3.7415717312460811e-01F; + float v599 = 4.9929922194110327e-02F; + float v600 = -4.9929922194110327e-02F; float v606 = 6.5815896284539266e-01F; float v607 = -6.5815896284539266e-01F; float v613 = 6.3306543373877577e-01F; @@ -3638,9 +3638,9 @@ void armral_fft_cs16_cf32_cs16_ac_n_uu11(const armral_cmplx_int16_t *restrict x, float v196 = 1.0702757469471715e+00F; float v201 = 5.5486073394528512e-01F; float v206 = -1.2412944743900585e+00F; - float v213 = -2.0897833842005753e-01F; - float v220 = -3.7415717312460806e-01F; - float v227 = -4.9929922194110354e-02F; + float v213 = -2.0897833842005756e-01F; + float v220 = -3.7415717312460811e-01F; + float v227 = -4.9929922194110327e-02F; float v234 = -6.5815896284539266e-01F; float v241 = -6.3306543373877577e-01F; float v248 = -1.0822460581641109e+00F; @@ -15662,12 
+15662,12 @@ void armral_fft_cs16_cf32_cs16_ac_n_uu22(const armral_cmplx_int16_t *restrict x, float v534 = 5.5486073394528512e-01F; float v538 = 1.2412944743900585e+00F; float v539 = -1.2412944743900585e+00F; - float v546 = 2.0897833842005753e-01F; - float v547 = -2.0897833842005753e-01F; - float v554 = 3.7415717312460806e-01F; - float v555 = -3.7415717312460806e-01F; - float v562 = 4.9929922194110354e-02F; - float v563 = -4.9929922194110354e-02F; + float v546 = 2.0897833842005756e-01F; + float v547 = -2.0897833842005756e-01F; + float v554 = 3.7415717312460811e-01F; + float v555 = -3.7415717312460811e-01F; + float v562 = 4.9929922194110327e-02F; + float v563 = -4.9929922194110327e-02F; float v570 = 6.5815896284539266e-01F; float v571 = -6.5815896284539266e-01F; float v578 = 6.3306543373877577e-01F; @@ -16130,12 +16130,12 @@ void armral_fft_cs16_cf32_cs16_ac_n_uu22(const armral_cmplx_int16_t *restrict x, float v1278 = 5.5486073394528512e-01F; float v1281 = 1.2412944743900585e+00F; float v1282 = -1.2412944743900585e+00F; - float v1288 = 2.0897833842005753e-01F; - float v1289 = -2.0897833842005753e-01F; - float v1295 = 3.7415717312460806e-01F; - float v1296 = -3.7415717312460806e-01F; - float v1302 = 4.9929922194110354e-02F; - float v1303 = -4.9929922194110354e-02F; + float v1288 = 2.0897833842005756e-01F; + float v1289 = -2.0897833842005756e-01F; + float v1295 = 3.7415717312460811e-01F; + float v1296 = -3.7415717312460811e-01F; + float v1302 = 4.9929922194110327e-02F; + float v1303 = -4.9929922194110327e-02F; float v1309 = 6.5815896284539266e-01F; float v1310 = -6.5815896284539266e-01F; float v1316 = 6.3306543373877577e-01F; @@ -16570,9 +16570,9 @@ void armral_fft_cs16_cf32_cs16_ac_n_uu22(const armral_cmplx_int16_t *restrict x, float v515 = 1.0702757469471715e+00F; float v520 = 5.5486073394528512e-01F; float v525 = -1.2412944743900585e+00F; - float v532 = -2.0897833842005753e-01F; - float v539 = -3.7415717312460806e-01F; - float v546 = -4.9929922194110354e-02F; + float v532 = -2.0897833842005756e-01F; + float v539 = -3.7415717312460811e-01F; + float v546 = -4.9929922194110327e-02F; float v553 = -6.5815896284539266e-01F; float v560 = -6.3306543373877577e-01F; float v567 = -1.0822460581641109e+00F; diff --git a/src/LowerPHY/FFT/fft_cs16_cf32_cs16_ac_n_uu.h b/src/LowerPHY/FFT/fft_cs16_cf32_cs16_ac_n_uu.h index 95273f4aa5f2fa28a97e783e0204f55cd5162848..163f8631af756178af6c6919622c069d9933d34e 100644 --- a/src/LowerPHY/FFT/fft_cs16_cf32_cs16_ac_n_uu.h +++ b/src/LowerPHY/FFT/fft_cs16_cf32_cs16_ac_n_uu.h @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/src/LowerPHY/FFT/fft_cs16_kernel_lookup.c b/src/LowerPHY/FFT/fft_cs16_kernel_lookup.c index c3163fcdda7dc3b3f8e0eb83d9f9b29a15338392..20287bc2c9c459a0020f2eb6e719ccb7b8352960 100644 --- a/src/LowerPHY/FFT/fft_cs16_kernel_lookup.c +++ b/src/LowerPHY/FFT/fft_cs16_kernel_lookup.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "fft_cs16_kernel_lookup.h" diff --git a/src/LowerPHY/FFT/fft_cs16_kernel_lookup.h b/src/LowerPHY/FFT/fft_cs16_kernel_lookup.h index 98a229f9b2967d948d1d3ef90dac0f487a674e4b..8476f0ed6d46bb5dbb5121a70875534fa9658d99 100644 --- a/src/LowerPHY/FFT/fft_cs16_kernel_lookup.h +++ b/src/LowerPHY/FFT/fft_cs16_kernel_lookup.h @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - 
Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/src/LowerPHY/FFT/fft_execute.cpp b/src/LowerPHY/FFT/fft_execute.cpp index 420c70ff05e84d8cbf3fe4564220f95a898dfe08..02313339e585ec1e84f05c5b2458b02fd11a3d63 100644 --- a/src/LowerPHY/FFT/fft_execute.cpp +++ b/src/LowerPHY/FFT/fft_execute.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "fft_execute.hpp" diff --git a/src/LowerPHY/FFT/fft_execute.hpp b/src/LowerPHY/FFT/fft_execute.hpp index 803a3d33314ed1f273e80fc3631a7aa1b44e9571..714d2575a3a52641952dc553ce57207faab1c047 100644 --- a/src/LowerPHY/FFT/fft_execute.hpp +++ b/src/LowerPHY/FFT/fft_execute.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/src/LowerPHY/FFT/fft_helper.h b/src/LowerPHY/FFT/fft_helper.h index 9dfbc609fb6dd21fdf7b193d1c8f5e98b0ec9bd8..98f7c51ac48c49cd4d46e13e0286f9ab3eb17a92 100644 --- a/src/LowerPHY/FFT/fft_helper.h +++ b/src/LowerPHY/FFT/fft_helper.h @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/src/LowerPHY/FFT/fft_level.cpp b/src/LowerPHY/FFT/fft_level.cpp index b5c45e1805f04c4d23f273e9151f939fd343ad14..a4402dc3658e6e4d6bdf462547c868548bc278c8 100644 --- a/src/LowerPHY/FFT/fft_level.cpp +++ b/src/LowerPHY/FFT/fft_level.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "fft_level.hpp" diff --git a/src/LowerPHY/FFT/fft_level.hpp b/src/LowerPHY/FFT/fft_level.hpp index 80bbdef3133422e7534d366941827a99cb5ad5f9..06cc0df09f3d0e3838b75c33426236b0520a5c32 100644 --- a/src/LowerPHY/FFT/fft_level.hpp +++ b/src/LowerPHY/FFT/fft_level.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/src/LowerPHY/FFT/fft_plan.cpp b/src/LowerPHY/FFT/fft_plan.cpp index f6a2e561e634eadbd3d4d42b7bbbf57a1107a2a0..833680a09ad7f7439fec55200435c071678d09cd 100644 --- a/src/LowerPHY/FFT/fft_plan.cpp +++ b/src/LowerPHY/FFT/fft_plan.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "fft_plan.hpp" #include "fft_cf32_kernel_lookup.h" diff --git a/src/LowerPHY/FFT/fft_plan.hpp b/src/LowerPHY/FFT/fft_plan.hpp index d36715f93a0a40dcdf2679d3d051d7db95e2fb39..419622390f395d13b37454b3df54e61fca6dd802 100644 --- a/src/LowerPHY/FFT/fft_plan.hpp +++ b/src/LowerPHY/FFT/fft_plan.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/src/LowerPHY/FFT/fft_types.hpp b/src/LowerPHY/FFT/fft_types.hpp index 7d6d67daf43ea9490f57828f4476d51b27c18184..65ebfa64dea0408242f43e1bc5db50909037ab98 100644 --- a/src/LowerPHY/FFT/fft_types.hpp +++ b/src/LowerPHY/FFT/fft_types.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited 
and/or its affiliates */ #pragma once diff --git a/src/LowerPHY/FFT/rader.cpp b/src/LowerPHY/FFT/rader.cpp index 309ebe82d1b5c973bd7f7ba738cf3a74389241e5..a05479cfc94daf8f4aaebf4107813ab6f03da2d4 100644 --- a/src/LowerPHY/FFT/rader.cpp +++ b/src/LowerPHY/FFT/rader.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "rader.hpp" diff --git a/src/LowerPHY/FFT/rader.hpp b/src/LowerPHY/FFT/rader.hpp index eacaabdad6b7d203bf3db0008ee1161b3e851859..6d1d21f004443425dd6b38d00718d0fe3c1e1e05 100644 --- a/src/LowerPHY/FFT/rader.hpp +++ b/src/LowerPHY/FFT/rader.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/src/LowerPHY/FFT/rader_generator.cpp b/src/LowerPHY/FFT/rader_generator.cpp index b2c1c6563409b5ba642c135c9f4ed1431bc7da70..89e138630b5646cd6e2f329a27b9b30a09a9994b 100644 --- a/src/LowerPHY/FFT/rader_generator.cpp +++ b/src/LowerPHY/FFT/rader_generator.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "rader_generator.hpp" diff --git a/src/LowerPHY/FFT/rader_generator.hpp b/src/LowerPHY/FFT/rader_generator.hpp index 30d6ca2cd4272e92f1745d9d48b960f0fcc0b1a2..bc219d9c100719b16dae1c63859d79564d9057de 100644 --- a/src/LowerPHY/FFT/rader_generator.hpp +++ b/src/LowerPHY/FFT/rader_generator.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/src/LowerPHY/FIR/arm_fir_filter_cf32.c b/src/LowerPHY/FIR/arm_fir_filter_cf32.c index 6bbefd7f0b6b53badad7a1e7c0c10590a8da6b1c..428e2312d704d3b845356c3fd1c0045730c4c087 100644 --- a/src/LowerPHY/FIR/arm_fir_filter_cf32.c +++ b/src/LowerPHY/FIR/arm_fir_filter_cf32.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" @@ -16,7 +16,7 @@ static inline svfloat32x4_t fir_sve_blk_4(svbool_t pg, const float *in, // Compute FIR for four vector-lengths of data. Coeffs array is // unrolled by 2 and we have 2 accumulators per vector length, as // explained in fir_sve_blk_2. In addition, loads and mlas are - // hand-interleaved in order to minimise latency. + // hand-interleaved in order to minimize latency. svfloat32_t y1_1 = svdup_f32(0); svfloat32_t y2_1 = svdup_f32(0); diff --git a/src/LowerPHY/FIR/arm_fir_filter_cf32_decimate_2.c b/src/LowerPHY/FIR/arm_fir_filter_cf32_decimate_2.c index 05446b13c60dabf1221014e0072f6b1ad4f4e18b..e60e5e0e3f520c9ce527aacd83ea44ca05da585b 100644 --- a/src/LowerPHY/FIR/arm_fir_filter_cf32_decimate_2.c +++ b/src/LowerPHY/FIR/arm_fir_filter_cf32_decimate_2.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" @@ -127,7 +127,7 @@ static inline void sv_fir_block_4(svbool_t pg, // Compute FIR for 4 vector-lengths of data (read 8 vector-lengths, write 4). // Coeffs array is unrolled by 2 and we have 2 accumulators per vector length, // as explained in sv_fir_block_2. In addition, loads and mlas are - // hand-interleaved in order to minimise latency. 
+ // hand-interleaved in order to minimize latency. const uint64_t *in = (const uint64_t *)input; svfloat32_t y_0_0 = svdup_f32(0); @@ -235,7 +235,7 @@ static inline void sv_fir_block_8(svbool_t pg, // Unlike the previous 2 versions, we only need 1 accumulator per vector // length, as we have enough accumulators to hide the latency of ld2 and cmla // without needing to split them in half. Again, loads and mlas are - // hand-interleaved in order to minimise latency. + // hand-interleaved in order to minimize latency. const uint64_t *in = (const uint64_t *)input; svfloat32_t y_0 = svdup_f32(0); svfloat32_t y_1 = svdup_f32(0); diff --git a/src/LowerPHY/FIR/arm_fir_filter_cs16.c b/src/LowerPHY/FIR/arm_fir_filter_cs16.c index 850279bfeeb7d8d77c2130534489b96d777c6ce1..71d03c1d423a1d6ffb0a1a065731c469006ba4ff 100644 --- a/src/LowerPHY/FIR/arm_fir_filter_cs16.c +++ b/src/LowerPHY/FIR/arm_fir_filter_cs16.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "intrinsics.h" @@ -438,11 +438,10 @@ armral_status armral_fir_filter_cs16( for (; i + svcntw() <= size; i += svcntw()) { sv_fir_block(input + i, coeffs, output + i, ptrue, taps); } - // Input array is not long enough to load svcntw() elements any more, so go 8 - // elements at a time (the spec requires size of the input to be rounded up to - // a multiple of 8) - svbool_t pg = svwhilelt_b16(0U, size + taps); - for (; i + 8 <= size; i += 8) { + // Input array is not long enough to load svcntw() elements any more, so + // process the remaining elements with predication + if (i < size) { + svbool_t pg = svwhilelt_b16(i, size); sv_fir_block(input + i, coeffs, output + i, pg, taps); } #else diff --git a/src/LowerPHY/FIR/arm_fir_filter_cs16_decimate_2.c b/src/LowerPHY/FIR/arm_fir_filter_cs16_decimate_2.c index 52c32a664b7793873faec67b6b66e2bbd440913c..0bb694772c7dab3a574fe09cb668ebf0f757622f 100644 --- a/src/LowerPHY/FIR/arm_fir_filter_cs16_decimate_2.c +++ b/src/LowerPHY/FIR/arm_fir_filter_cs16_decimate_2.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/src/LowerPHY/Scrambling/arm_scrambling.cpp b/src/LowerPHY/Scrambling/arm_scrambling.cpp index f65c7753ac78342ba869ae10326224f08997cc83..3ff12f615bc0927f8a6fde3a34babd90a015c771 100644 --- a/src/LowerPHY/Scrambling/arm_scrambling.cpp +++ b/src/LowerPHY/Scrambling/arm_scrambling.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/src/LowerPHY/SeqGenerator/arm_mat_seq_generator.cpp b/src/LowerPHY/SeqGenerator/arm_mat_seq_generator.cpp index c56628d93580a86b3a26ea7d9c706d1ff231d4bf..98452a053fe1b7adef904271796e7b55c5148fbf 100644 --- a/src/LowerPHY/SeqGenerator/arm_mat_seq_generator.cpp +++ b/src/LowerPHY/SeqGenerator/arm_mat_seq_generator.cpp @@ -1,9 +1,13 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" +#ifdef ARMRAL_ARCH_SVE +#include <arm_sve.h> +#endif + template static inline void generate_seq_128(uint64_t *x) { static_assert(N == 2); @@ -92,13 +96,21 @@ armral_status armral_seq_generator(uint32_t sequence_len, uint32_t seed, // Tail if ((sequence_len % 64) != 0)
{ - uint8_t *p_out_tail = (uint8_t *)p_out; - uint64_t ptemp_res = x1 ^ x2; uint8_t tail_length = ((sequence_len % 64) + 7) >> 3; + uint64_t ptemp_res = x1 ^ x2; +#ifdef ARMRAL_ARCH_SVE + svbool_t pg = svwhilelt_b8(0, tail_length); + svuint64_t splat_val = svdup_u64(ptemp_res); + svuint8_t splat_val8 = svreinterpret_u8_u64(splat_val); + svst1_u8(pg, (uint8_t *)p_out, splat_val8); +#else + uint8_t *p_out_tail = (uint8_t *)p_out; for (uint32_t i = 0; i < tail_length; i++) { (*p_out_tail) = (uint8_t)(ptemp_res >> (i * 8)); p_out_tail++; } +#endif } + return ARMRAL_SUCCESS; } diff --git a/src/SVD/arm_svd.cpp b/src/SVD/arm_svd.cpp index 1e4e6f25a79cdf1df4f808c3e4d16df9b3d8e528..1d24eeb1a66f450e45c2875f31bf827cad92eeea 100644 --- a/src/SVD/arm_svd.cpp +++ b/src/SVD/arm_svd.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" @@ -798,7 +798,7 @@ armral_status armral_svd_bidiagonal(bool gen_singular_vectors, int m, int n, } // Apply implicitly Q to an input matrix C of the same dimension -// as the marix A that has been factorised into QR or bidiagonalisation. +// as the matrix A that has been factorised into QR or bidiagonalisation. struct apply_q_work_buffers { armral_cmplx_f32_t *q; }; diff --git a/src/SVD/matrix_view.hpp b/src/SVD/matrix_view.hpp index 2bcef8369c84a55b283471c85468ca3da592110c..cc2c4d84ec6f1bef8f3d9cbc9bcb1f5e16325e6c 100644 --- a/src/SVD/matrix_view.hpp +++ b/src/SVD/matrix_view.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2023 Arm Limited and/or its affiliates + Copyright 2023-2024 Arm Limited and/or its affiliates */ #pragma once @@ -12,10 +12,6 @@ template struct column_major_matrix_view { column_major_matrix_view(T *data, int stride) : m_data(data), m_stride(stride) {} -#ifdef SVD_TEST - column_major_matrix_view(std::vector &data, int stride) - : column_major_matrix_view(data.data(), stride) {} -#endif T &operator()(int i, int j) { return m_data[i + stride() * j]; diff --git a/src/UpperPHY/CRC/arm_crc11.cpp b/src/UpperPHY/CRC/arm_crc11.cpp index 3104b4755528b7680e987e05d3c01a920aab96bf..c65f3b11bff6f03f1b9b192d59ad5628a5dee658 100644 --- a/src/UpperPHY/CRC/arm_crc11.cpp +++ b/src/UpperPHY/CRC/arm_crc11.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "crc_common.hpp" @@ -9,7 +9,7 @@ static const poly64_t crc11_data[] = { // (1<<(64*k)) mod P_CRC, for k = 10 0xa080000000000000, - // (1<<128) / P - (1<<64) + // (1<<128) / P_CRC - (1<<64) 0xb3fa1f48b92fa293, // (1<<(64*k)) mod P_CRC, for k in [1,1,2,3,4,5,6,7,8,9] 0xc420000000000000, 0xc420000000000000, 0x5e60000000000000, diff --git a/src/UpperPHY/CRC/arm_crc16.cpp b/src/UpperPHY/CRC/arm_crc16.cpp index 5029eaeb73c527cc0bbd72a32279aca5249bb5e6..42204c3985b97d30eeaf1140d802948c6ef387ca 100644 --- a/src/UpperPHY/CRC/arm_crc16.cpp +++ b/src/UpperPHY/CRC/arm_crc16.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "crc_common.hpp" @@ -9,7 +9,7 @@ static const poly64_t crc16_data[] = { // (1<<(64*k)) mod P_CRC, for k = 10 0x8420000000000000, - // (1<<128) / P - (1<<64) + // (1<<128) / P_CRC - (1<<64) 0x11303471a041b343, // (1<<(64*k)) mod P_CRC, for k in [1,1,2,3,4,5,6,7,8,9] 
0x1021000000000000, 0x1021000000000000, 0xeb23000000000000, diff --git a/src/UpperPHY/CRC/arm_crc24_a.cpp b/src/UpperPHY/CRC/arm_crc24_a.cpp index d02d2eebb1a6b476b3f601ee2870a394d83be1a3..3eac9c4e735ff02b954feb984534175fef04e80d 100644 --- a/src/UpperPHY/CRC/arm_crc24_a.cpp +++ b/src/UpperPHY/CRC/arm_crc24_a.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "crc_common.hpp" @@ -9,7 +9,7 @@ static const poly64_t crc24_a_data[] = { // (1<<(64*k)) mod P_CRC, for k = 10 0xa38dea0000000000, - // (1<<128) / P - (1<<64) + // (1<<128) / P_CRC - (1<<64) 0xf845fe2493242da4, // (1<<(64*k)) mod P_CRC, for k in [1,1,2,3,4,5,6,7,8,9] 0x864cfb0000000000, 0x864cfb0000000000, 0xfd7e0c0000000000, diff --git a/src/UpperPHY/CRC/arm_crc24_b.cpp b/src/UpperPHY/CRC/arm_crc24_b.cpp index 13855249e092c4cb7e608b14acd4a98eb01942c3..6de6116c46ec313c742e67615c15d42ad77ce4bf 100644 --- a/src/UpperPHY/CRC/arm_crc24_b.cpp +++ b/src/UpperPHY/CRC/arm_crc24_b.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "crc_common.hpp" @@ -9,7 +9,7 @@ static const poly64_t crc24_b_data[] = { // (1<<(64*k)) mod P_CRC, for k = 10 0xdf24f50000000000, - // (1<<128) / P - (1<<64) + // (1<<128) / P_CRC - (1<<64) 0xffff83ffe007f83e, // (1<<(64*k)) mod P_CRC, for k in [1,1,2,3,4,5,6,7,8,9] 0x8000630000000000, 0x8000630000000000, 0x0900020000000000, diff --git a/src/UpperPHY/CRC/arm_crc24_c.cpp b/src/UpperPHY/CRC/arm_crc24_c.cpp index f1f52fa159759d7627f07f2c574d23bbc19fd40e..0e5e4a77525de93a9bdc771f0d96004e52cb10e5 100644 --- a/src/UpperPHY/CRC/arm_crc24_c.cpp +++ b/src/UpperPHY/CRC/arm_crc24_c.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "crc_common.hpp" @@ -9,7 +9,7 @@ static const poly64_t crc24_c_data[] = { // (1<<(64*k)) mod P_CRC, for k = 10 0x563dff0000000000, - // (1<<128) / P - (1<<64) + // (1<<128) / P_CRC - (1<<64) 0xc52cdcad524ab8e3, // (1<<(64*k)) mod P_CRC, for k in [1,1,2,3,4,5,6,7,8,9] 0xb2b1170000000000, 0xb2b1170000000000, 0x1397990000000000, diff --git a/src/UpperPHY/CRC/arm_crc6.cpp b/src/UpperPHY/CRC/arm_crc6.cpp index c9faf082f222c19933061d4a4482efe7912d5dac..f907683ebb9a181af569d8fba4a27db2cc73fbc1 100644 --- a/src/UpperPHY/CRC/arm_crc6.cpp +++ b/src/UpperPHY/CRC/arm_crc6.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "crc_common.hpp" @@ -9,7 +9,7 @@ static const poly64_t crc6_data[] = { // (1<<(64*k)) mod P_CRC, for k = 10 0x5400000000000000, - // (1<<128) / P - (1<<64) + // (1<<128) / P_CRC - (1<<64) 0xfab376938bca3083, // (1<<(64*k)) mod P_CRC, for k in [1,1,2,3,4,5,6,7,8,9] 0x8400000000000000, 0x8400000000000000, 0x8c00000000000000, diff --git a/src/UpperPHY/CRC/crc_basic.hpp b/src/UpperPHY/CRC/crc_basic.hpp index 06181efd0a5074ef9647130153eb3476b9b5269f..7c3dfcdad1f3c526917555dcacf18945422d8bb7 100644 --- a/src/UpperPHY/CRC/crc_basic.hpp +++ b/src/UpperPHY/CRC/crc_basic.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ 
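The "(1<<128) / P_CRC - (1<<64)" comments standardised across these CRC tables name the Barrett-reduction constant used by the carry-less-multiply folding kernels. A sketch (not library code) of where such a constant comes from, assuming the tables' left-aligned convention: L holds the CRC polynomial's coefficients below x^64 and the leading x^64 term is implicit, so P = x^64 + L.

#include <cstdint>

// Long division of x^128 by P = x^64 + L over GF(2). The quotient's
// implicit leading x^64 is the "- (1<<64)" in the comments; the remaining
// 64 quotient bits are the stored constant. As a check, for CRC6
// (L = 0x8400000000000000) this yields 0xfab376938bca3083, matching the
// table above.
static uint64_t barrett_quotient(uint64_t l) {
  uint64_t rem = l; // remainder after the leading x^64 quotient step
  uint64_t quo = 0;
  for (int i = 0; i < 64; ++i) {
    uint64_t bit = rem >> 63;
    rem = (rem << 1) ^ (bit ? l : 0); // subtract P whenever degree 64 is hit
    quo = (quo << 1) | bit;
  }
  return quo;
}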
#pragma once diff --git a/src/UpperPHY/CRC/crc_common.hpp b/src/UpperPHY/CRC/crc_common.hpp index 39289e89748f5e37e7de8c374e8d1ce98d17e9a6..59460e0998ae1f858ef943a1dd67790f2cdcfe20 100644 --- a/src/UpperPHY/CRC/crc_common.hpp +++ b/src/UpperPHY/CRC/crc_common.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once @@ -64,16 +64,17 @@ static inline poly64x2_t add_p64x2(poly64x2_t a, poly64x2_t b) { * @tparam BarretShift the shift used when computing @c ls1_divp. * @param[in] size number of bytes of the given buffer * @param[in] input points to the input byte sequence - * @param[out] crc24 the computed CRC on 24 bits + * @param[out] crc the computed CRC * @param[in] constants the constants specific to each polynomial: constants[0] = padding - constants[1] = (1<<128) / P - (1<<64) - constants[2:11] = [ (1<<(64*k)) mod P, + constants[1] = (1<<128) / P_CRC - (1<<64) + constants[2:11] = [ (1<<(64*k)) mod P_CRC, for k in [1,1,2,3,4,5,6,7,8,9] ] */ template -static void crc64(uint32_t size, const uint64_t *input, uint64_t *crc, - const poly64_t constants[]) { +static inline __attribute__((always_inline)) void +crc64(uint32_t size, const uint64_t *input, uint64_t *crc, + const poly64_t constants[]) { const poly64_t *p_in = (const poly64_t *)input; if (size == 8) { diff --git a/src/UpperPHY/ConvolutionalEncoder/arm_convolutional_decoder.cpp b/src/UpperPHY/ConvolutionalEncoder/arm_convolutional_decoder.cpp index 1a6f71b84805a34447678f772d056fc09017ee2b..43cd7da4155aa01b21a603ae306570cc3f3e2949 100644 --- a/src/UpperPHY/ConvolutionalEncoder/arm_convolutional_decoder.cpp +++ b/src/UpperPHY/ConvolutionalEncoder/arm_convolutional_decoder.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "bit_utils.hpp" @@ -40,23 +40,6 @@ void compute_path(uint8_t *dec, uint32_t k, uint8_t states, uint8_t const *prev, *i_ptr = i; } -[[maybe_unused]] int32_t compute_bm(int8_t s0, int8_t s1, int8_t s2, int8_t t0, - int8_t t1, int8_t t2) { - int32_t bm = 0; - - // In the AWGN case, minimizing the branch metric bm = sum_{i=1}^3 d_i^2 - // is identical to maximizing the log likelihood along different paths, since - // LL is proportional to -(sum_{i=1}^3 d_i^2). 
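For reference, the claim in the comment of this deleted helper follows directly from the Gaussian log-density: for an AWGN channel with noise variance \sigma^2,

\log p(r \mid t) = C - \frac{1}{2\sigma^2} \sum_{i=1}^{3} (r_i - t_i)^2,

so minimising \sum_i d_i^2 over candidate paths is exactly maximising the log-likelihood. The helper body below actually accumulated \sum_i |d_i|, the usual cheaper fixed-point surrogate for the squared metric.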
- - bm += abs(s0 - t0); - - bm += abs(s1 - t1); - - bm += abs(s2 - t2); - - return bm; -} - int cmp(const void *a, const void *b) { int ret; const pm_s ia = *(const pm_s *)a; diff --git a/src/UpperPHY/ConvolutionalEncoder/arm_convolutional_encoder.cpp b/src/UpperPHY/ConvolutionalEncoder/arm_convolutional_encoder.cpp index 19fe7adf369126bf89239dbd1aeb1406c39e476e..c936f704f699c29311d28d34ac540d470cfa21fb 100644 --- a/src/UpperPHY/ConvolutionalEncoder/arm_convolutional_encoder.cpp +++ b/src/UpperPHY/ConvolutionalEncoder/arm_convolutional_encoder.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/src/UpperPHY/ConvolutionalEncoder/convolutional_code_table.hpp b/src/UpperPHY/ConvolutionalEncoder/convolutional_code_table.hpp index 6fe6dc1e8280025e308ed82fb647dd17d553263b..469b2529785543953fff4a609b28e74cac44c915 100644 --- a/src/UpperPHY/ConvolutionalEncoder/convolutional_code_table.hpp +++ b/src/UpperPHY/ConvolutionalEncoder/convolutional_code_table.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ namespace { diff --git a/src/UpperPHY/Demodulation/arm_demodulation.c b/src/UpperPHY/Demodulation/arm_demodulation.c index 8046a4cd03316d13cbba1453fa69c14161cd36c2..2a30828264557f5ac46ec3c8500853ec7432ccf2 100644 --- a/src/UpperPHY/Demodulation/arm_demodulation.c +++ b/src/UpperPHY/Demodulation/arm_demodulation.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "intrinsics.h" diff --git a/src/UpperPHY/LDPC/ldpc_coding.hpp b/src/UpperPHY/LDPC/ldpc_coding.hpp index 4b2739b4e578984aabc36a87573252e2aa0a6ee5..0d4fa9b0b819af397aff9a4637571e92ef353e03 100644 --- a/src/UpperPHY/LDPC/ldpc_coding.hpp +++ b/src/UpperPHY/LDPC/ldpc_coding.hpp @@ -1,12 +1,13 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once #include "armral.h" -namespace armral_ldpc { +namespace armral::ldpc { + constexpr uint32_t num_lifting_sets = 8; uint32_t get_ldpc_lifting_index(uint32_t lifting_size); @@ -16,4 +17,4 @@ void decode_block(const int8_t *llrs, armral_ldpc_graph_t bg, uint32_t z, uint32_t crc_idx, uint32_t num_its, uint8_t *data_out, Allocator &allocator); -} // namespace armral_ldpc +} // namespace armral::ldpc diff --git a/src/UpperPHY/LDPC/ldpc_decoder.cpp b/src/UpperPHY/LDPC/ldpc_decoder.cpp index 99e26ef663548e0a58b845aecf83557c594d3ff6..19480003f2fcadfb7c21987e37f6709594ca5b38 100644 --- a/src/UpperPHY/LDPC/ldpc_decoder.cpp +++ b/src/UpperPHY/LDPC/ldpc_decoder.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "bit_utils.hpp" @@ -38,7 +38,7 @@ struct ldpc_layer_data { row_start_ind = graph->row_start_inds[row]; col_ptr += num_cols; num_cols = graph->row_start_inds[row + 1] - row_start_ind; - shift_ptr = graph->shifts + row_start_ind * armral_ldpc::num_lifting_sets + + shift_ptr = graph->shifts + row_start_ind * armral::ldpc::num_lifting_sets + lsi * num_cols; } }; @@ -113,13 +113,13 @@ private: template bool parity_check(const int8_t *llrs, uint32_t z, uint32_t lsi, const 
armral_ldpc_base_graph_t *graph, int32_t num_lanes, - int32_t full_vec, int32_t tail_size); + int32_t full_vec, int32_t tail_size, int8_t *check); template<> bool parity_check(const int8_t *llrs, uint32_t z, uint32_t lsi, const armral_ldpc_base_graph_t *graph, int32_t num_lanes, int32_t full_vec, - int32_t tail_size) { + int32_t tail_size, int8_t *check_array) { // Loop through the rows in the base graph bool passed = true; for (uint32_t row = 0; row < graph->nrows && passed; ++row) { @@ -127,18 +127,18 @@ bool parity_check(const int8_t *llrs, uint32_t z, uint32_t lsi, auto num_cols = graph->row_start_inds[row + 1] - row_start_ind; const auto *col_ptr = graph->col_inds + row_start_ind; const auto *shift_ptr = graph->shifts + - row_start_ind * armral_ldpc::num_lifting_sets + + row_start_ind * armral::ldpc::num_lifting_sets + lsi * num_cols; // Loop through the rows in the block for (uint32_t zb = 0; zb < z && passed; ++zb) { // Loop through the columns in the row - int8_t check = 0; + int8_t scal_check = 0; for (uint32_t col = 0; col < num_cols; ++col) { auto shift = (shift_ptr[col] + zb) % z; auto codeword_ind = col_ptr[col] * z + shift; - check ^= llrs[codeword_ind]; + scal_check ^= llrs[codeword_ind]; } - passed &= check >= 0; + passed &= scal_check >= 0; } } return passed; @@ -148,7 +148,7 @@ template<> bool parity_check(const int8_t *llrs, uint32_t z, uint32_t lsi, const armral_ldpc_base_graph_t *graph, int32_t num_lanes, int32_t full_vec, - int32_t tail_size) { + int32_t tail_size, int8_t *check) { // Loop through the rows in the base graph bool passed = true; #if ARMRAL_ARCH_SVE >= 2 @@ -158,9 +158,8 @@ bool parity_check(const int8_t *llrs, uint32_t z, uint32_t lsi, auto num_cols = graph->row_start_inds[row + 1] - row_start_ind; const auto *col_ptr = graph->col_inds + row_start_ind; const auto *shift_ptr = graph->shifts + - row_start_ind * armral_ldpc::num_lifting_sets + + row_start_ind * armral::ldpc::num_lifting_sets + lsi * num_cols; - int8_t check[z]; memset(check, 0, z * sizeof(int8_t)); // Loop through the columns @@ -187,9 +186,8 @@ bool parity_check(const int8_t *llrs, uint32_t z, uint32_t lsi, auto num_cols = graph->row_start_inds[row + 1] - row_start_ind; const auto *col_ptr = graph->col_inds + row_start_ind; const auto *shift_ptr = graph->shifts + - row_start_ind * armral_ldpc::num_lifting_sets + + row_start_ind * armral::ldpc::num_lifting_sets + lsi * num_cols; - int8_t check[z]; memset(check, 0, z * sizeof(int8_t)); // Loop through the columns @@ -223,7 +221,7 @@ template<> bool parity_check(const int8_t *llrs, uint32_t z, uint32_t lsi, const armral_ldpc_base_graph_t *graph, int32_t num_lanes, int32_t full_vec, - int32_t tail_size) { + int32_t tail_size, int8_t *check) { #if ARMRAL_ARCH_SVE >= 2 svbool_t pg = svptrue_b8(); svbool_t pg_tail = svwhilelt_b8(0, tail_size); @@ -235,9 +233,8 @@ bool parity_check(const int8_t *llrs, uint32_t z, uint32_t lsi, auto num_cols = graph->row_start_inds[row + 1] - row_start_ind; const auto *col_ptr = graph->col_inds + row_start_ind; const auto *shift_ptr = graph->shifts + - row_start_ind * armral_ldpc::num_lifting_sets + + row_start_ind * armral::ldpc::num_lifting_sets + lsi * num_cols; - int8_t check[z]; memset(check, 0, z * sizeof(int8_t)); // Loop through the columns @@ -285,9 +282,8 @@ bool parity_check(const int8_t *llrs, uint32_t z, uint32_t lsi, auto num_cols = graph->row_start_inds[row + 1] - row_start_ind; const auto *col_ptr = graph->col_inds + row_start_ind; const auto *shift_ptr = graph->shifts + - row_start_ind * 
armral_ldpc::num_lifting_sets + + row_start_ind * armral::ldpc::num_lifting_sets + lsi * num_cols; - int8_t check[z]; memset(check, 0, z * sizeof(int8_t)); // Loop through the columns @@ -532,7 +528,6 @@ void compute_l_product_min1_and_min2( vst1_s8(l_ptr, l_reg); - int8_t scalar_min2[d->z]; for (uint32_t zb = d->z - tail_size; zb < d->z; ++zb) { l_val = llrs_ptr[zb] - r_ptr[zb]; @@ -576,7 +571,7 @@ void compute_l_product_min1_and_min2( int8_t abs_val = vqabsb_s8(l_val); row_min2_array[zb] = - max(row_min_array[zb], min(scalar_min2[zb], abs_val)); + max(row_min_array[zb], min(row_min2_array[zb], abs_val)); row_min_array[zb] = min(row_min_array[zb], abs_val); l_ptr[zb] = l_val; @@ -1301,7 +1296,7 @@ run_iterations(uint32_t num_its, int z, int lsi, const armral_ldpc_base_graph_t *graph, int8_t *r, int8_t *l, int8_t *new_llrs, int num_lanes, int full_vec, int tail_size, int8_t *row_min_array, int8_t *row_min2_array, - int8_t *row_sign_array, bool check_convergence, + int8_t *row_sign_array, int8_t *check, bool check_convergence, std::optional> &crc_checker) { for (uint32_t i = 0; i < num_its; ++i) { ldpc_layer_data d(z, lsi, graph); @@ -1333,7 +1328,7 @@ run_iterations(uint32_t num_its, int z, int lsi, bool crc_passed = crc_checker.has_value() && crc_checker->check(new_llrs); if (check_convergence && (crc_passed || parity_check(new_llrs, z, lsi, graph, num_lanes, - full_vec, tail_size))) { + full_vec, tail_size, check))) { break; } } @@ -1342,12 +1337,12 @@ run_iterations(uint32_t num_its, int z, int lsi, } // anonymous namespace template -void armral_ldpc::decode_block(const int8_t *llrs, armral_ldpc_graph_t bg, - uint32_t z, uint32_t crc_idx, uint32_t num_its, - uint8_t *data_out, Allocator &allocator) { +void armral::ldpc::decode_block(const int8_t *llrs, armral_ldpc_graph_t bg, + uint32_t z, uint32_t crc_idx, uint32_t num_its, + uint8_t *data_out, Allocator &allocator) { // Get the base graph and the lifting size const auto *graph = armral_ldpc_get_base_graph(bg); - uint32_t lsi = armral_ldpc::get_ldpc_lifting_index(z); + uint32_t lsi = armral::ldpc::get_ldpc_lifting_index(z); // Only allocate the CRC checker if necessary. 
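The signature changes above thread a new int8_t *check scratch argument through parity_check in place of the C-style VLA int8_t check[z] deleted from each branch; the buffer is allocated once in decode_block below, alongside the other per-z work arrays. A stripped-down sketch of the pattern (hypothetical names, not the library routines):

#include <cstdint>
#include <cstring>
#include <memory>

// The z-byte scratch is owned by the caller and reused across iterations,
// so the hot loop performs no per-call stack VLA and counting allocators
// can account for the workspace up front.
static void parity_rows(uint32_t z, int8_t *check /* z bytes of scratch */) {
  std::memset(check, 0, z);
  // ... accumulate each base-graph row's lane-wise parity into check ...
}

static void decode(uint32_t z, uint32_t num_its) {
  auto check = std::make_unique<int8_t[]>(z); // zero-initialised, hoisted
  for (uint32_t i = 0; i < num_its; ++i) {
    parity_rows(z, check.get());
  }
}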
std::optional> maybe_crc_checker; @@ -1373,6 +1368,8 @@ void armral_ldpc::decode_block(const int8_t *llrs, armral_ldpc_graph_t bg, auto row_min2_array = allocate_zeroed(allocator, z); auto row_sign_array = allocate_zeroed(allocator, z); + auto check = allocate_zeroed(allocator, z); + #if ARMRAL_ARCH_SVE >= 2 bool z_is_tiny = (z == 2); #else @@ -1435,24 +1432,27 @@ void armral_ldpc::decode_block(const int8_t *llrs, armral_ldpc_graph_t bg, #endif if (z_is_tiny) { - run_iterations( - num_its, z, lsi, graph, r.get(), l.get(), new_llrs.get(), num_lanes, - full_vec, tail_size, row_min_array.get(), row_min2_array.get(), - row_sign_array.get(), check_convergence, maybe_crc_checker); + run_iterations(num_its, z, lsi, graph, r.get(), l.get(), + new_llrs.get(), num_lanes, full_vec, tail_size, + row_min_array.get(), row_min2_array.get(), + row_sign_array.get(), check.get(), + check_convergence, maybe_crc_checker); // Hard decode into the output variable llrs_to_bits(num_llrs, new_llrs.get(), data_out); } else { if (is_tail_only) { - run_iterations( - num_its, z, lsi, graph, r.get(), l.get(), new_llrs.get(), num_lanes, - full_vec, tail_size, row_min_array.get(), row_min2_array.get(), - row_sign_array.get(), check_convergence, maybe_crc_checker); + run_iterations(num_its, z, lsi, graph, r.get(), l.get(), + new_llrs.get(), num_lanes, full_vec, tail_size, + row_min_array.get(), row_min2_array.get(), + row_sign_array.get(), check.get(), + check_convergence, maybe_crc_checker); } else { - run_iterations( - num_its, z, lsi, graph, r.get(), l.get(), new_llrs.get(), num_lanes, - full_vec, tail_size, row_min_array.get(), row_min2_array.get(), - row_sign_array.get(), check_convergence, maybe_crc_checker); + run_iterations(num_its, z, lsi, graph, r.get(), l.get(), + new_llrs.get(), num_lanes, full_vec, tail_size, + row_min_array.get(), row_min2_array.get(), + row_sign_array.get(), check.get(), + check_convergence, maybe_crc_checker); } // Pack LLRs, copy back to original storage auto *out_llrs = maybe_out_llrs.value().get(); @@ -1467,11 +1467,11 @@ void armral_ldpc::decode_block(const int8_t *llrs, armral_ldpc_graph_t bg, } } -template void armral_ldpc::decode_block( +template void armral::ldpc::decode_block( const int8_t *llrs, armral_ldpc_graph_t bg, uint32_t z, uint32_t crc_idx, uint32_t num_its, uint8_t *data_out, heap_allocator &); -template void armral_ldpc::decode_block( +template void armral::ldpc::decode_block( const int8_t *llrs, armral_ldpc_graph_t bg, uint32_t z, uint32_t crc_idx, uint32_t num_its, uint8_t *data_out, buffer_bump_allocator &); @@ -1480,8 +1480,8 @@ armral_status armral_ldpc_decode_block(const int8_t *llrs, uint32_t crc_idx, uint32_t num_its, uint8_t *data_out) { heap_allocator allocator{}; - armral_ldpc::decode_block(llrs, bg, z, crc_idx, num_its, data_out, - allocator); + armral::ldpc::decode_block(llrs, bg, z, crc_idx, num_its, data_out, + allocator); return ARMRAL_SUCCESS; } @@ -1490,8 +1490,8 @@ armral_ldpc_decode_block_noalloc(const int8_t *llrs, armral_ldpc_graph_t bg, uint32_t z, uint32_t crc_idx, uint32_t num_its, uint8_t *data_out, void *buffer) { buffer_bump_allocator allocator{buffer}; - armral_ldpc::decode_block(llrs, bg, z, crc_idx, num_its, data_out, - allocator); + armral::ldpc::decode_block(llrs, bg, z, crc_idx, num_its, data_out, + allocator); return ARMRAL_SUCCESS; } @@ -1500,7 +1500,7 @@ uint32_t armral_ldpc_decode_block_noalloc_buffer_size(armral_ldpc_graph_t bg, uint32_t crc_idx, uint32_t num_its) { counting_allocator allocator{}; - 
armral_ldpc::decode_block(nullptr, bg, z, crc_idx, num_its, nullptr, - allocator); + armral::ldpc::decode_block(nullptr, bg, z, crc_idx, num_its, nullptr, + allocator); return allocator.required_bytes(); } diff --git a/src/UpperPHY/LDPC/ldpc_encoder.cpp b/src/UpperPHY/LDPC/ldpc_encoder.cpp index 20ee82b97f15f73e7dd518cd5bc799a08a8fea13..74a8fe4ba5eb7817982dce0a37adc0f0ff2b2a89 100644 --- a/src/UpperPHY/LDPC/ldpc_encoder.cpp +++ b/src/UpperPHY/LDPC/ldpc_encoder.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "bit_utils.hpp" @@ -950,7 +950,7 @@ inline void set_parity_hdsm_bg1_lsi_not_6(uint32_t z, const uint8_t *ptr_agg = agg_parity; const uint8_t *ptr_hdsm = parity_hdsm; - // Process 16 entries at a time + // Process 16 entries at a time uint32_t blk_cnt = (z - 1) >> 4U; while (blk_cnt > 0U) { @@ -1036,7 +1036,7 @@ inline void set_parity_hdsm_bg1_lsi_6(uint32_t z, const uint8_t *parity_hdsm, const uint8_t *ptr_hdsm = parity_hdsm; // zb = 0 to 104 - // Process 16 uint8_t at a time + // Process 16 uint8_t at a time uint32_t blk_cnt = 6; // 105/16 while (blk_cnt > 0U) { @@ -1103,7 +1103,7 @@ inline void set_parity_hdsm_bg1_lsi_6(uint32_t z, const uint8_t *parity_hdsm, data_out += 1; // zb = 105 to 207 - // Process 16 uint8_t at a time + // Process 16 uint8_t at a time blk_cnt = 6; // 103/16 while (blk_cnt > 0U) { @@ -1140,7 +1140,7 @@ inline void set_parity_hdsm_bg1_lsi_6(uint32_t z, const uint8_t *parity_hdsm, codeword[(25 * z) + zb] = parity_hdsm[3 * z + zb] ^ agg_parity[zb - 105]; } } else { // z != 208 - // Deal with the first row of the loop (zb =0) + // Deal with the first row of the loop (zb =0) { codeword[22 * z] = agg_parity[z - 1]; codeword[23 * z] = parity_hdsm[0] ^ agg_parity[z - 1]; @@ -1154,7 +1154,7 @@ inline void set_parity_hdsm_bg1_lsi_6(uint32_t z, const uint8_t *parity_hdsm, const uint8_t *ptr_agg = agg_parity + 1; const uint8_t *ptr_hdsm = parity_hdsm + 1; - // Process 16 uint8_t at a time + // Process 16 uint8_t at a time uint32_t blk_cnt = (z - 1) >> 4U; while (blk_cnt > 0U) { @@ -1225,7 +1225,7 @@ inline void set_parity_hdsm_bg2_lsi_not_3_nor_7(uint32_t z, const uint8_t *parity_hdsm, const uint8_t *agg_parity, uint8_t *codeword) { - // Deal with the first row of the loop (zb =0) + // Deal with the first row of the loop (zb =0) { codeword[10 * z] = agg_parity[z - 1]; codeword[11 * z] = parity_hdsm[0] ^ agg_parity[z - 1]; @@ -1237,7 +1237,7 @@ inline void set_parity_hdsm_bg2_lsi_not_3_nor_7(uint32_t z, const uint8_t *ptr_agg = agg_parity + 1; const uint8_t *ptr_hdsm = parity_hdsm + 1; - // Process 16 entries at a time + // Process 16 entries at a time uint32_t blk_cnt = (z - 1) >> 4U; while (blk_cnt > 0U) { @@ -1311,7 +1311,7 @@ inline void set_parity_hdsm_bg2_lsi_3_or_7(uint32_t z, const uint8_t *ptr_agg = agg_parity; const uint8_t *ptr_hdsm = parity_hdsm; - // Process 16 entries at a time + // Process 16 entries at a time uint32_t blk_cnt = (z - 1) >> 4U; while (blk_cnt > 0U) { @@ -1467,7 +1467,7 @@ inline void calc_extension_parity(uint32_t z, uint32_t lsi, // the number of index sets (8), and then the lifting set index // is added to this const auto *shift_ptr = graph->shifts + - row_start_ind * armral_ldpc::num_lifting_sets + + row_start_ind * armral::ldpc::num_lifting_sets + lsi * col_entries; uint32_t j = 0; for (; j < col_entries && col_ptr[j] < max_ind; ++j) { @@ -1480,7 +1480,7 @@ inline void 
calc_extension_parity(uint32_t z, uint32_t lsi, auto *codeword_ptr = codeword + block_col * z + shift; // Vectorization of the inner loops - // Process 16 entries at a time + // Process 16 entries at a time uint32_t blk_cnt = (z - shift) >> 4U; while (blk_cnt > 0U) { // Load inputs @@ -1514,7 +1514,7 @@ inline void calc_extension_parity(uint32_t z, uint32_t lsi, // Process zb = 0 to shift -1 codeword_ptr = codeword + block_col * z; - // Process 16 entries at a time + // Process 16 entries at a time blk_cnt = shift >> 4U; while (blk_cnt > 0U) { // Load inputs @@ -1562,7 +1562,7 @@ inline void spmv_hdsm(uint32_t z, uint32_t lsi, // is first offset by the row start index multiplied by // the number of index sets (8), and then const auto *shift_ptr = graph->shifts + - row_start_ind * armral_ldpc::num_lifting_sets + + row_start_ind * armral::ldpc::num_lifting_sets + lsi * col_entries; uint32_t j = 0; for (; j < col_entries && col_ptr[j] < graph->nmessage_bits; ++j) { @@ -1575,7 +1575,7 @@ inline void spmv_hdsm(uint32_t z, uint32_t lsi, auto *in_ptr = bytes_in + block_col * z + shift; // Vectorization of the inner loops - // Process 16 entries at a time + // Process 16 entries at a time uint32_t blk_cnt = (z - shift) >> 4U; while (blk_cnt > 0U) { // Load inputs @@ -1609,7 +1609,7 @@ inline void spmv_hdsm(uint32_t z, uint32_t lsi, // Process zb = 0 to shift - 1 in_ptr = bytes_in + block_col * z; - // Process 16 entries at a time + // Process 16 entries at a time blk_cnt = shift >> 4U; while (blk_cnt > 0U) { // Load inputs @@ -1652,7 +1652,7 @@ inline void copy_input_message(uint32_t z, uint8_t *out_ptr = codeword + j * z; const uint8_t *in_ptr = bytes_in + j * z; - // Process 16 entries at a time + // Process 16 entries at a time uint32_t blk_cnt = z >> 4U; while (blk_cnt > 0U) { // Load inputs @@ -1686,7 +1686,7 @@ inline void calc_hdsm_rhs(uint32_t z, const uint8_t *parity_hdsm, // First iteration, tmp_parity is vector of 0 uint8_t *out_ptr = tmp_parity; const uint8_t *in_ptr = parity_hdsm; - // Process 16 entries at a time + // Process 16 entries at a time uint32_t blk_cnt = z >> 4U; while (blk_cnt > 0U) { // Load inputs @@ -1721,7 +1721,7 @@ inline void calc_hdsm_rhs(uint32_t z, const uint8_t *parity_hdsm, for (uint32_t j = 1; j < 4; ++j) { out_ptr = tmp_parity; in_ptr = parity_hdsm + z * j; - // Process 16 entries at a time + // Process 16 entries at a time blk_cnt = z >> 4U; while (blk_cnt > 0U) { // Load inputs @@ -1767,6 +1767,7 @@ armral_status ldpc_encode_block(const uint8_t *data_in, armral_ldpc_graph_t bg, auto parity_hdsm = allocate_zeroed(allocator, 4 * z); auto codeword = allocate_zeroed(allocator, (graph->ncodeword_bits + 2) * z); + auto tmp_parity = allocate_zeroed(allocator, z); if constexpr (Allocator::is_counting) { return ARMRAL_SUCCESS; @@ -1777,7 +1778,7 @@ armral_status ldpc_encode_block(const uint8_t *data_in, armral_ldpc_graph_t bg, bytes_in.get()); // Get the lifting set index - auto lsi = armral_ldpc::get_ldpc_lifting_index(z); + auto lsi = armral::ldpc::get_ldpc_lifting_index(z); // The encoding is done by computing: // 1- Parity bits for the high-density submatrix (hdsm) @@ -1792,11 +1793,10 @@ armral_status ldpc_encode_block(const uint8_t *data_in, armral_ldpc_graph_t bg, // Build the right-hand side of the linear systems // to solve for hdsm parity computation - uint8_t tmp_parity[z]; - calc_hdsm_rhs(z, parity_hdsm.get(), tmp_parity); + calc_hdsm_rhs(z, parity_hdsm.get(), tmp_parity.get()); // Finally, computation of hdsm parity bits - calc_hdsm_parity(z, lsi, bg, 
graph, parity_hdsm.get(), tmp_parity, + calc_hdsm_parity(z, lsi, bg, graph, parity_hdsm.get(), tmp_parity.get(), codeword.get()); // 2- Parity bits for the extension matrix. @@ -1848,7 +1848,7 @@ armral_ldpc_get_base_graph(armral_ldpc_graph_t bg) { return bg == LDPC_BASE_GRAPH_1 ? &base_graph_1 : &base_graph_2; } -uint32_t armral_ldpc::get_ldpc_lifting_index(uint32_t lifting_size) { +uint32_t armral::ldpc::get_ldpc_lifting_index(uint32_t lifting_size) { // Each lifting size is either a power of two, // or an odd multiple (up to 15) of a power of two. Find the first odd // number when shifting right, diff --git a/src/UpperPHY/LDPC/ldpc_rate_common.hpp b/src/UpperPHY/LDPC/ldpc_rate_common.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3858f49fd118fbe0b6ccc8d7548929312aaff247 --- /dev/null +++ b/src/UpperPHY/LDPC/ldpc_rate_common.hpp @@ -0,0 +1,30 @@ +/* + Arm RAN Acceleration Library + Copyright 2023-2024 Arm Limited and/or its affiliates +*/ +#pragma once + +#include "armral.h" + +namespace { + +uint32_t starting_position(armral_ldpc_graph_t bg, uint32_t rv, uint32_t n, + uint32_t ncb, uint32_t z) { + // Starting position k0 of different redundancy versions + // given as Table 5.4.2.1-2 in 3GPP TS 38.212. + if (rv == 0) { + return 0; + } + if (rv == 1) { + return (17 * z - (int)bg * 4 * z) * (ncb / n); + } + if (rv == 2) { + return (33 * z - (int)bg * 8 * z) * (ncb / n); + } + if (rv == 3) { + return (56 * z - (int)bg * 13 * z) * (ncb / n); + } + return 0; +} + +} // anonymous namespace diff --git a/src/UpperPHY/LDPC/ldpc_rate_matching.cpp b/src/UpperPHY/LDPC/ldpc_rate_matching.cpp index 40952c0dc059c62b1d92c9bb082f08a49a2046a8..2324c2f9eff078dc066d058a0f3136def8080d50 100644 --- a/src/UpperPHY/LDPC/ldpc_rate_matching.cpp +++ b/src/UpperPHY/LDPC/ldpc_rate_matching.cpp @@ -1,9 +1,10 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "bit_utils.hpp" +#include "ldpc_rate_common.hpp" #include "utils/allocators.hpp" #include #include @@ -21,13 +22,17 @@ void copy_bits(uint32_t src_bit, uint32_t start_idx, uint32_t len, uint32_t l, } } -static void bit_selection(uint32_t z, uint32_t n, uint32_t e, - uint32_t len_filler_bits, uint32_t k, uint32_t k0, - const uint8_t *in, uint8_t *out, - uint8_t *scratch_buf1, uint8_t *scratch_buf2) { +void bit_selection(uint32_t z, uint32_t n, uint32_t e, uint32_t len_filler_bits, + uint32_t k, uint32_t k0, const uint8_t *in, uint8_t *out, + uint8_t *scratch_buf1, uint8_t *scratch_buf2) { + assert(n > 0); + assert(e > 0); + assert(k0 < n); + assert(n % 2 == 0); + const uint8_t *in_bits = in; // bit selection as specified by section 5.4.2.1 in 3GPP TS 38.212 - // remove Filler bits + // remove filler bits if (len_filler_bits > 0) { uint32_t len_s_f_bits = k - z * 2; // length of systematic & filler bits @@ -69,10 +74,12 @@ static void bit_selection(uint32_t z, uint32_t n, uint32_t e, } } -static void bit_interleave(uint32_t e, uint32_t qm, const uint8_t *in, - uint8_t *out) { +void bit_interleave(uint32_t e, uint32_t qm, const uint8_t *in, uint8_t *out) { // performs the bit interleaving step of LDPC encoding, as specified in // section 5.4.2.2 of 3GPP TS 38.212. 
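The assert added immediately below makes bit_interleave's standing assumption explicit: the rate-matched length e must be a multiple of the modulation order qm. As an unpacked, one-bit-per-byte illustration of the section 5.4.2.2 interleaver (the library's version operates on packed bits, hence the memset of the (e + 7) / 8-byte output that follows):

#include <cstdint>

// TS 38.212 section 5.4.2.2: f[i + j*Qm] = e[i*(E/Qm) + j], i.e. write the
// input as Qm rows of E/Qm bits and read it back out column by column.
static void interleave_unpacked(uint32_t e, uint32_t qm, const uint8_t *in,
                                uint8_t *out) {
  const uint32_t cols = e / qm; // e % qm == 0, as asserted
  for (uint32_t i = 0; i < qm; ++i) {
    for (uint32_t j = 0; j < cols; ++j) {
      out[i + j * qm] = in[i * cols + j];
    }
  }
}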
+ + assert(e % qm == 0); + memset((void *)out, 0, (e + 7) / 8); // transpose @@ -88,25 +95,6 @@ static void bit_interleave(uint32_t e, uint32_t qm, const uint8_t *in, } } -static int starting_position(armral_ldpc_graph_t bg, uint32_t rv, uint32_t n, - uint32_t ncb, uint32_t z) { - // Starting position k0 of different redundancy versions - // given as Table 5.4.2.1-2 in 3GPP TS 38.212. - if (rv == 0) { - return 0; - } - if (rv == 1) { - return (17 * z - (int)bg * 4 * z) * (ncb / n); - } - if (rv == 2) { - return (33 * z - (int)bg * 8 * z) * (ncb / n); - } - if (rv == 3) { - return (56 * z - (int)bg * 13 * z) * (ncb / n); - } - return 0; -} - template armral_status rate_matching(armral_ldpc_graph_t bg, uint32_t z, uint32_t e, uint32_t nref, uint32_t len_filler_bits, uint32_t k, diff --git a/src/UpperPHY/LDPC/ldpc_rate_recovery.cpp b/src/UpperPHY/LDPC/ldpc_rate_recovery.cpp index c46165e5cea2487d46a3b0e3f0837fb1c4c37c59..6fa9b6c3acf449d23967d6919d6e4a9eb4a841c0 100644 --- a/src/UpperPHY/LDPC/ldpc_rate_recovery.cpp +++ b/src/UpperPHY/LDPC/ldpc_rate_recovery.cpp @@ -1,8 +1,9 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" +#include "ldpc_rate_common.hpp" #include "utils/allocators.hpp" #include @@ -28,10 +29,10 @@ void undo_selection(uint32_t z, uint32_t n, uint32_t e, // performs the inverse of the bit selection as specified by // section 5.4.2.1 in 3GPP TS 38.212 - assert(k0 >= 0 && k0 < n); + assert(k0 < n); assert(e > 0); - // systematic bits len + // systematic bits len uint32_t len_s_bits = k - len_filler_bits - (2 * z); uint32_t k_idx = 0; uint32_t k0_start = k0; @@ -97,6 +98,7 @@ void undo_selection(uint32_t z, uint32_t n, uint32_t e, void undo_interleave(uint32_t e, uint32_t qm, const int8_t *in, int8_t *out) { // performs the inverse of the bit interleaving step of LDPC encoding, // as specified in section 5.4.2.2 of 3GPP TS 38.212. + assert(e > qm); assert(qm > 0); assert(e % qm == 0); @@ -111,26 +113,6 @@ void undo_interleave(uint32_t e, uint32_t qm, const int8_t *in, int8_t *out) { } } -static int starting_position(armral_ldpc_graph_t bg, uint32_t rv, uint32_t n, - uint32_t ncb, uint32_t z) { - // Duplicate of routine of the same name in ldpc_rate_matching.cpp - // Starting position k0 of different redundancy versions - // given as Table 5.4.2.1-2 in 3GPP TS 38.212. 
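The duplicate being deleted here is the copy consolidated into ldpc_rate_common.hpp above. For orientation, an unpacked sketch of the section 5.4.2.1 bit selection that starting_position's k0 feeds into, with the filler-bit skipping that undo_selection inverts (illustration only; the library works on packed bits and scratch buffers):

#include <cstdint>

// Read e bits from the length-ncb circular buffer starting at k0, never
// emitting the filler range, i.e. the last len_filler_bits positions of
// the k - 2*z systematic-and-filler bits. With ncb == n, k0/z takes the
// Table 5.4.2.1-2 values 0/17/33/56 (base graph 1) or 0/13/25/43 (base
// graph 2), as computed by starting_position above. Assumes at least one
// non-filler position exists.
static void select_bits(const uint8_t *ncb_buf, uint32_t ncb, uint32_t e,
                        uint32_t k0, uint32_t filler_begin,
                        uint32_t filler_end, uint8_t *out) {
  uint32_t taken = 0;
  for (uint32_t j = k0 % ncb; taken < e; j = (j + 1) % ncb) {
    if (j < filler_begin || j >= filler_end) {
      out[taken++] = ncb_buf[j];
    }
  }
}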
- if (rv == 0) { - return 0; - } - if (rv == 1) { - return (17 * z - (int)bg * 4 * z) * (ncb / n); - } - if (rv == 2) { - return (33 * z - (int)bg * 8 * z) * (ncb / n); - } - if (rv == 3) { - return (56 * z - (int)bg * 13 * z) * (ncb / n); - } - return 0; -} - template armral_status rate_recovery(armral_ldpc_graph_t bg, uint32_t z, uint32_t e, uint32_t nref, uint32_t len_filler_bits, uint32_t k, diff --git a/src/UpperPHY/Modulation/arm_modulation.c b/src/UpperPHY/Modulation/arm_modulation.c index ab58bae6960c3040545b63b3521b4acb1914f611..96c91bb83b6165ec629985a5927a4f9910c915ef 100644 --- a/src/UpperPHY/Modulation/arm_modulation.c +++ b/src/UpperPHY/Modulation/arm_modulation.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "intrinsics.h" diff --git a/src/UpperPHY/Polar/arm_polar_crc_attachment.cpp b/src/UpperPHY/Polar/arm_polar_crc_attachment.cpp index 73f2fbd2dad841ad07d385045e3e188fcfebe6f8..3925063a5e5ef28ac08add6df29a2e7be205a8b1 100644 --- a/src/UpperPHY/Polar/arm_polar_crc_attachment.cpp +++ b/src/UpperPHY/Polar/arm_polar_crc_attachment.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "utils/allocators.hpp" diff --git a/src/UpperPHY/Polar/arm_polar_crc_check.cpp b/src/UpperPHY/Polar/arm_polar_crc_check.cpp index 77d928d9a5261e9593b36e1ec4d6b7a98ccafc34..f88507361e96e9f524a81ea0f0fd536d0e698da1 100644 --- a/src/UpperPHY/Polar/arm_polar_crc_check.cpp +++ b/src/UpperPHY/Polar/arm_polar_crc_check.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "utils/allocators.hpp" diff --git a/src/UpperPHY/Polar/arm_polar_decoder.cpp b/src/UpperPHY/Polar/arm_polar_decoder.cpp index 2cf77b672bea150ce490e671a36449210aee8ec3..4c4cc8d87c137ed0542a2b010229e5972959d62a 100644 --- a/src/UpperPHY/Polar/arm_polar_decoder.cpp +++ b/src/UpperPHY/Polar/arm_polar_decoder.cpp @@ -1,336 +1,37 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "intrinsics.h" #include -#include -#include -#include -#include -#include -#include +#include +#include -namespace { - -static inline uint8x16_t vld_histq_l8(const uint8_t *hist) { - return vreinterpretq_u8_u64(vld1q_dup_u64((const uint64_t *)hist)); -} - -static inline uint8x16_t vld_histq_l4(const uint8_t *hist) { - return vreinterpretq_u8_u32(vld1q_dup_u32((const uint32_t *)hist)); -} - -static inline uint8x16_t vld_histq_l2(const uint8_t *hist) { - return vreinterpretq_u8_u16(vld1q_dup_u16((const uint16_t *)hist)); -} +#include "arm_polar_decoder_neon.hpp" -static inline uint8x8_t vld_hist_l4(const uint8_t *hist) { - return vreinterpret_u8_u32(vld1_dup_u32((const uint32_t *)hist)); -} - -static inline uint8x8_t vld_hist_l2(const uint8_t *hist) { - return vreinterpret_u8_u16(vld1_dup_u16((const uint16_t *)hist)); -} - -static inline uint32x4_t vmlsl_u32_s16(uint32x4_t acc, uint16x4_t x, - uint16x4_t y) { - return vreinterpretq_u32_s32(vmlsl_s16(vreinterpretq_s32_u32(acc), - vreinterpret_s16_u16(x), - vreinterpret_s16_u16(y))); -} +namespace { typedef struct { const uint8_t *frozen_bits_mask; uint32_t curr_bit_idx; } 
sc_decoder; -// Extract the sign of an integer -static inline int8_t __attribute__((always_inline)) sign(int32_t x) { - return static_cast(x > 0) - static_cast(x < 0); -} - -// calculate the minimum absolute value between two integers -static inline int16_t __attribute__((always_inline)) -min(const int8_t x, const int8_t y) { - return abs(x) < abs(y) ? abs(x) : abs(y); -} - -template -static inline void zip1_l(const int8_t *__restrict in, int8_t *__restrict out) { - static_assert(N % 16 == 0); - if constexpr (L == 8) { - for (int i = 0; i < N; i += 2) { - int8x16_t x1 = vld1q_dup_s8(in); - int8x16_t x2 = vld1q_dup_s8(in + 1); - vst1q_s8(out, vextq_s8(x1, x2, 8)); - in += 2; - out += 16; - } - } else if constexpr (L == 4) { - for (int i = 0; i < N; i += 4) { - int8x8_t x0 = vld1s_s8(in); - int8x16_t x1 = vzip1l_s8(x0, x0); - int8x16_t x2 = vzip1q_s8(x1, x1); - vst1q_s8(out, x2); - in += 4; - out += 16; - } - } else if constexpr (L == 2) { - for (int i = 0; i < N; i += 8) { - int8x8_t x0 = vld1_s8(in); - int8x16_t x1 = vzip1l_s8(x0, x0); - vst1q_s8(out, x1); - in += 8; - out += 16; - } - } else { - assert(false && "unimplemented!"); - } -} - -// calculate beliefs for left children in SCL algorithm -template -static inline void f(const int8_t *r1, const int8_t *r2, int8_t *output) { - int16_t l = length >> 4; - while (l > 0) { - int8x16_t llr1 = vld1q_s8(r1); - int8x16_t llr2 = vld1q_s8(r2); - uint8x16_t sign_vect = vcltzq_s8(veorq_s8(llr1, llr2)); - llr1 = vqabsq_s8(llr1); - llr2 = vqabsq_s8(llr2); - int8x16_t result = vminq_s8(llr1, llr2); - int8x16_t result_neg = vnegq_s8(result); - result = vbslq_s8(sign_vect, result_neg, result); - vst1q_s8(output, result); - l--; - r1 += 16; - r2 += 16; - output += 16; - } - - if ((length >> 3) & 1) { - int8x8_t llr1 = vld1_s8(r1); - int8x8_t llr2 = vld1_s8(r2); - uint8x8_t sign_vect = vcltz_s8(veor_s8(llr1, llr2)); - llr1 = vqabs_s8(llr1); - llr2 = vqabs_s8(llr2); - int8x8_t result = vmin_s8(llr1, llr2); - int8x8_t result_neg = vneg_s8(result); - result = vbsl_s8(sign_vect, result_neg, result); - vst1_s8(output, result); - r1 += 8; - r2 += 8; - output += 8; - } - - l = length & 0x7; - while (l > 0) { - int8_t a = *r1++; - int8_t b = *r2++; - *output++ = sat_8(sign(a * b) * min(a, b)); - l--; - } +inline void __attribute__((always_inline)) +setup_sc_decoder(sc_decoder *decoder, const uint8_t *frozen) { + decoder->curr_bit_idx = 0; + decoder->frozen_bits_mask = frozen; } template -static inline void f_l(const int8_t *in, int8_t *out) { +inline void f_l(const int8_t *in, int8_t *out) { f(in, &in[Nhalf * L], out); } -template -static inline void g(const int8_t *r1, const int8_t *r2, const uint8_t *dec, - int8_t *output) { - // Calculate beliefs for right children in the successive cancellation (SC) - // algorithm: - // g(a, b, c=0) = a + b - // g(a, b, c=1) = a - b - int16_t l = length >> 4; - while (l > 0) { - int8x16_t llr1 = vld1q_s8(r1); - int8x16_t llr2 = vld1q_s8(r2); - uint8x16_t bit = vld1q_u8(dec); - int8x16_t result = - vbslq_s8(vceqzq_u8(bit), vqaddq_s8(llr2, llr1), vqsubq_s8(llr2, llr1)); - vst1q_s8(output, result); - l--; - r1 += 16; - r2 += 16; - dec += 16; - output += 16; - } - - if ((length >> 3) & 1) { - int8x8_t llr1 = vld1_s8(r1); - int8x8_t llr2 = vld1_s8(r2); - uint8x8_t bit = vld1_u8(dec); - int8x8_t result = - vbsl_s8(vceqz_u8(bit), vqadd_s8(llr2, llr1), vqsub_s8(llr2, llr1)); - vst1_s8(output, result); - r1 += 8; - r2 += 8; - dec += 8; - output += 8; - } - - l = length & 0x7; - while (l > 0) { - int8_t a = *r1++; - int8_t b = *r2++; 
- int8_t c = *dec++; - *output++ = sat_8((int16_t)(b + (1 - 2 * c) * a)); - l--; - } -} - -template -struct g_l_impl { - static inline void g_l(const int8_t *in, const uint8_t *dec, - const uint8_t *hist, int8_t *out) { - for (int i = 0; i < Nhalf; ++i) { - for (int j = 0; j < L; ++j) { - uint8_t h = L > 1 ? hist[j] : 0; - int8_t a = in[i * L + h]; - int8_t b = in[(i + Nhalf) * L + h]; - uint8_t c = dec[i * L + j]; - out[i * L + j] = sat_8((int16_t)(b + (1 - 2 * c) * a)); - } - } - } -}; - -static inline void g_l_x8(const int8_t *in, const uint8_t *dec, - const uint8x8_t h8, uint8x8_t xs_idx, int8_t *out) { - xs_idx += h8; - int8x8_t as = vld1_s8(in); - int8x8_t bs = vld1_s8(&in[8]); - - int8x8_t llr1 = vtbl1_s8(as, vreinterpret_s8_u8(xs_idx)); - int8x8_t llr2 = vtbl1_s8(bs, vreinterpret_s8_u8(xs_idx)); - - uint8x8_t bit = vld1_u8(dec); - - int8x8_t result = - vbsl_s8(vceqz_u8(bit), vqadd_s8(llr2, llr1), vqsub_s8(llr2, llr1)); - vst1_s8(out, result); -} - -static inline void g_l_x16(const int8_t *in, const uint8_t *dec, - const uint8x16_t h8, uint8x16_t xs_idx, - int8_t *out) { - xs_idx += h8; - int8x16_t as = vld1q_s8(in); - int8x16_t bs = vld1q_s8(&in[16]); - - int8x16_t llr1 = vqtbl1q_s8(as, xs_idx); - int8x16_t llr2 = vqtbl1q_s8(bs, xs_idx); - - uint8x16_t bit = vld1q_u8(dec); - - int8x16_t result = - vbslq_s8(vceqzq_u8(bit), vqaddq_s8(llr2, llr1), vqsubq_s8(llr2, llr1)); - vst1q_s8(out, result); -} - -template -static inline void g_l_x16_loop(const int8_t *in, const uint8_t *dec, - const uint8x16_t h8, uint8x16_t xs_idx, - int8_t *out) { - xs_idx += h8; - for (int i = 0; i < Nhalf; i += Max_Count) { - int8x16_t as = vld1q_s8(&in[i * L]); - int8x16_t bs = vld1q_s8(&in[(i + Nhalf) * L]); - - int8x16_t llr1 = vqtbl1q_s8(as, xs_idx); - int8x16_t llr2 = vqtbl1q_s8(bs, xs_idx); - - uint8x16_t bit = vld1q_u8(&dec[i * L]); - - int8x16_t result = - vbslq_s8(vceqzq_u8(bit), vqaddq_s8(llr2, llr1), vqsubq_s8(llr2, llr1)); - vst1q_s8(out, result); - out += 16; - } -} - -template -struct g_l_impl 2)>> { - static inline void g_l(const int8_t *in, const uint8_t *dec, - const uint8_t *hist, int8_t *out) { - uint8x16_t h8 = vld_histq_l8(hist); - uint8x16_t xs_idx = {0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8}; - g_l_x16_loop(in, dec, h8, xs_idx, out); - } -}; - -template -struct g_l_impl 4)>> { - static inline void g_l(const int8_t *in, const uint8_t *dec, - const uint8_t *hist, int8_t *out) { - uint8x16_t h8 = vld_histq_l4(hist); - uint8x16_t xs_idx = {0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12}; - g_l_x16_loop(in, dec, h8, xs_idx, out); - } -}; - -template -struct g_l_impl= 8)>> { - static inline void g_l(const int8_t *in, const uint8_t *dec, - const uint8_t *hist, int8_t *out) { - uint8x16_t h8 = vld_histq_l2(hist); - uint8x16_t xs_idx = {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}; - g_l_x16_loop(in, dec, h8, xs_idx, out); - } -}; - -template<> -struct g_l_impl<2, 4> { - static inline void g_l(const int8_t *in, const uint8_t *dec, - const uint8_t *hist, int8_t *out) { - // specialised N=2-byte chunks interleaved (times L=4). - uint8x8_t h8 = vld_hist_l4(hist); - uint8x8_t xs_idx = {0, 0, 0, 0, 4, 4, 4, 4}; - g_l_x8(in, dec, h8, xs_idx, out); - } -}; - -template<> -struct g_l_impl<2, 8> { - static inline void g_l(const int8_t *in, const uint8_t *dec, - const uint8_t *hist, int8_t *out) { - // specialised N=2-byte chunks interleaved (times L=8). 
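The NEON f/g kernels removed here (and re-homed in arm_polar_decoder_neon.hpp below) compute the successive-cancellation belief updates lane-wise; as the comments state, their scalar semantics are f(a, b) = sign(a*b) * min(|a|, |b|) for left children and g(a, b, c) = b + (1 - 2c) * a for right children. A scalar reference, for illustration only (sat8 mirrors the library's sat_8 saturation helper):

#include <cstdint>
#include <cstdlib>

static int8_t sat8(int v) {
  return (int8_t)(v > 127 ? 127 : (v < -128 ? -128 : v));
}

static int8_t f_ref(int8_t a, int8_t b) { // left-child LLR
  int s = ((a < 0) != (b < 0)) ? -1 : 1;  // sign of a*b (zero handled by m)
  int m = abs(a) < abs(b) ? abs(a) : abs(b);
  return sat8(s * m);
}

static int8_t g_ref(int8_t a, int8_t b, uint8_t c) { // right-child LLR
  return sat8(b + (c ? -a : a));          // b + (1 - 2c) * a
}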
- uint8x16_t h8 = vld_histq_l8(hist); - uint8x16_t xs_idx = {0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8}; - g_l_x16(in, dec, h8, xs_idx, out); - } -}; - -template<> -struct g_l_impl<4, 2> { - static inline void g_l(const int8_t *in, const uint8_t *dec, - const uint8_t *hist, int8_t *out) { - uint8x8_t h8 = vld_hist_l2(hist); - uint8x8_t xs_idx = {0, 0, 2, 2, 4, 4, 6, 6}; - g_l_x8(in, dec, h8, xs_idx, out); - } -}; - -template<> -struct g_l_impl<4, 4> { - static inline void g_l(const int8_t *in, const uint8_t *dec, - const uint8_t *hist, int8_t *out) { - // specialised N=4-byte chunks interleaved (times L=4). - uint8x16_t h8 = vld_histq_l4(hist); - uint8x16_t xs_idx = {0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12}; - g_l_x16(in, dec, h8, xs_idx, out); - } -}; - template -static inline void g_l(const int8_t *in, const uint8_t *dec, - const uint8_t *hist, int8_t *out) { +inline void g_l(const int8_t *in, const uint8_t *dec, const uint8_t *hist, + int8_t *out) { // Calculate beliefs for right children in the successive cancellation list // (SCL) algorithm: // g(a_h, b_h, c_i=0) = a_h + b_h @@ -346,7 +47,7 @@ static inline void g_l(const int8_t *in, const uint8_t *dec, } template -static inline void g_top(const int8_t *in, const uint8_t *dec, int8_t *out) { +inline void g_top(const int8_t *in, const uint8_t *dec, int8_t *out) { // no history here, since no differing beliefs to choose from. static_assert(Nhalf >= 16); static_assert(Nhalf % 4 == 0); @@ -393,261 +94,39 @@ static inline void g_top(const int8_t *in, const uint8_t *dec, int8_t *out) { } } -template -static inline void __attribute__((always_inline)) -combine_l(const uint8_t *dec1, const uint8_t *dec2, uint8_t *output, - const uint8_t *hist) { - static_assert(Nhalf >= 2); +template +inline void zip1_l(const int8_t *__restrict in, int8_t *__restrict out) { + static_assert(N % 16 == 0); if constexpr (L == 8) { - uint8x16_t h8 = vld_histq_l8(hist); - uint8x16_t x0_idx = {0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8}; - x0_idx += h8; - for (int i = 0; i < Nhalf; i += 2) { - uint8x16_t x0 = vld1q_u8(dec1); - uint8x16_t x1 = vld1q_u8(dec2); - x0 = vqtbl1q_u8(x0, x0_idx); - vst1q_u8(output, x0 ^ x1); - vst1q_u8(&output[Nhalf * L], x1); - dec1 += 16; - dec2 += 16; - output += 16; - } - } else if constexpr (L == 4 && Nhalf % 4 == 0) { - uint8x16_t h8 = vld_histq_l4(hist); - uint8x16_t x0_idx = {0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12}; - x0_idx += h8; - for (int i = 0; i < Nhalf; i += 4) { - uint8x16_t x0 = vld1q_u8(dec1); - uint8x16_t x1 = vld1q_u8(dec2); - x0 = vqtbl1q_u8(x0, x0_idx); - vst1q_u8(output, x0 ^ x1); - vst1q_u8(&output[Nhalf * L], x1); - dec1 += 16; - dec2 += 16; - output += 16; + for (int i = 0; i < N; i += 2) { + int8x16_t x1 = vld1q_dup_s8(in); + int8x16_t x2 = vld1q_dup_s8(in + 1); + vst1q_s8(out, vextq_s8(x1, x2, 8)); + in += 2; + out += 16; } - } else if constexpr (L == 2 && Nhalf % 8 == 0) { - uint8x16_t h8 = vld_histq_l2(hist); - uint8x16_t x0_idx = {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}; - x0_idx += h8; - for (int i = 0; i < Nhalf; i += 8) { - uint8x16_t x0 = vld1q_u8(dec1); - uint8x16_t x1 = vld1q_u8(dec2); - x0 = vqtbl1q_u8(x0, x0_idx); - vst1q_u8(output, x0 ^ x1); - vst1q_u8(&output[Nhalf * L], x1); - dec1 += 16; - dec2 += 16; - output += 16; + } else if constexpr (L == 4) { + for (int i = 0; i < N; i += 4) { + int8x8_t x0 = vld1s_s8(in); + int8x16_t x1 = vzip1l_s8(x0, x0); + int8x16_t x2 = vzip1q_s8(x1, x1); + vst1q_s8(out, x2); + in += 4; + out += 16; } - } else if constexpr (L 
== 1) { - for (int i = 0; i < Nhalf; ++i) { - output[i] = dec1[i] ^ dec2[i]; - output[Nhalf + i] = dec2[i]; + } else if constexpr (L == 2) { + for (int i = 0; i < N; i += 8) { + int8x8_t x0 = vld1_s8(in); + int8x16_t x1 = vzip1l_s8(x0, x0); + vst1q_s8(out, x1); + in += 8; + out += 16; } } else { - uint8_t x0[Nhalf * L]; - uint8_t x1[Nhalf * L]; - for (int j = 0; j < Nhalf; ++j) { - for (int i = 0; i < L; ++i) { - uint8_t h = L > 1 ? hist[i] : 0; - x0[L * j + i] = dec1[L * j + h]; - x1[L * j + i] = dec2[L * j + i]; - } - } - for (int i = 0; i < L * Nhalf; ++i) { - output[i] = x0[i] ^ x1[i]; - output[L * Nhalf + i] = x1[i]; - } - } -} - -template<> -inline void __attribute__((always_inline)) -combine_l<2, 2>(const uint8_t *dec1, const uint8_t *dec2, uint8_t *output, - const uint8_t *hist) { - uint8x8_t h8 = vld_hist_l2(hist); - uint8x8_t x0 = vld1s_u8(dec1); - uint8x8_t x1 = vld1s_u8(dec2); - - uint8x8_t x0_idx = {0, 0, 2, 2, 4, 4, 6, 6}; - x0_idx += h8; - x0 = vtbl1_u8(x0, x0_idx); - - *(uint32_t *)output = vreinterpret_u32_u8(x0 ^ x1)[0]; - output += 4; - *(uint32_t *)output = vreinterpret_u32_u8(x1)[0]; -} - -template<> -inline void __attribute__((always_inline)) -combine_l<2, 4>(const uint8_t *dec1, const uint8_t *dec2, uint8_t *output, - const uint8_t *hist) { - uint8x8_t h8 = vld_hist_l4(hist); - uint8x8_t x0 = vld1_u8(dec1); - uint8x8_t x1 = vld1_u8(dec2); - - uint8x8_t x0_idx = {0, 0, 0, 0, 4, 4, 4, 4}; - x0_idx += h8; - x0 = vtbl1_u8(x0, x0_idx); - - vst1_u8(output, x0 ^ x1); - vst1_u8(&output[8], x1); -} - -template<> -inline void __attribute__((always_inline)) -combine_l<4, 2>(const uint8_t *dec1, const uint8_t *dec2, uint8_t *output, - const uint8_t *hist) { - uint8x8_t h8 = vld_hist_l2(hist); - uint8x8_t x0 = vld1_u8(dec1); - uint8x8_t x1 = vld1_u8(dec2); - - uint8x8_t x0_idx = {0, 0, 2, 2, 4, 4, 6, 6}; - x0_idx += h8; - x0 = vtbl1_u8(x0, x0_idx); - - vst1_u8(output, x0 ^ x1); - vst1_u8(&output[8], x1); -} - -template -static inline void combine_hist(const uint8_t *hist1, const uint8_t *hist2, - uint8_t *hist) { - for (int i = 0; i < L; ++i) { - hist[i] = hist1[hist2[i]]; - } -} - -template<> -inline void combine_hist<8>(const uint8_t *hist1, const uint8_t *hist2, - uint8_t *hist) { - uint8x8_t h1 = vld1_u8(hist1); - uint8x8_t h2 = vld1_u8(hist2); - uint8x8_t h = vtbl1_u8(h1, h2); - vst1_u8(hist, h); -} - -template<> -inline void combine_hist<4>(const uint8_t *hist1, const uint8_t *hist2, - uint8_t *hist) { - uint8x8_t h1 = vld1s_u8(hist1); - uint8x8_t h2 = vld1s_u8(hist2); - uint8x8_t h = vtbl1_u8(h1, h2); - *(uint32_t *)hist = vreinterpret_u32_u8(h)[0]; -} - -template<> -inline void combine_hist<2>(const uint8_t *hist1, const uint8_t *hist2, - uint8_t *hist) { - uint8x8_t h1 = vld1h_u8(hist1); - uint8x8_t h2 = vld1h_u8(hist2); - uint8x8_t h = vtbl1_u8(h1, h2); - *(uint16_t *)hist = vreinterpret_u16_u8(h)[0]; -} - -template<> -inline void combine_hist<1>(const uint8_t * /*hist1*/, - const uint8_t * /*hist2*/, uint8_t * /*hist*/) { - // nothing to do if L=1, only one choice of history. -} - -template -static inline void combine_seq_out(const uint8_t *seq1, const uint8_t *seq2, - const uint8_t *hist2, uint8_t *p_u_seq_out) { - for (int i = 0; i < L; ++i) { - uint8_t h = L > 1 ? 
hist2[i] : 0; - memcpy((void *)&p_u_seq_out[i * N], (const void *)&seq1[h * N / 2], N / 2); - memcpy((void *)&p_u_seq_out[i * N + N / 2], (const void *)&seq2[i * N / 2], - N / 2); + assert(false && "unimplemented!"); } } -template<> -inline void combine_seq_out<2, 2>(const uint8_t *seq1, const uint8_t *seq2, - const uint8_t *hist2, uint8_t *p_u_seq_out) { - uint8x8_t h = vld1h_u8(hist2); - uint8x8_t s1 = vtbl1_u8(vld1h_u8(seq1), h); - uint8x8_t s2 = vld1h_u8(seq2); - *(uint32_t *)p_u_seq_out = vreinterpret_u32_u8(vzip1_u8(s1, s2))[0]; -} - -template<> -inline void combine_seq_out<2, 4>(const uint8_t *seq1, const uint8_t *seq2, - const uint8_t *hist2, uint8_t *p_u_seq_out) { - uint8x8_t h = vld1s_u8(hist2); - uint8x8_t s1 = vtbl1_u8(vld1s_u8(seq1), h); - uint8x8_t s2 = vld1s_u8(seq2); - vst1_u8(p_u_seq_out, vzip1_u8(s1, s2)); -} - -template<> -inline void combine_seq_out<2, 8>(const uint8_t *seq1, const uint8_t *seq2, - const uint8_t *hist2, uint8_t *p_u_seq_out) { - uint8x8_t h = vld1_u8(hist2); - uint8x8_t s1 = vtbl1_u8(vld1_u8(seq1), h); - uint8x8_t s2 = vld1_u8(seq2); - vst1q_u8(p_u_seq_out, vzip1l_u8(s1, s2)); -} - -template<> -inline void combine_seq_out<4, 2>(const uint8_t *seq1, const uint8_t *seq2, - const uint8_t *hist2, uint8_t *p_u_seq_out) { - uint16x4_t in1 = vld1s_u16((const uint16_t *)seq1); - uint16x4_t in2 = vld1s_u16((const uint16_t *)seq2); - - uint8x8_t h = vld1h_u8(hist2); - h = vzip1_u8(h, h); - uint8x8_t h_ofs0 = {0, 1, 0, 1, 0, 1, 0, 1}; - h_ofs0 = vsli_n_u8(h_ofs0, h, 1); - - in1 = vreinterpret_u16_u8(vtbl1_u8(vreinterpret_u8_u16(in1), h_ofs0)); - - vst1_u16((uint16_t *)p_u_seq_out, vzip1_u16(in1, in2)); -} - -template<> -inline void combine_seq_out<4, 4>(const uint8_t *seq1, const uint8_t *seq2, - const uint8_t *hist2, uint8_t *p_u_seq_out) { - uint16x4_t in1 = vld1_u16((const uint16_t *)seq1); - uint16x4_t in2 = vld1_u16((const uint16_t *)seq2); - - uint8x8_t h = vld1s_u8(hist2); - h = vzip1_u8(h, h); - uint8x8_t h_ofs0 = {0, 1, 0, 1, 0, 1, 0, 1}; - h_ofs0 = vsli_n_u8(h_ofs0, h, 1); - - in1 = vreinterpret_u16_u8(vtbl1_u8(vreinterpret_u8_u16(in1), h_ofs0)); - - vst1q_u16((uint16_t *)p_u_seq_out, vzip1l_u16(in1, in2)); -} - -template<> -inline void combine_seq_out<4, 8>(const uint8_t *seq1, const uint8_t *seq2, - const uint8_t *hist2, uint8_t *p_u_seq_out) { - uint16x8_t in1 = vld1q_u16((const uint16_t *)seq1); - uint16x8_t in2 = vld1q_u16((const uint16_t *)seq2); - - uint8x16_t h = vcombine_u8(vld1_u8(hist2), vdup_n_u8(0)); - h = vzip1q_u8(h, h); - uint8x16_t h_ofs0 = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; - h_ofs0 = vsliq_n_u8(h_ofs0, h, 1); - - in1 = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(in1), h_ofs0)); - - vst1q_u16((uint16_t *)p_u_seq_out, vzip1q_u16(in1, in2)); - vst1q_u16((uint16_t *)p_u_seq_out + 8, vzip2q_u16(in1, in2)); -} - -static inline void __attribute__((always_inline)) -setup_sc_decoder(sc_decoder *decoder, const uint8_t *frozen) { - decoder->curr_bit_idx = 0; - decoder->frozen_bits_mask = frozen; -} - -template -struct polar_stage; - template struct sort_decoder_entries_impl { static inline void sort(uint8_t *est_bits, uint32_t *pm, uint8_t *hist) { @@ -678,11 +157,19 @@ struct sort_decoder_entries_impl { }; template -static void sort_decoder_entries(uint8_t *est_bits, uint32_t *pm, - uint8_t *hist) { +void sort_decoder_entries(uint8_t *est_bits, uint32_t *pm, uint8_t *hist) { sort_decoder_entries_impl::sort(est_bits, pm, hist); } +template +struct polar_stage; + +inline uint32x4_t vmlsl_u32_s16(uint32x4_t acc, uint16x4_t x, 
uint16x4_t y) {
+  return vreinterpretq_u32_s32(vmlsl_s16(vreinterpretq_s32_u32(acc),
+                                         vreinterpret_s16_u16(x),
+                                         vreinterpret_s16_u16(y)));
+}
+
 template<int L>
 struct polar_stage<2, L> {
   static inline void __attribute__((always_inline))
@@ -1035,7 +522,7 @@ struct polar_stage<2, 8> {
   }
 };
 
-static inline uint8_t __attribute__((always_inline))
+inline uint8_t __attribute__((always_inline))
 estimate_bit(uint32_t frozen, const int32_t l_u, uint32_t idx) {
   return (frozen & 0x80) != 0U ? 0 : static_cast<uint8_t>(l_u < 0);
 }
diff --git a/src/UpperPHY/Polar/arm_polar_decoder.hpp b/src/UpperPHY/Polar/arm_polar_decoder.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..ef2091cf7f835d26b0bc90b1c6b86f3219ed5c6d
--- /dev/null
+++ b/src/UpperPHY/Polar/arm_polar_decoder.hpp
@@ -0,0 +1,21 @@
+/*
+  Arm RAN Acceleration Library
+  Copyright 2023-2024 Arm Limited and/or its affiliates
+*/
+
+#pragma once
+
+namespace {
+
+// Extract the sign of an integer
+inline int8_t __attribute__((always_inline)) sign(int32_t x) {
+  return static_cast<int8_t>(x > 0) - static_cast<int8_t>(x < 0);
+}
+
+// Calculate the minimum of the absolute values of two integers
+inline int16_t __attribute__((always_inline))
+min(const int8_t x, const int8_t y) {
+  return abs(x) < abs(y) ? abs(x) : abs(y);
+}
+
+} // namespace
diff --git a/src/UpperPHY/Polar/arm_polar_decoder_neon.hpp b/src/UpperPHY/Polar/arm_polar_decoder_neon.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..249f2e0a961c22a1e8a8b8f3874924fa4d7a00e1
--- /dev/null
+++ b/src/UpperPHY/Polar/arm_polar_decoder_neon.hpp
@@ -0,0 +1,507 @@
+/*
+  Arm RAN Acceleration Library
+  Copyright 2023-2024 Arm Limited and/or its affiliates
+*/
+
+#pragma once
+#include "arm_polar_decoder.hpp"
+
+namespace {
+
+inline uint8x16_t vld_histq_l8(const uint8_t *hist) {
+  return vreinterpretq_u8_u64(vld1q_dup_u64((const uint64_t *)hist));
+}
+
+inline uint8x16_t vld_histq_l4(const uint8_t *hist) {
+  return vreinterpretq_u8_u32(vld1q_dup_u32((const uint32_t *)hist));
+}
+
+inline uint8x8_t vld_hist_l4(const uint8_t *hist) {
+  return vreinterpret_u8_u32(vld1_dup_u32((const uint32_t *)hist));
+}
+
+inline uint8x16_t vld_histq_l2(const uint8_t *hist) {
+  return vreinterpretq_u8_u16(vld1q_dup_u16((const uint16_t *)hist));
+}
+
+inline uint8x8_t vld_hist_l2(const uint8_t *hist) {
+  return vreinterpret_u8_u16(vld1_dup_u16((const uint16_t *)hist));
+}
+
+template<int Nhalf, int L, typename Enable = void>
+struct g_l_impl {
+  static inline void g_l(const int8_t *in, const uint8_t *dec,
+                         const uint8_t *hist, int8_t *out) {
+    for (int i = 0; i < Nhalf; ++i) {
+      for (int j = 0; j < L; ++j) {
+        uint8_t h = L > 1 ?
hist[j] : 0;
+        int8_t a = in[i * L + h];
+        int8_t b = in[(i + Nhalf) * L + h];
+        uint8_t c = dec[i * L + j];
+        out[i * L + j] = sat_8((int16_t)(b + (1 - 2 * c) * a));
+      }
+    }
+  }
+};
+
+template<int Nhalf, int L, int Max_Count>
+inline void g_l_x16_loop(const int8_t *in, const uint8_t *dec,
+                         const uint8x16_t h8, uint8x16_t xs_idx, int8_t *out) {
+  xs_idx += h8;
+  for (int i = 0; i < Nhalf; i += Max_Count) {
+    int8x16_t as = vld1q_s8(&in[i * L]);
+    int8x16_t bs = vld1q_s8(&in[(i + Nhalf) * L]);
+
+    int8x16_t llr1 = vqtbl1q_s8(as, xs_idx);
+    int8x16_t llr2 = vqtbl1q_s8(bs, xs_idx);
+
+    uint8x16_t bit = vld1q_u8(&dec[i * L]);
+
+    int8x16_t result =
+        vbslq_s8(vceqzq_u8(bit), vqaddq_s8(llr2, llr1), vqsubq_s8(llr2, llr1));
+    vst1q_s8(out, result);
+    out += 16;
+  }
+}
+
+template<int Nhalf>
+struct g_l_impl<Nhalf, 8, std::enable_if_t<(Nhalf > 2)>> {
+  static inline void g_l(const int8_t *in, const uint8_t *dec,
+                         const uint8_t *hist, int8_t *out) {
+    uint8x16_t h8 = vld_histq_l8(hist);
+    uint8x16_t xs_idx = {0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8};
+    g_l_x16_loop<Nhalf, 8, 2>(in, dec, h8, xs_idx, out);
+  }
+};
+
+template<int Nhalf>
+struct g_l_impl<Nhalf, 4, std::enable_if_t<(Nhalf > 4)>> {
+  static inline void g_l(const int8_t *in, const uint8_t *dec,
+                         const uint8_t *hist, int8_t *out) {
+    uint8x16_t h8 = vld_histq_l4(hist);
+    uint8x16_t xs_idx = {0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12};
+    g_l_x16_loop<Nhalf, 4, 4>(in, dec, h8, xs_idx, out);
+  }
+};
+
+template<int Nhalf>
+struct g_l_impl<Nhalf, 2, std::enable_if_t<(Nhalf >= 8)>> {
+  static inline void g_l(const int8_t *in, const uint8_t *dec,
+                         const uint8_t *hist, int8_t *out) {
+    uint8x16_t h8 = vld_histq_l2(hist);
+    uint8x16_t xs_idx = {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14};
+    g_l_x16_loop<Nhalf, 2, 8>(in, dec, h8, xs_idx, out);
+  }
+};
+
+inline void g_l_x8(const int8_t *in, const uint8_t *dec, const uint8x8_t h8,
+                   uint8x8_t xs_idx, int8_t *out) {
+  xs_idx += h8;
+  int8x8_t as = vld1_s8(in);
+  int8x8_t bs = vld1_s8(&in[8]);
+
+  int8x8_t llr1 = vtbl1_s8(as, vreinterpret_s8_u8(xs_idx));
+  int8x8_t llr2 = vtbl1_s8(bs, vreinterpret_s8_u8(xs_idx));
+
+  uint8x8_t bit = vld1_u8(dec);
+
+  int8x8_t result =
+      vbsl_s8(vceqz_u8(bit), vqadd_s8(llr2, llr1), vqsub_s8(llr2, llr1));
+  vst1_s8(out, result);
+}
+
+inline void g_l_x16(const int8_t *in, const uint8_t *dec, const uint8x16_t h8,
+                    uint8x16_t xs_idx, int8_t *out) {
+  xs_idx += h8;
+  int8x16_t as = vld1q_s8(in);
+  int8x16_t bs = vld1q_s8(&in[16]);
+
+  int8x16_t llr1 = vqtbl1q_s8(as, xs_idx);
+  int8x16_t llr2 = vqtbl1q_s8(bs, xs_idx);
+
+  uint8x16_t bit = vld1q_u8(dec);
+
+  int8x16_t result =
+      vbslq_s8(vceqzq_u8(bit), vqaddq_s8(llr2, llr1), vqsubq_s8(llr2, llr1));
+  vst1q_s8(out, result);
+}
+
+template<>
+struct g_l_impl<2, 4> {
+  static inline void g_l(const int8_t *in, const uint8_t *dec,
+                         const uint8_t *hist, int8_t *out) {
+    // specialised N=2-byte chunks interleaved (times L=4).
+    uint8x8_t h8 = vld_hist_l4(hist);
+    uint8x8_t xs_idx = {0, 0, 0, 0, 4, 4, 4, 4};
+    g_l_x8(in, dec, h8, xs_idx, out);
+  }
+};
+
+template<>
+struct g_l_impl<2, 8> {
+  static inline void g_l(const int8_t *in, const uint8_t *dec,
+                         const uint8_t *hist, int8_t *out) {
+    // specialised N=2-byte chunks interleaved (times L=8).
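+    // With Nhalf = 2 and L = 8, one 16-byte vector holds both chunks of all
+    // eight candidate paths, so a single table lookup indexed by
+    // xs_idx + h8 gathers every lane's parent-path LLR in one step.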
+ uint8x16_t h8 = vld_histq_l8(hist); + uint8x16_t xs_idx = {0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8}; + g_l_x16(in, dec, h8, xs_idx, out); + } +}; + +template<> +struct g_l_impl<4, 2> { + static inline void g_l(const int8_t *in, const uint8_t *dec, + const uint8_t *hist, int8_t *out) { + uint8x8_t h8 = vld_hist_l2(hist); + uint8x8_t xs_idx = {0, 0, 2, 2, 4, 4, 6, 6}; + g_l_x8(in, dec, h8, xs_idx, out); + } +}; + +template<> +struct g_l_impl<4, 4> { + static inline void g_l(const int8_t *in, const uint8_t *dec, + const uint8_t *hist, int8_t *out) { + // specialised N=4-byte chunks interleaved (times L=4). + uint8x16_t h8 = vld_histq_l4(hist); + uint8x16_t xs_idx = {0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12}; + g_l_x16(in, dec, h8, xs_idx, out); + } +}; + +template +inline void __attribute__((always_inline)) +combine_l(const uint8_t *dec1, const uint8_t *dec2, uint8_t *output, + const uint8_t *hist) { + static_assert(Nhalf >= 2); + if constexpr (L == 8) { + uint8x16_t h8 = vld_histq_l8(hist); + uint8x16_t x0_idx = {0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8}; + x0_idx += h8; + for (int i = 0; i < Nhalf; i += 2) { + uint8x16_t x0 = vld1q_u8(dec1); + uint8x16_t x1 = vld1q_u8(dec2); + x0 = vqtbl1q_u8(x0, x0_idx); + vst1q_u8(output, x0 ^ x1); + vst1q_u8(&output[Nhalf * L], x1); + dec1 += 16; + dec2 += 16; + output += 16; + } + } else if constexpr (L == 4 && Nhalf % 4 == 0) { + uint8x16_t h8 = vld_histq_l4(hist); + uint8x16_t x0_idx = {0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12}; + x0_idx += h8; + for (int i = 0; i < Nhalf; i += 4) { + uint8x16_t x0 = vld1q_u8(dec1); + uint8x16_t x1 = vld1q_u8(dec2); + x0 = vqtbl1q_u8(x0, x0_idx); + vst1q_u8(output, x0 ^ x1); + vst1q_u8(&output[Nhalf * L], x1); + dec1 += 16; + dec2 += 16; + output += 16; + } + } else if constexpr (L == 2 && Nhalf % 8 == 0) { + uint8x16_t h8 = vld_histq_l2(hist); + uint8x16_t x0_idx = {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}; + x0_idx += h8; + for (int i = 0; i < Nhalf; i += 8) { + uint8x16_t x0 = vld1q_u8(dec1); + uint8x16_t x1 = vld1q_u8(dec2); + x0 = vqtbl1q_u8(x0, x0_idx); + vst1q_u8(output, x0 ^ x1); + vst1q_u8(&output[Nhalf * L], x1); + dec1 += 16; + dec2 += 16; + output += 16; + } + } else if constexpr (L == 1) { + for (int i = 0; i < Nhalf; ++i) { + output[i] = dec1[i] ^ dec2[i]; + output[Nhalf + i] = dec2[i]; + } + } else { + uint8_t x0[Nhalf * L]; + uint8_t x1[Nhalf * L]; + for (int j = 0; j < Nhalf; ++j) { + for (int i = 0; i < L; ++i) { + uint8_t h = L > 1 ? 
hist[i] : 0; + x0[L * j + i] = dec1[L * j + h]; + x1[L * j + i] = dec2[L * j + i]; + } + } + for (int i = 0; i < L * Nhalf; ++i) { + output[i] = x0[i] ^ x1[i]; + output[L * Nhalf + i] = x1[i]; + } + } +} + +template<> +inline void __attribute__((always_inline)) +combine_l<2, 2>(const uint8_t *dec1, const uint8_t *dec2, uint8_t *output, + const uint8_t *hist) { + uint8x8_t h8 = vld_hist_l2(hist); + uint8x8_t x0 = vld1s_u8(dec1); + uint8x8_t x1 = vld1s_u8(dec2); + + uint8x8_t x0_idx = {0, 0, 2, 2, 4, 4, 6, 6}; + x0_idx += h8; + x0 = vtbl1_u8(x0, x0_idx); + + *(uint32_t *)output = vreinterpret_u32_u8(x0 ^ x1)[0]; + output += 4; + *(uint32_t *)output = vreinterpret_u32_u8(x1)[0]; +} + +template<> +inline void __attribute__((always_inline)) +combine_l<2, 4>(const uint8_t *dec1, const uint8_t *dec2, uint8_t *output, + const uint8_t *hist) { + uint8x8_t h8 = vld_hist_l4(hist); + uint8x8_t x0 = vld1_u8(dec1); + uint8x8_t x1 = vld1_u8(dec2); + + uint8x8_t x0_idx = {0, 0, 0, 0, 4, 4, 4, 4}; + x0_idx += h8; + x0 = vtbl1_u8(x0, x0_idx); + + vst1_u8(output, x0 ^ x1); + vst1_u8(&output[8], x1); +} + +template<> +inline void __attribute__((always_inline)) +combine_l<4, 2>(const uint8_t *dec1, const uint8_t *dec2, uint8_t *output, + const uint8_t *hist) { + uint8x8_t h8 = vld_hist_l2(hist); + uint8x8_t x0 = vld1_u8(dec1); + uint8x8_t x1 = vld1_u8(dec2); + + uint8x8_t x0_idx = {0, 0, 2, 2, 4, 4, 6, 6}; + x0_idx += h8; + x0 = vtbl1_u8(x0, x0_idx); + + vst1_u8(output, x0 ^ x1); + vst1_u8(&output[8], x1); +} + +template +inline void combine_seq_out(const uint8_t *seq1, const uint8_t *seq2, + const uint8_t *hist2, uint8_t *p_u_seq_out) { + for (int i = 0; i < L; ++i) { + uint8_t h = L > 1 ? hist2[i] : 0; + memcpy((void *)&p_u_seq_out[i * N], (const void *)&seq1[h * N / 2], N / 2); + memcpy((void *)&p_u_seq_out[i * N + N / 2], (const void *)&seq2[i * N / 2], + N / 2); + } +} + +template<> +inline void combine_seq_out<2, 2>(const uint8_t *seq1, const uint8_t *seq2, + const uint8_t *hist2, uint8_t *p_u_seq_out) { + uint8x8_t h = vld1h_u8(hist2); + uint8x8_t s1 = vtbl1_u8(vld1h_u8(seq1), h); + uint8x8_t s2 = vld1h_u8(seq2); + *(uint32_t *)p_u_seq_out = vreinterpret_u32_u8(vzip1_u8(s1, s2))[0]; +} + +template<> +inline void combine_seq_out<2, 4>(const uint8_t *seq1, const uint8_t *seq2, + const uint8_t *hist2, uint8_t *p_u_seq_out) { + uint8x8_t h = vld1s_u8(hist2); + uint8x8_t s1 = vtbl1_u8(vld1s_u8(seq1), h); + uint8x8_t s2 = vld1s_u8(seq2); + vst1_u8(p_u_seq_out, vzip1_u8(s1, s2)); +} + +template<> +inline void combine_seq_out<2, 8>(const uint8_t *seq1, const uint8_t *seq2, + const uint8_t *hist2, uint8_t *p_u_seq_out) { + uint8x8_t h = vld1_u8(hist2); + uint8x8_t s1 = vtbl1_u8(vld1_u8(seq1), h); + uint8x8_t s2 = vld1_u8(seq2); + vst1q_u8(p_u_seq_out, vzip1l_u8(s1, s2)); +} + +template<> +inline void combine_seq_out<4, 2>(const uint8_t *seq1, const uint8_t *seq2, + const uint8_t *hist2, uint8_t *p_u_seq_out) { + uint16x4_t in1 = vld1s_u16((const uint16_t *)seq1); + uint16x4_t in2 = vld1s_u16((const uint16_t *)seq2); + + uint8x8_t h = vld1h_u8(hist2); + h = vzip1_u8(h, h); + uint8x8_t h_ofs0 = {0, 1, 0, 1, 0, 1, 0, 1}; + h_ofs0 = vsli_n_u8(h_ofs0, h, 1); + + in1 = vreinterpret_u16_u8(vtbl1_u8(vreinterpret_u8_u16(in1), h_ofs0)); + + vst1_u16((uint16_t *)p_u_seq_out, vzip1_u16(in1, in2)); +} + +template<> +inline void combine_seq_out<4, 4>(const uint8_t *seq1, const uint8_t *seq2, + const uint8_t *hist2, uint8_t *p_u_seq_out) { + uint16x4_t in1 = vld1_u16((const uint16_t *)seq1); + uint16x4_t in2 = vld1_u16((const 
uint16_t *)seq2); + + uint8x8_t h = vld1s_u8(hist2); + h = vzip1_u8(h, h); + uint8x8_t h_ofs0 = {0, 1, 0, 1, 0, 1, 0, 1}; + h_ofs0 = vsli_n_u8(h_ofs0, h, 1); + + in1 = vreinterpret_u16_u8(vtbl1_u8(vreinterpret_u8_u16(in1), h_ofs0)); + + vst1q_u16((uint16_t *)p_u_seq_out, vzip1l_u16(in1, in2)); +} + +template<> +inline void combine_seq_out<4, 8>(const uint8_t *seq1, const uint8_t *seq2, + const uint8_t *hist2, uint8_t *p_u_seq_out) { + uint16x8_t in1 = vld1q_u16((const uint16_t *)seq1); + uint16x8_t in2 = vld1q_u16((const uint16_t *)seq2); + + uint8x16_t h = vcombine_u8(vld1_u8(hist2), vdup_n_u8(0)); + h = vzip1q_u8(h, h); + uint8x16_t h_ofs0 = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + h_ofs0 = vsliq_n_u8(h_ofs0, h, 1); + + in1 = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(in1), h_ofs0)); + + vst1q_u16((uint16_t *)p_u_seq_out, vzip1q_u16(in1, in2)); + vst1q_u16((uint16_t *)p_u_seq_out + 8, vzip2q_u16(in1, in2)); +} + +template +inline void g(const int8_t *r1, const int8_t *r2, const uint8_t *dec, + int8_t *output) { + // Calculate beliefs for right children in the successive cancellation (SC) + // algorithm: + // g(a, b, c=0) = a + b + // g(a, b, c=1) = a - b + int16_t l = Length >> 4; + while (l > 0) { + int8x16_t llr1 = vld1q_s8(r1); + int8x16_t llr2 = vld1q_s8(r2); + uint8x16_t bit = vld1q_u8(dec); + int8x16_t result = + vbslq_s8(vceqzq_u8(bit), vqaddq_s8(llr2, llr1), vqsubq_s8(llr2, llr1)); + vst1q_s8(output, result); + l--; + r1 += 16; + r2 += 16; + dec += 16; + output += 16; + } + + if ((Length >> 3) & 1) { + int8x8_t llr1 = vld1_s8(r1); + int8x8_t llr2 = vld1_s8(r2); + uint8x8_t bit = vld1_u8(dec); + int8x8_t result = + vbsl_s8(vceqz_u8(bit), vqadd_s8(llr2, llr1), vqsub_s8(llr2, llr1)); + vst1_s8(output, result); + r1 += 8; + r2 += 8; + dec += 8; + output += 8; + } + + l = Length & 0x7; + while (l > 0) { + int8_t a = *r1++; + int8_t b = *r2++; + int8_t c = *dec++; + *output++ = sat_8((int16_t)(b + (1 - 2 * c) * a)); + l--; + } +} + +// calculate beliefs for left children in SCL algorithm +template +inline void f(const int8_t *r1, const int8_t *r2, int8_t *output) { + int16_t l = Length >> 4; + while (l > 0) { + int8x16_t llr1 = vld1q_s8(r1); + int8x16_t llr2 = vld1q_s8(r2); + uint8x16_t sign_vect = vcltzq_s8(veorq_s8(llr1, llr2)); + llr1 = vqabsq_s8(llr1); + llr2 = vqabsq_s8(llr2); + int8x16_t result = vminq_s8(llr1, llr2); + int8x16_t result_neg = vnegq_s8(result); + result = vbslq_s8(sign_vect, result_neg, result); + vst1q_s8(output, result); + l--; + r1 += 16; + r2 += 16; + output += 16; + } + + if ((Length >> 3) & 1) { + int8x8_t llr1 = vld1_s8(r1); + int8x8_t llr2 = vld1_s8(r2); + uint8x8_t sign_vect = vcltz_s8(veor_s8(llr1, llr2)); + llr1 = vqabs_s8(llr1); + llr2 = vqabs_s8(llr2); + int8x8_t result = vmin_s8(llr1, llr2); + int8x8_t result_neg = vneg_s8(result); + result = vbsl_s8(sign_vect, result_neg, result); + vst1_s8(output, result); + r1 += 8; + r2 += 8; + output += 8; + } + + l = Length & 0x7; + while (l > 0) { + int8_t a = *r1++; + int8_t b = *r2++; + *output++ = sat_8(sign(a * b) * min(a, b)); + l--; + } +} + +template +inline void combine_hist(const uint8_t *hist1, const uint8_t *hist2, + uint8_t *hist) { + for (int i = 0; i < L; ++i) { + hist[i] = hist1[hist2[i]]; + } +} + +template<> +inline void combine_hist<8>(const uint8_t *hist1, const uint8_t *hist2, + uint8_t *hist) { + uint8x8_t h1 = vld1_u8(hist1); + uint8x8_t h2 = vld1_u8(hist2); + uint8x8_t h = vtbl1_u8(h1, h2); + vst1_u8(hist, h); +} + +template<> +inline void combine_hist<4>(const uint8_t 
*hist1, const uint8_t *hist2, + uint8_t *hist) { + uint8x8_t h1 = vld1s_u8(hist1); + uint8x8_t h2 = vld1s_u8(hist2); + uint8x8_t h = vtbl1_u8(h1, h2); + *(uint32_t *)hist = vreinterpret_u32_u8(h)[0]; +} + +template<> +inline void combine_hist<2>(const uint8_t *hist1, const uint8_t *hist2, + uint8_t *hist) { + uint8x8_t h1 = vld1h_u8(hist1); + uint8x8_t h2 = vld1h_u8(hist2); + uint8x8_t h = vtbl1_u8(h1, h2); + *(uint16_t *)hist = vreinterpret_u16_u8(h)[0]; +} + +template<> +inline void combine_hist<1>(const uint8_t * /*hist1*/, + const uint8_t * /*hist2*/, uint8_t * /*hist*/) { + // nothing to do if L=1, only one choice of history. +} + +} // namespace \ No newline at end of file diff --git a/src/UpperPHY/Polar/arm_polar_encoder.c b/src/UpperPHY/Polar/arm_polar_encoder.c index 9441c31ce481ba8407da2013de72d9913fc4fb35..cd7412529419448403e25c4de60a9460dd614773 100644 --- a/src/UpperPHY/Polar/arm_polar_encoder.c +++ b/src/UpperPHY/Polar/arm_polar_encoder.c @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/src/UpperPHY/Polar/arm_polar_frozen_bits.cpp b/src/UpperPHY/Polar/arm_polar_frozen_bits.cpp index 80fb6ae5da82f15cebe67ba713a7f26e8c2c54f6..6887a7447d8a0a4270750c8f22da94a27dc8b3f2 100644 --- a/src/UpperPHY/Polar/arm_polar_frozen_bits.cpp +++ b/src/UpperPHY/Polar/arm_polar_frozen_bits.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/src/UpperPHY/Polar/arm_polar_rate_matching.cpp b/src/UpperPHY/Polar/arm_polar_rate_matching.cpp index 8e7f72e50aaf0bd6746c2100c9d7b91ed995994e..2d6ca39c819faa1d99e8315105f95b9574ddf9a9 100644 --- a/src/UpperPHY/Polar/arm_polar_rate_matching.cpp +++ b/src/UpperPHY/Polar/arm_polar_rate_matching.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "utils/allocators.hpp" diff --git a/src/UpperPHY/Polar/arm_polar_rate_recovery.cpp b/src/UpperPHY/Polar/arm_polar_rate_recovery.cpp index 875a5257f71c477766cc4ca45a0bd61f3b995319..d1c6ff562f1cd5495e01474916b07a1923c753ce 100644 --- a/src/UpperPHY/Polar/arm_polar_rate_recovery.cpp +++ b/src/UpperPHY/Polar/arm_polar_rate_recovery.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "utils/allocators.hpp" diff --git a/src/UpperPHY/Polar/arm_polar_subchannel_deinterleave.cpp b/src/UpperPHY/Polar/arm_polar_subchannel_deinterleave.cpp index c1c1e3bf6f41e9c6c07a78ead678ee6a75ff2a9a..8d0a317e33707326446e7f4a0b1b097b381bd2ce 100644 --- a/src/UpperPHY/Polar/arm_polar_subchannel_deinterleave.cpp +++ b/src/UpperPHY/Polar/arm_polar_subchannel_deinterleave.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/src/UpperPHY/Polar/arm_polar_subchannel_interleave.cpp b/src/UpperPHY/Polar/arm_polar_subchannel_interleave.cpp index f9a84189ee9fda5b5f86d7611d879880584d7a2e..d450c19abbb918204ea40deb6c61bbe6ed8f79a8 100644 --- a/src/UpperPHY/Polar/arm_polar_subchannel_interleave.cpp +++ 
b/src/UpperPHY/Polar/arm_polar_subchannel_interleave.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/src/UpperPHY/Turbo/arm_turbo_decoder.cpp b/src/UpperPHY/Turbo/arm_turbo_decoder.cpp index 3e544c2fc028944807fb9ef6e96ce4229c2b38fa..f0935c5d569dd65513705978f04880681c5c98b2 100644 --- a/src/UpperPHY/Turbo/arm_turbo_decoder.cpp +++ b/src/UpperPHY/Turbo/arm_turbo_decoder.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "intrinsics.h" @@ -127,13 +127,10 @@ void trellis_termination(const float32x4_t *sys, const float32x4_t *par, // previous decoding stage (extrinsic) void decode_step(const float32x4_t *sys, const float32x4_t *par, const float32x4_t *extrinsic, uint32_t k4, float32x4_t *llr, - float32x4_t *alpha, float32x4_t *beta, - const float32x4_t *beta_tail, float32x4x4_t *pdf4, - float32x4_t l_c) { - uint32_t a_k_idx; - uint32_t a_kp1_idx; - uint32_t b_k_idx; - uint32_t b_kp1_idx; + float32x4_t *alpha, const float32x4_t *beta_tail, + float32x4x4_t *pdf4, float32x4_t l_c) { + uint32_t k_idx; + uint32_t kp1_idx; constexpr uint8x16_t rev_idx = {12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3}; @@ -187,25 +184,25 @@ void decode_step(const float32x4_t *sys, const float32x4_t *par, // Accumulate the state transition probabilities forwards through the // state transition trellis starting from the known encoder start state 0 for (uint32_t j = 0; j < 4; j++) { - a_k_idx = 8 * i + j * 2; - a_kp1_idx = a_k_idx + 2; + k_idx = 8 * i + j * 2; + kp1_idx = k_idx + 2; // We need g0 = {gamma[g_k_idx][0], gamma[g_k_idx + 1][0], // gamma[g_k_idx + 2][0], gamma[g_k_idx + 3][0]}; - // a02 = {alpha[a_k_idx][0], alpha[a_k_idx][2], - // alpha[a_k_idx + 1][0], alpha[a_k_idx + 1][2]}; + // a02 = {alpha[k_idx][0], alpha[k_idx][2], + // alpha[k_idx + 1][0], alpha[k_idx + 1][2]}; float32x4_t g0 = pdf4[i].val[j]; - float32x4_t a02 = vuzp1q_f32(alpha[a_k_idx], alpha[a_k_idx + 1]); + float32x4_t a02 = vuzp1q_f32(alpha[k_idx], alpha[k_idx + 1]); float32x4_t left_1 = vaddq_f32(g0, a02); // We need g2 = {gamma[g_k_idx][2], gamma[g_k_idx + 1][2], // gamma[g_k_idx + 2][2], gamma[g_k_idx + 3][2]}; - // a13 = {alpha[a_k_idx][1], alpha[a_k_idx][3], - // alpha[a_k_idx + 1][1], alpha[a_k_idx + 1][3]}; + // a13 = {alpha[k_idx][1], alpha[k_idx][3], + // alpha[k_idx + 1][1], alpha[k_idx + 1][3]}; float32x4_t g2 = vreinterpretq_f32_u8( vqtbl1q_u8(vreinterpretq_u8_f32(pdf4[i].val[j]), rev_idx)); - float32x4_t a13 = vuzp2q_f32(alpha[a_k_idx], alpha[a_k_idx + 1]); + float32x4_t a13 = vuzp2q_f32(alpha[k_idx], alpha[k_idx + 1]); float32x4_t right_1 = vaddq_f32(g2, a13); - alpha[a_kp1_idx] = vmaxq_f32(left_1, right_1); + alpha[kp1_idx] = vmaxq_f32(left_1, right_1); // We need g1 = {gamma[g_k_idx][1], gamma[g_k_idx + 1][1], // gamma[g_k_idx + 2][1], gamma[g_k_idx + 3][1]}; @@ -215,12 +212,14 @@ void decode_step(const float32x4_t *sys, const float32x4_t *par, // gamma[g_k_idx + 2][3], gamma[g_k_idx + 3][3]}; // which is g0 above float32x4_t right_2 = vaddq_f32(g0, a13); - alpha[a_kp1_idx + 1] = vmaxq_f32(left_2, right_2); + alpha[kp1_idx + 1] = vmaxq_f32(left_2, right_2); } } // Accumulate the state transition probabilities backwards through the state // transition trellis starting from the beginning of the precomputed tail + // and calculate 
the conditional probabilities of each bit being either 0 + // or 1 constexpr uint8x16_t idx_0312 = {0, 1, 2, 3, 12, 13, 14, 15, 4, 5, 6, 7, 8, 9, 10, 11}; constexpr uint8x16_t idx_3021 = {12, 13, 14, 15, 0, 1, 2, 3, @@ -229,13 +228,19 @@ void decode_step(const float32x4_t *sys, const float32x4_t *par, 12, 13, 14, 15, 0, 1, 2, 3}; constexpr uint8x16_t idx_1203 = {4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 12, 13, 14, 15}; + constexpr uint8x16_t idx_0220 = {0, 1, 2, 3, 8, 9, 10, 11, + 8, 9, 10, 11, 0, 1, 2, 3}; + constexpr uint8x16_t idx_3113 = {12, 13, 14, 15, 4, 5, 6, 7, + 4, 5, 6, 7, 12, 13, 14, 15}; + + float32x4x2_t beta_k; + float32x4x2_t beta_kp1 = {beta_tail[0], beta_tail[1]}; - beta[8 * k4] = beta_tail[0]; - beta[8 * k4 + 1] = beta_tail[1]; for (int32_t i = k4 - 1; i >= 0; i--) { + float32x4_t prob_0; + float32x4_t prob_1; for (int32_t j = 3; j >= 0; j--) { - b_k_idx = 8 * i + j * 2; - b_kp1_idx = b_k_idx + 2; + k_idx = 8 * i + j * 2; // We need g01_02 = {gamma[g_k_idx][0], gamma[g_k_idx][2], // gamma[g_k_idx + 1][0], gamma[g_k_idx + 1][2]}; @@ -243,7 +248,7 @@ void decode_step(const float32x4_t *sys, const float32x4_t *par, // beta[b_kp1_idx][1], beta[b_kp1_idx][1]}; float32x4_t g01_02 = vreinterpretq_f32_u8( vqtbl1q_u8(vreinterpretq_u8_f32(pdf4[i].val[j]), idx_0312)); - float32x4_t b01 = vzip1q_f32(beta[b_kp1_idx], beta[b_kp1_idx]); + float32x4_t b01 = vzip1q_f32(beta_kp1.val[0], beta_kp1.val[0]); float32x4_t left_1 = vaddq_f32(g01_02, b01); // We need g13 = {gamma[g_k_idx][1], gamma[g_k_idx][3], @@ -252,9 +257,9 @@ void decode_step(const float32x4_t *sys, const float32x4_t *par, // beta[b_kp1_idx + 1][1], beta[b_kp1_idx + 1][1]}; float32x4_t g13 = vreinterpretq_f32_u8( vqtbl1q_u8(vreinterpretq_u8_f32(pdf4[i].val[j]), idx_3021)); - float32x4_t bp1_01 = vzip1q_f32(beta[b_kp1_idx + 1], beta[b_kp1_idx + 1]); + float32x4_t bp1_01 = vzip1q_f32(beta_kp1.val[1], beta_kp1.val[1]); float32x4_t right_1 = vaddq_f32(g13, bp1_01); - beta[b_k_idx] = vmaxq_f32(left_1, right_1); + beta_k.val[0] = vmaxq_f32(left_1, right_1); // We need g23_02 = {gamma[g_k_idx + 2][0], gamma[g_k_idx + 2][2], // gamma[g_k_idx + 3][0], gamma[g_k_idx + 3][2]}; @@ -262,7 +267,7 @@ void decode_step(const float32x4_t *sys, const float32x4_t *par, // beta[b_kp1_idx][3], beta[b_kp1_idx][3]}; float32x4_t g23_02 = vreinterpretq_f32_u8( vqtbl1q_u8(vreinterpretq_u8_f32(pdf4[i].val[j]), idx_2130)); - float32x4_t b23 = vzip2q_f32(beta[b_kp1_idx], beta[b_kp1_idx]); + float32x4_t b23 = vzip2q_f32(beta_kp1.val[0], beta_kp1.val[0]); float32x4_t left_2 = vaddq_f32(g23_02, b23); // We need g23_13 = {gamma[g_k_idx + 2][1], gamma[g_k_idx + 2][3], @@ -271,65 +276,49 @@ void decode_step(const float32x4_t *sys, const float32x4_t *par, // beta[b_kp1_idx + 1][3], beta[b_kp1_idx + 1][3]}; float32x4_t g23_13 = vreinterpretq_f32_u8( vqtbl1q_u8(vreinterpretq_u8_f32(pdf4[i].val[j]), idx_1203)); - float32x4_t bp1_23 = vzip2q_f32(beta[b_kp1_idx + 1], beta[b_kp1_idx + 1]); + float32x4_t bp1_23 = vzip2q_f32(beta_kp1.val[1], beta_kp1.val[1]); float32x4_t right_2 = vaddq_f32(g23_13, bp1_23); - beta[b_k_idx + 1] = vmaxq_f32(left_2, right_2); - } - } - - // Finally calculate the conditional probabilities of each bit being either 0 - // or 1 - constexpr uint8x16_t idx_0220 = {0, 1, 2, 3, 8, 9, 10, 11, - 8, 9, 10, 11, 0, 1, 2, 3}; - constexpr uint8x16_t idx_3113 = {12, 13, 14, 15, 4, 5, 6, 7, - 4, 5, 6, 7, 12, 13, 14, 15}; + beta_k.val[1] = vmaxq_f32(left_2, right_2); - for (uint32_t i = 0; i < k4; i++) { - float32x4_t prob_0; - float32x4_t prob_1; - for 
(uint32_t j = 0; j < 4; j++) { - a_k_idx = 8 * i + j * 2; - b_kp1_idx = a_k_idx + 2; - - // We need a02 = {alpha[a_k_idx][0], alpha[a_k_idx][2], - // alpha[a_k_idx + 1][0], alpha[a_k_idx + 1][2]}; - // a13 = {alpha[a_k_idx][1], alpha[a_k_idx][3], - // alpha[a_k_idx + 1][1], alpha[a_k_idx + 1][3]}; + // We need a02 = {alpha[k_idx][0], alpha[k_idx][2], + // alpha[k_idx + 1][0], alpha[k_idx + 1][2]}; + // a13 = {alpha[k_idx][1], alpha[k_idx][3], + // alpha[k_idx + 1][1], alpha[k_idx + 1][3]}; // b02_13 = {beta[b_kp1_idx][0], beta[b_kp1_idx + 1][1], // beta[b_kp1_idx][2], beta[b_kp1_idx + 1][3]}; // b13_02 = {beta[b_kp1_idx + 1][0], beta[b_kp1_idx][1], // beta[b_kp1_idx + 1][2], beta[b_kp1_idx][3]}; - float32x4_t a02 = vuzp1q_f32(alpha[a_k_idx], alpha[a_k_idx + 1]); - float32x4_t a13 = vuzp2q_f32(alpha[a_k_idx], alpha[a_k_idx + 1]); + float32x4_t a02 = vuzp1q_f32(alpha[k_idx], alpha[k_idx + 1]); + float32x4_t a13 = vuzp2q_f32(alpha[k_idx], alpha[k_idx + 1]); float32x4_t b02_13 = - vtrn2q_f32(vrev64q_f32(beta[b_kp1_idx]), beta[b_kp1_idx + 1]); + vtrn2q_f32(vrev64q_f32(beta_kp1.val[0]), beta_kp1.val[1]); float32x4_t b13_02 = - vtrn2q_f32(vrev64q_f32(beta[b_kp1_idx + 1]), beta[b_kp1_idx]); + vtrn2q_f32(vrev64q_f32(beta_kp1.val[1]), beta_kp1.val[0]); // Find the most probable path in which bit i was a 0 // We need g01_01 = {gamma[g_k_idx][0], gamma[g_k_idx + 1][1], // gamma[g_k_idx + 2][0], gamma[g_k_idx + 3][1]}; - // g32_32 = {gamma[g_k_idx][3], gamma[g_k_idx + 1][2], - // gamma[g_k_idx + 2][3], gamma[g_k_idx + 3][2]}; float32x4_t g01_01 = vreinterpretq_f32_u8( vqtbl1q_u8(vreinterpretq_u8_f32(pdf4[i].val[j]), idx_0220)); - float32x4_t left_1 = vaddq_f32(vaddq_f32(a02, b02_13), g01_01); - float32x4_t right_1 = vaddq_f32(vaddq_f32(a13, b13_02), g01_01); + left_1 = vaddq_f32(vaddq_f32(a02, b02_13), g01_01); + right_1 = vaddq_f32(vaddq_f32(a13, b13_02), g01_01); prob_0[j] = vmaxvq_f32(vmaxq_f32(left_1, right_1)); // Find the most probable path in which bit i was a 1 // We need g10_10 = {gamma[g_k_idx][1], gamma[g_k_idx + 1][0], // gamma[g_k_idx + 2][1], gamma[g_k_idx + 3][0]}; - // g23_23 = {gamma[g_k_idx][2], gamma[g_k_idx + 1][3], - // gamma[g_k_idx + 2][2], gamma[g_k_idx + 3][3]}; float32x4_t g10_10 = vreinterpretq_f32_u8( vqtbl1q_u8(vreinterpretq_u8_f32(pdf4[i].val[j]), idx_3113)); - float32x4_t left_2 = vaddq_f32(vaddq_f32(a02, b13_02), g10_10); - float32x4_t right_2 = vaddq_f32(vaddq_f32(a13, b02_13), g10_10); - + left_2 = vaddq_f32(vaddq_f32(a02, b13_02), g10_10); + right_2 = vaddq_f32(vaddq_f32(a13, b02_13), g10_10); prob_1[j] = vmaxvq_f32(vmaxq_f32(left_2, right_2)); + + // Store the current value of beta to use in the next + // round of calculations + beta_kp1 = beta_k; } - // Calculate LLR + + // Calculate the LLRs llr[i] = vsubq_f32(prob_0, prob_1); } } @@ -374,17 +363,14 @@ void armral::turbo::decode_block(const int8_t *sys, const int8_t *par, // Allocate space for log likelihood ratios from both stages of decoding auto l1_uky = allocate_uninitialized(allocator, k4); - auto l2_uky = allocate_uninitialized(allocator, k); - auto prev_l2_uky = allocate_zeroed(allocator, k); + auto l2_uky = allocate_uninitialized(allocator, k4); + auto prev_l2_uky = allocate_zeroed(allocator, k4); - // Allocate space to hold alpha, beta and gamma + // Allocate space to hold alpha and gamma // alpha stores the forward-accumulated state probabilities for each decoded // bit, where the LTE encoder has 8 states and there are k+3 bits to decode // plus the starting condition auto alpha = 
allocate_uninitialized(allocator, 8 * k4 + 2); - // beta stores the backwards-accumulated state probabilities for each decoded - // bit - auto beta = allocate_uninitialized(allocator, 8 * k4 + 2); // gamma stores the conditional state transition probabilities for each of the // k+3 bits to decode auto gamma = allocate_uninitialized(allocator, k4); @@ -426,11 +412,11 @@ void armral::turbo::decode_block(const int8_t *sys, const int8_t *par, // Generate the permutation vector for the input value of k // Find the index into the array of parameter arrays corresponding // to the current k. Subtract 40 because k=40 is the lowest value. - int param_idx = armral_turbo_tables::perm_params_lookup[(k - 40) >> 3]; + int param_idx = armral::turbo::perm_params_lookup[(k - 40) >> 3]; // and extract the correct values of f1 and f2 to build the // interleaving polynomial - uint16_t f1 = armral_turbo_tables::perm_params[param_idx][0]; - uint16_t f2 = armral_turbo_tables::perm_params[param_idx][1]; + uint16_t f1 = armral::turbo::perm_params[param_idx][0]; + uint16_t f2 = armral::turbo::perm_params[param_idx][1]; for (uint32_t i = 0; i < k; i++) { perm_idx[i] = generate_perm_idx(i, f1, f2, k); } @@ -479,8 +465,7 @@ void armral::turbo::decode_block(const int8_t *sys, const int8_t *par, while (num_iter < max_iter) { // Run the first decoder step decode_step(sys_f32.get(), par_f32.get(), extrinsic.get(), k4, l1_uky.get(), - alpha.get(), beta.get(), beta_tail, gamma.get(), - channel_reliability); + alpha.get(), beta_tail, gamma.get(), channel_reliability); // Compute the new extrinsic information to pass into the second decoder update_extrinsic(k4, l1_uky.get(), extrinsic.get(), sys_f32.get()); @@ -499,8 +484,8 @@ void armral::turbo::decode_block(const int8_t *sys, const int8_t *par, // Run the second decoder step decode_step(perm_sys.get(), itl_f32.get(), perm_extrinsic.get(), k4, - l2_uky.get(), alpha.get(), beta.get(), perm_beta_tail, - gamma.get(), channel_reliability); + l2_uky.get(), alpha.get(), perm_beta_tail, gamma.get(), + channel_reliability); // Compute the new extrinsic information to pass back into the first encoder update_extrinsic(k4, l2_uky.get(), perm_extrinsic.get(), perm_sys.get()); diff --git a/src/UpperPHY/Turbo/arm_turbo_encoder.cpp b/src/UpperPHY/Turbo/arm_turbo_encoder.cpp index 98e0e053eba3e256156b05b3d6bac13caf092152..62acc617b2104e92f997d956df835019d2fea68d 100644 --- a/src/UpperPHY/Turbo/arm_turbo_encoder.cpp +++ b/src/UpperPHY/Turbo/arm_turbo_encoder.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "turbo_code.hpp" @@ -20,9 +20,9 @@ inline void rsc_encode(const uint8_t *c, uint32_t k_bytes, uint8_t &state, for (uint32_t k = 0; k < k_bytes; k++) { uint8_t input_block = c[k]; // use input byte - z[k] = armral_turbo_tables::encoded_bytes[8 * input_block + state]; + z[k] = armral::turbo::encoded_bytes[8 * input_block + state]; // update encoder state - state = armral_turbo_tables::new_state_bytes[8 * input_block + state]; + state = armral::turbo::new_state_bytes[8 * input_block + state]; } } @@ -33,9 +33,9 @@ inline void trellis_encode(uint8_t &state, uint8_t &x, uint8_t &z) { z = 0; // generate 3 bits of output in x and z for (int i = 2; i >= 0; i--) { - uint8_t symbol = armral_turbo_tables::trellis_output_symbol[8 * i + state]; + uint8_t symbol = armral::turbo::trellis_output_symbol[8 * i + state]; x |= symbol; - symbol = 
armral_turbo_tables::trellis_encoded_symbol[8 * i + state]; + symbol = armral::turbo::trellis_encoded_symbol[8 * i + state]; z |= symbol; // the state transitions here are: // old state 0 1 2 3 4 5 6 7 @@ -86,11 +86,11 @@ inline void terminate_trellis(uint8_t &state0, uint8_t &state1, uint8_t *d0, inline void interleave(const uint8_t *c, uint8_t *c_prime, uint32_t k) { // find the index into the array of parameter arrays corresponding // to the current k. Subtract 40 because k=40 is the lowest value. - int param_idx = armral_turbo_tables::perm_params_lookup[(k - 40) >> 3]; + int param_idx = armral::turbo::perm_params_lookup[(k - 40) >> 3]; // and extract the correct values of f1 and f2 to build the // interleaving polynomial - uint16_t f1 = armral_turbo_tables::perm_params[param_idx][0]; - uint16_t f2 = armral_turbo_tables::perm_params[param_idx][1]; + uint16_t f1 = armral::turbo::perm_params[param_idx][0]; + uint16_t f2 = armral::turbo::perm_params[param_idx][1]; for (uint32_t i = 0; i < k; i++) { // 0 <= perm_idx < 6144 but f2*i*i may be much larger int perm_idx = armral::turbo::generate_perm_idx(i, f1, f2, k); diff --git a/src/UpperPHY/Turbo/arm_turbo_rate_matching.cpp b/src/UpperPHY/Turbo/arm_turbo_rate_matching.cpp index 9e8c54c072222b1f41d85af27ddfba6dce9a4a82..7048c97e422beda91cad3270a925fdfed46345fd 100644 --- a/src/UpperPHY/Turbo/arm_turbo_rate_matching.cpp +++ b/src/UpperPHY/Turbo/arm_turbo_rate_matching.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "turbo_tables.hpp" @@ -76,22 +76,21 @@ static void subblock_interleave(uint32_t d, uint32_t kw, const uint8_t *d0, dummy2[ndb] = 0xF0; // Number of rows of the information bit matrix - assert(kpi % armral_turbo_tables::ctc == 0); - const uint32_t rtc = kpi / armral_turbo_tables::ctc; + assert(kpi % armral::turbo::ctc == 0); + const uint32_t rtc = kpi / armral::turbo::ctc; // Perform inter-column permutation for each row of d^(0)_k and d^(1)_k for (uint32_t i = 0; i < rtc; ++i) { - for (uint32_t j = 0; j < armral_turbo_tables::ctc; ++j) { - uint32_t idx = - (armral_turbo_tables::p[j] + i * armral_turbo_tables::ctc) / 8; - uint32_t jdx = 7 & ~armral_turbo_tables::p[j]; + for (uint32_t j = 0; j < armral::turbo::ctc; ++j) { + uint32_t idx = (armral::turbo::p[j] + i * armral::turbo::ctc) / 8; + uint32_t jdx = 7 & ~armral::turbo::p[j]; uint32_t y0bit = (y0[idx] >> jdx) & 1; uint32_t y1bit = (y1[idx] >> jdx) & 1; uint32_t yperm_jdx = 7 & ~j; - work_buffers.y0_perm[(j + i * armral_turbo_tables::ctc) / 8] |= - y0bit << yperm_jdx; - work_buffers.y1_perm[(j + i * armral_turbo_tables::ctc) / 8] |= - y1bit << yperm_jdx; + work_buffers.y0_perm[(j + i * armral::turbo::ctc) / 8] |= y0bit + << yperm_jdx; + work_buffers.y1_perm[(j + i * armral::turbo::ctc) / 8] |= y1bit + << yperm_jdx; } } @@ -120,14 +119,13 @@ static void subblock_interleave(uint32_t d, uint32_t kw, const uint8_t *d0, // Read out permuted matrix column by column for y^(0) and y^(1) // Perform permutation for y^(2) - for (uint32_t j = 0; j < armral_turbo_tables::ctc; ++j) { + for (uint32_t j = 0; j < armral::turbo::ctc; ++j) { for (uint32_t i = 0; i < rtc; ++i) { - uint32_t pi = - (armral_turbo_tables::p[j] + i * armral_turbo_tables::ctc + 1) % kpi; + uint32_t pi = (armral::turbo::p[j] + i * armral::turbo::ctc + 1) % kpi; uint32_t vidx = (i + j * rtc) / 8; uint32_t vjdx = 7 & ~(i + j * rtc); - uint32_t y0idx = (j + i * 
armral_turbo_tables::ctc) / 8; - uint32_t y0jdx = 7 & ~(j + i * armral_turbo_tables::ctc); + uint32_t y0idx = (j + i * armral::turbo::ctc) / 8; + uint32_t y0jdx = 7 & ~(j + i * armral::turbo::ctc); uint32_t y2idx = pi / 8; uint32_t y2jdx = 7 & ~pi; v0[vidx] |= ((work_buffers.y0_perm[y0idx] >> y0jdx) & 1) << vjdx; @@ -212,9 +210,8 @@ armral_status rate_matching(uint32_t d, uint32_t e, uint32_t rv, assert(rv >= 0 && rv <= 3); // The minimum number of rows which gives rtc * ctc >= d. - const uint32_t rtc = - (d + armral_turbo_tables::ctc - 1) / armral_turbo_tables::ctc; - const uint32_t kpi = rtc * armral_turbo_tables::ctc; + const uint32_t rtc = (d + armral::turbo::ctc - 1) / armral::turbo::ctc; + const uint32_t kpi = rtc * armral::turbo::ctc; const uint32_t kw = 3 * kpi; const uint32_t kpib = kpi / 8; const uint32_t kwb = kw / 8; diff --git a/src/UpperPHY/Turbo/arm_turbo_rate_recovery.cpp b/src/UpperPHY/Turbo/arm_turbo_rate_recovery.cpp index 85212d663a9b3db20b72ec66863e4649912956fa..416cd384995e5058ff87f4f878d9e6f2af1c6ac7 100644 --- a/src/UpperPHY/Turbo/arm_turbo_rate_recovery.cpp +++ b/src/UpperPHY/Turbo/arm_turbo_rate_recovery.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" @@ -26,22 +26,21 @@ static void generate_dummy_bits_tracking(uint32_t d, uint32_t rtc, // in the turbo code. Where these are in the overall input data stream is // determined in a manner similar to the encoding, described in section // 5.1.4.1.1 in 3GPP specification 36.212. - const uint32_t kpi = rtc * armral_turbo_tables::ctc; + const uint32_t kpi = rtc * armral::turbo::ctc; const uint32_t nd = kpi - d; // Tag nd elements as dummy bits. // dummy0 and dummy1 are permuted and transposed. for (uint32_t i = 0; i < nd; ++i) { - work_buffers.dummy0[armral_turbo_tables::p[i] * rtc] = 1; - work_buffers.dummy1[armral_turbo_tables::p[i] * rtc] = 1; + work_buffers.dummy0[armral::turbo::p[i] * rtc] = 1; + work_buffers.dummy1[armral::turbo::p[i] * rtc] = 1; } // Permutation for dummy2 for (uint32_t i = 0; i < kpi; ++i) { // TODO: We don't need to go through all of kpi here. We should be able to // identify where each of the nd < crc = 32 bits goes. 
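For reference, the dummy-bit tracking here and the sub-block interleaving in rate matching both apply the inter-column permutation described in section 5.1.4.1.1 of 3GPP TS 36.212. A minimal standalone sketch of the index mapping used on the lines below, reusing the `p` table and column count from `turbo_tables.hpp` (the function name is illustrative):

```cpp
#include <cstdint>

// Inter-column permutation pattern and column count, as in turbo_tables.hpp.
constexpr uint32_t ctc = 32;
constexpr uint8_t p[ctc] = {0,  16, 8,  24, 4,  20, 12, 28, 2,  18, 10,
                            26, 6,  22, 14, 30, 1,  17, 9,  25, 5,  21,
                            13, 29, 3,  19, 11, 27, 7,  23, 15, 31};

// Position i of the permuted-and-transposed d^(2) stream maps to source
// index pi in the kpi-entry buffer, where kpi = rtc * ctc.
inline uint32_t y2_source_index(uint32_t i, uint32_t rtc, uint32_t kpi) {
  return (p[i / rtc] + ctc * (i % rtc) + 1) % kpi;
}
```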
- uint32_t pi = (armral_turbo_tables::p[i / rtc] + - armral_turbo_tables::ctc * (i % rtc) + 1) % - kpi; + uint32_t pi = + (armral::turbo::p[i / rtc] + armral::turbo::ctc * (i % rtc) + 1) % kpi; if (pi < nd) { work_buffers.dummy2[i] = 1; } @@ -70,23 +69,22 @@ subblock_deinterleave(uint32_t d, uint32_t rtc, const int8_t *v0, int8_t *d1, int8_t *d2, subblock_deinterleave_work_buffers work_buffers) { - const uint32_t kpi = rtc * armral_turbo_tables::ctc; + const uint32_t kpi = rtc * armral::turbo::ctc; const uint32_t nd = kpi - d; // Reverse permutation and transpose for d^(0)_k and d^(1)_k for (uint32_t i = 0; i < rtc; ++i) { - for (uint32_t j = 0; j < armral_turbo_tables::ctc; ++j) { - uint32_t k = j + i * armral_turbo_tables::ctc; - work_buffers.y0[k] = v0[i + armral_turbo_tables::p[j] * rtc]; - work_buffers.y1[k] = v1[i + armral_turbo_tables::p[j] * rtc]; + for (uint32_t j = 0; j < armral::turbo::ctc; ++j) { + uint32_t k = j + i * armral::turbo::ctc; + work_buffers.y0[k] = v0[i + armral::turbo::p[j] * rtc]; + work_buffers.y1[k] = v1[i + armral::turbo::p[j] * rtc]; } } // Reverse permutation for d^(2)_k for (uint32_t i = 0; i < kpi; ++i) { - uint32_t pi = (armral_turbo_tables::p[i / rtc] + - armral_turbo_tables::ctc * (i % rtc) + 1) % - kpi; + uint32_t pi = + (armral::turbo::p[i / rtc] + armral::turbo::ctc * (i % rtc) + 1) % kpi; work_buffers.y2[pi] = v2[i]; } @@ -133,17 +131,16 @@ static void bit_deselection(uint32_t ncb, uint32_t k0, uint32_t e, } template -armral_status turbo_rate_recovery(uint32_t d, uint32_t e, uint32_t rv, - const int8_t *src, int8_t *dst0, int8_t *dst1, - int8_t *dst2, Allocator &allocator) { +armral_status rate_recovery(uint32_t d, uint32_t e, uint32_t rv, + const int8_t *src, int8_t *dst0, int8_t *dst1, + int8_t *dst2, Allocator &allocator) { assert(d > 0); assert(e > 0); assert(rv >= 0 && rv <= 3); // The minimum number of rows which gives rtc * ctc >= d. 
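As a quick sanity check of the sizing computed below (a worked example with an illustrative `d`, not library code):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t ctc = 32;                  // columns, as in turbo_tables.hpp
  const uint32_t d = 100;                   // illustrative stream length
  const uint32_t rtc = (d + ctc - 1) / ctc; // ceil(d / ctc) = 4 rows
  const uint32_t kpi = rtc * ctc;           // 128 entries per stream
  const uint32_t kw = 3 * kpi;              // 384 entries over three streams
  assert(rtc * ctc >= d && (rtc - 1) * ctc < d); // rtc is minimal
  assert(kpi - d == 28); // nd = 28 dummy entries pad each stream
  assert(kw == 384);
  return 0;
}
```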
- const uint32_t rtc = - (d + armral_turbo_tables::ctc - 1) / armral_turbo_tables::ctc; - const uint32_t kpi = rtc * armral_turbo_tables::ctc; + const uint32_t rtc = (d + armral::turbo::ctc - 1) / armral::turbo::ctc; + const uint32_t kpi = rtc * armral::turbo::ctc; const uint32_t kw = 3 * kpi; auto dummy = allocate_zeroed(allocator, kpi * 3); @@ -197,8 +194,8 @@ armral_status armral_turbo_rate_recovery(uint32_t d, uint32_t e, uint32_t rv, const int8_t *src, int8_t *dst0, int8_t *dst1, int8_t *dst2) { heap_allocator allocator{}; - return armral::turbo::turbo_rate_recovery(d, e, rv, src, dst0, dst1, dst2, - allocator); + return armral::turbo::rate_recovery(d, e, rv, src, dst0, dst1, dst2, + allocator); } armral_status armral_turbo_rate_recovery_noalloc(uint32_t d, uint32_t e, @@ -206,14 +203,14 @@ armral_status armral_turbo_rate_recovery_noalloc(uint32_t d, uint32_t e, int8_t *dst0, int8_t *dst1, int8_t *dst2, void *buffer) { buffer_bump_allocator allocator{buffer}; - return armral::turbo::turbo_rate_recovery(d, e, rv, src, dst0, dst1, dst2, - allocator); + return armral::turbo::rate_recovery(d, e, rv, src, dst0, dst1, dst2, + allocator); } uint32_t armral_turbo_rate_recovery_noalloc_buffer_size(uint32_t d, uint32_t e, uint32_t rv) { counting_allocator allocator{}; - (void)armral::turbo::turbo_rate_recovery(d, e, rv, nullptr, nullptr, nullptr, - nullptr, allocator); + (void)armral::turbo::rate_recovery(d, e, rv, nullptr, nullptr, nullptr, + nullptr, allocator); return allocator.required_bytes(); } diff --git a/src/UpperPHY/Turbo/turbo_code.hpp b/src/UpperPHY/Turbo/turbo_code.hpp index 178780a161abaaf268bb6e59855001cf03eea711..f389feddf64622492d9b5342242fd57e1886729d 100644 --- a/src/UpperPHY/Turbo/turbo_code.hpp +++ b/src/UpperPHY/Turbo/turbo_code.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/src/UpperPHY/Turbo/turbo_tables.hpp b/src/UpperPHY/Turbo/turbo_tables.hpp index 1f9dd5c76a9c44db91370846373ef6d3d7eb41c0..1a59ae80dd3eb26095de8f4586fc0248e692aa0f 100644 --- a/src/UpperPHY/Turbo/turbo_tables.hpp +++ b/src/UpperPHY/Turbo/turbo_tables.hpp @@ -1,11 +1,11 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once -namespace armral_turbo_tables { +namespace armral::turbo { // Precomputed encoded outputs from the RSC encoder indexed by the // current state of the internal registers (curr_state) and which bit @@ -381,4 +381,4 @@ static constexpr uint8_t p[ctc] = {0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30, 1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31}; -} // namespace armral_turbo_tables +} // namespace armral::turbo diff --git a/src/intrinsics.h b/src/intrinsics.h index b2dc080e8b67303c46e554c19b346dd8ec8d4285..7fd26f06203af680140ea0191cccd25ac937c3a1 100644 --- a/src/intrinsics.h +++ b/src/intrinsics.h @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once @@ -380,6 +380,33 @@ vst1q_f32_x2(float32_t *dest, float32x4x2_t value) { #endif #ifdef __ARM_FEATURE_SVE +static inline svuint16_t __attribute__((always_inline, artificial)) +svld1rh_u16(svbool_t pg, const uint8_t *ptr) { + svuint16_t ret; + asm("ld1rh {%0.h}, %1/Z, %2" + : "=w"(ret) + : "Upl"(pg), "Q"(*(const uint16_t *)ptr)); + return ret; +} + 
+static inline svuint32_t __attribute__((always_inline, artificial)) +svld1rw_u32(svbool_t pg, const uint8_t *ptr) { + svuint32_t ret; + asm("ld1rw {%0.s}, %1/Z, %2" + : "=w"(ret) + : "Upl"(pg), "Q"(*(const uint32_t *)ptr)); + return ret; +} + +static inline svuint64_t __attribute__((always_inline, artificial)) +svld1rd_u64(svbool_t pg, const uint8_t *ptr) { + svuint64_t ret; + asm("ld1rd {%0.d}, %1/Z, %2" + : "=w"(ret) + : "Upl"(pg), "Q"(*(const uint64_t *)ptr)); + return ret; +} + // Reverses pairs of floats within a SVE vector // [a.re a.im b.re b.im] --> [a.im a.re b.im b.re] static inline svfloat32_t __attribute__((always_inline, artificial)) diff --git a/src/utils/allocators.hpp b/src/utils/allocators.hpp index 3a13bc2866d87ad44045da6edbb2ac9cb0df93bc..e664173251e336e809d5dbe25aec2a81d9d8fee5 100644 --- a/src/utils/allocators.hpp +++ b/src/utils/allocators.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2023 Arm Limited and/or its affiliates + Copyright 2023-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/src/utils/cmplx_arith_f32.hpp b/src/utils/cmplx_arith_f32.hpp index 5e3eb4b6114c4ca2a966adffd0bce6ea5d72baf3..32644da766ed0299456c209b406a979437b3b4ae 100644 --- a/src/utils/cmplx_arith_f32.hpp +++ b/src/utils/cmplx_arith_f32.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/src/utils/vec_mul.hpp b/src/utils/vec_mul.hpp index f43368577beefa067913ee916b684c0aed92e1d2..2c4896a52029ed55ccee45b0397212ceec962a54 100644 --- a/src/utils/vec_mul.hpp +++ b/src/utils/vec_mul.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/test/CRC/main.cpp b/test/CRC/main.cpp index ee9882d52bf849514b8b636c09d6533d9cde6fda..ed3941a277f3ef9e73ceccf7aab095fc3eaa501d 100644 --- a/test/CRC/main.cpp +++ b/test/CRC/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cs16_utils.hpp" #include "int8_utils.hpp" diff --git a/test/ConvCoding/decoding/main.cpp b/test/ConvCoding/decoding/main.cpp index b20a62ad3eba86e9a4ae31c18ac1e4774a1da647..d768fbe6bce1dd611dc39fc56d5ef36a869a44e0 100644 --- a/test/ConvCoding/decoding/main.cpp +++ b/test/ConvCoding/decoding/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "bit_utils.hpp" @@ -73,8 +73,10 @@ static bool run_convolutional_decoding_test( bool passed = true; if (ret != ARMRAL_SUCCESS) { + // GCOVR_EXCL_START printf("Error! 
[%s] k=%u did not return ARMRAL_SUCCESS\n", name, k); passed = false; + // GCOVR_EXCL_STOP } else { printf("[%s] k=%u\n", name, k); auto check_dst = diff --git a/test/ConvCoding/encoding/main.cpp b/test/ConvCoding/encoding/main.cpp index 1087714ec945dd7b0b9a49a874faff90c7011681..fab64d1888c66ce983789cf74e5496f9442282de 100644 --- a/test/ConvCoding/encoding/main.cpp +++ b/test/ConvCoding/encoding/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "bit_utils.hpp" @@ -70,9 +70,11 @@ static bool run_convolutional_encoding_test(int k) { bool passed = true; if (ret != ARMRAL_SUCCESS) { + // GCOVR_EXCL_START const char *name = "Convolutional_Encoding"; printf("Error! [%s_%d] did not return ARMRAL_SUCCESS\n", name, k); passed = false; + // GCOVR_EXCL_STOP } else { auto check_dst0 = check_results_u8("CONVOLUTIONAL ENCODING (STREAM D0)", dst0.data(), diff --git a/test/Correlation/main.cpp b/test/Correlation/main.cpp index 707da33cf7d433d076559d7bddc10faa6b642984..192ba5f6d751e1198975fa3c90b7a24ca8fa373c 100644 --- a/test/Correlation/main.cpp +++ b/test/Correlation/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cs16_utils.hpp" #include "qint64.hpp" diff --git a/test/Demodulation/main.cpp b/test/Demodulation/main.cpp index 73a054533cf72f86302fee167d8a0419be1852ed..4833b650017f4b24b1bfb7badfaf1c4d16db469c 100644 --- a/test/Demodulation/main.cpp +++ b/test/Demodulation/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "cs16_utils.hpp" diff --git a/test/ElemWiseVectorMult/vecMul16/main.cpp b/test/ElemWiseVectorMult/vecMul16/main.cpp index ca798fc50fa6af2a91afb35a061765fb547e6bd3..ea5da35dbb6e2d5451311b176e04b577a4bc6ba4 100644 --- a/test/ElemWiseVectorMult/vecMul16/main.cpp +++ b/test/ElemWiseVectorMult/vecMul16/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cs16_utils.hpp" #include "qint64.hpp" @@ -87,7 +87,7 @@ int main(int argc, char **argv) { 1, 2, 3, 4, 5, 7, 8, 15, 16, 32, 64, 100, 128, 151, 256, 512, 1024, }; bool passed = true; - for (auto &n : params) { + for (const auto &n : params) { passed &= run_vec_mul_test(n); } const int saturation_len[] = {1, 3, 8, 9}; diff --git a/test/ElemWiseVectorMult/vecMul16_2/main.cpp b/test/ElemWiseVectorMult/vecMul16_2/main.cpp index 623335e11a99e7ad308c4fa109a3951fe9edfb47..ed6cb76af5394a64d2875db08cccf8e2bf8da82b 100644 --- a/test/ElemWiseVectorMult/vecMul16_2/main.cpp +++ b/test/ElemWiseVectorMult/vecMul16_2/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cs16_utils.hpp" #include "qint64.hpp" @@ -104,7 +104,7 @@ int main(int argc, char **argv) { 1, 2, 3, 4, 5, 7, 8, 15, 16, 32, 64, 100, 128, 151, 256, 512, 1024, }; bool passed = true; - for (auto &n : params) { + for (const auto &n : params) { passed &= run_vec_mul_test(n); } const int saturation_len[] = { diff --git a/test/ElemWiseVectorMult/vecMul32/main.cpp b/test/ElemWiseVectorMult/vecMul32/main.cpp index 
fc988471b2591323e7565d3a228522ca5a5d3996..9ac33ac5adabe9fba26399604f2e085303100bda 100644 --- a/test/ElemWiseVectorMult/vecMul32/main.cpp +++ b/test/ElemWiseVectorMult/vecMul32/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cf32_utils.hpp" @@ -30,7 +30,7 @@ int main(int argc, char **argv) { 1, 2, 3, 4, 5, 7, 8, 15, 16, 32, 64, 100, 128, 151, 256, 512, 1024, }; bool passed = true; - for (auto &n : params) { + for (const auto &n : params) { passed &= run_vec_mul_test(n); } exit(passed ? EXIT_SUCCESS : EXIT_FAILURE); diff --git a/test/ElemWiseVectorMult/vecMul32_2/main.cpp b/test/ElemWiseVectorMult/vecMul32_2/main.cpp index f5fda1c9fb9d8cbade013d198528e528769e5300..323367c28898d3d5da6c7dbec3232d736b63e588 100644 --- a/test/ElemWiseVectorMult/vecMul32_2/main.cpp +++ b/test/ElemWiseVectorMult/vecMul32_2/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cf32_utils.hpp" @@ -41,7 +41,7 @@ int main(int argc, char **argv) { 1, 2, 3, 4, 5, 7, 8, 15, 16, 32, 64, 100, 128, 151, 256, 512, 1024, }; bool passed = true; - for (auto &n : params) { + for (const auto &n : params) { passed &= run_vec_mul_test(n); } exit(passed ? EXIT_SUCCESS : EXIT_FAILURE); diff --git a/test/FFT/cf32/main.cpp b/test/FFT/cf32/main.cpp index ac0c3818fe6f3ec3d96ca308a785ab2d4b3c9746..ed8483c20430d7b86fa498cd6a8d7caa2a481f2b 100644 --- a/test/FFT/cf32/main.cpp +++ b/test/FFT/cf32/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "cf32_utils.hpp" diff --git a/test/FFT/cs16/main.cpp b/test/FFT/cs16/main.cpp index 3fc058ef89bd9a61564e65e9f72c92dd59c64114..6f88d577f0404ddf1c2565c74a077096a7a2aadc 100644 --- a/test/FFT/cs16/main.cpp +++ b/test/FFT/cs16/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "cf32_utils.hpp" diff --git a/test/FIR/arm_fir_filter_cf32/main.cpp b/test/FIR/arm_fir_filter_cf32/main.cpp index f598a4b83327bbea2429bae5b05c8a99e0520559..c8c3643f5fff2b05266aff5afd6244648e95f066 100644 --- a/test/FIR/arm_fir_filter_cf32/main.cpp +++ b/test/FIR/arm_fir_filter_cf32/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cf32_utils.hpp" @@ -48,7 +48,7 @@ int main(int argc, char **argv) { {8192, 32}, {10240, 4}, }; bool passed = true; - for (auto &p : params) { + for (const auto &p : params) { passed &= run_fir_test(p.first, p.second); } exit(passed ? 
EXIT_SUCCESS : EXIT_FAILURE); diff --git a/test/FIR/arm_fir_filter_cf32_decimate_2/main.cpp b/test/FIR/arm_fir_filter_cf32_decimate_2/main.cpp index b65a01c0037f8e67057fff87d6720c9468ba8cf6..9c8c8e844470e1a7cb743b1f68cd9dfd295bebee 100644 --- a/test/FIR/arm_fir_filter_cf32_decimate_2/main.cpp +++ b/test/FIR/arm_fir_filter_cf32_decimate_2/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cf32_utils.hpp" @@ -54,7 +54,7 @@ int main(int argc, char **argv) { {8192, 32}, {10240, 32}, }; bool passed = true; - for (auto &p : params) { + for (const auto &p : params) { passed &= run_fir_test(p.first, p.second); } exit(passed ? EXIT_SUCCESS : EXIT_FAILURE); diff --git a/test/FIR/arm_fir_filter_cs16/main.cpp b/test/FIR/arm_fir_filter_cs16/main.cpp index e8c21c6ee0771309a77e6c9742a157cf82e9c9e5..7103678f65014a8f6c85f9e9d6264312d7dcd572 100644 --- a/test/FIR/arm_fir_filter_cs16/main.cpp +++ b/test/FIR/arm_fir_filter_cs16/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cs16_utils.hpp" diff --git a/test/FIR/arm_fir_filter_cs16_decimate_2/main.cpp b/test/FIR/arm_fir_filter_cs16_decimate_2/main.cpp index eca801ed5b059a18ae90866513dfd077b550e356..a247b9882683651052b0a2971db1b1ad5ce028b0 100644 --- a/test/FIR/arm_fir_filter_cs16_decimate_2/main.cpp +++ b/test/FIR/arm_fir_filter_cs16_decimate_2/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cs16_utils.hpp" @@ -54,7 +54,7 @@ int main(int argc, char **argv) { {8192, 32}, {10240, 32}, }; bool passed = true; - for (auto &p : params) { + for (const auto &p : params) { passed &= run_fir_test(p.first, p.second); } exit(passed ? 
EXIT_SUCCESS : EXIT_FAILURE); diff --git a/test/LDPC/decoding/main.cpp b/test/LDPC/decoding/main.cpp index 809d1148948d516e9d4d11afe2f12843aad0ae1e..9362a05146abc765d3da0d3393e5c80f0c08d454 100644 --- a/test/LDPC/decoding/main.cpp +++ b/test/LDPC/decoding/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "../ldpc_test_common.hpp" diff --git a/test/LDPC/encoding/ldpc_encoding_test_data.h b/test/LDPC/encoding/ldpc_encoding_test_data.h index 792bcb807601ab988dfb468d28d51691357d3186..09947ba6b610fca5e071cc850027d84f9d4dbb72 100644 --- a/test/LDPC/encoding/ldpc_encoding_test_data.h +++ b/test/LDPC/encoding/ldpc_encoding_test_data.h @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/test/LDPC/encoding/main.cpp b/test/LDPC/encoding/main.cpp index 2c2ba9b4a65b4c0ddc69a2f03ec1a0931c61df94..8b78ea40bc6dd034ee3a67e4db8bdf523fd150eb 100644 --- a/test/LDPC/encoding/main.cpp +++ b/test/LDPC/encoding/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "../ldpc_test_common.hpp" #include "armral.h" @@ -8,6 +8,7 @@ #include "int8_utils.hpp" #include "ldpc_coding.hpp" #include "ldpc_encoding_test_data.h" + #include #include @@ -155,7 +156,7 @@ inline void set_remaining_bits(armral_ldpc_graph_t bg, uint32_t z, uint32_t lsi, // the number of index sets (8), and then the lifting set index // is added to this const auto *shift_ptr = graph->shifts + - row_start_ind * armral_ldpc::num_lifting_sets + + row_start_ind * armral::ldpc::num_lifting_sets + lsi * col_entries; uint32_t j = 0; for (; j < col_entries && col_ptr[j] < max_ind; ++j) { @@ -210,7 +211,7 @@ std::vector armral_ldpc_encode_block_ref(const uint8_t *data_in, // is first offset by the row start index multiplied by // the number of index sets (8), and then const auto *shift_ptr = graph->shifts + - row_start_ind * armral_ldpc::num_lifting_sets + + row_start_ind * armral::ldpc::num_lifting_sets + lsi * col_entries; uint32_t j = 0; for (; j < col_entries && col_ptr[j] < max_message_ind; ++j) { @@ -354,7 +355,9 @@ bool test_ldpc_encode_block( passed &= check_bytes_equal(encoding_bytes, punctured, tc); } if (!passed) { + // GCOVR_EXCL_START printf("[%s] one or more tests failed!\n", name); + // GCOVR_EXCL_STOP } return passed; } diff --git a/test/LDPC/ldpc_test_common.hpp b/test/LDPC/ldpc_test_common.hpp index 7ad4960fbfd8362d0f1ce5fd13d6803a89a9bdae..0623f9f5976381200498ee49c008c6030373072b 100644 --- a/test/LDPC/ldpc_test_common.hpp +++ b/test/LDPC/ldpc_test_common.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" @@ -42,7 +42,7 @@ bool perform_parity_check(const uint8_t *c, uint32_t z, auto num_cols = graph->row_start_inds[row + 1] - row_start_ind; const auto *col_ptr = graph->col_inds + row_start_ind; const auto *shift_ptr = graph->shifts + - row_start_ind * armral_ldpc::num_lifting_sets + + row_start_ind * armral::ldpc::num_lifting_sets + lsi * num_cols; // Loop through the rows in the block for (uint32_t zb = 0; zb < z; ++zb) { diff --git a/test/LDPC/rate_matching/main.cpp b/test/LDPC/rate_matching/main.cpp index 
e677b6f74923b0ade514ed02b3765fba03deaaed..783c882a2b825dcf994a43c81fd43eafcffa507e 100644 --- a/test/LDPC/rate_matching/main.cpp +++ b/test/LDPC/rate_matching/main.cpp @@ -1,10 +1,11 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "bit_utils.hpp" #include "int8_utils.hpp" + #include #include #include @@ -34,7 +35,7 @@ void ref_bit_selection(uint32_t z, uint32_t n, uint32_t e, auto *scratch_ptr2 = scratch_buf2.data(); // bit selection as specified by section 5.4.2.1 in 3GPP TS 38.212 - // remove Filler bits + // remove filler bits if (len_filler_bits > 0) { uint32_t len_s_f_bits = k - z * 2; // length of systematic & filler bits @@ -47,9 +48,9 @@ void ref_bit_selection(uint32_t z, uint32_t n, uint32_t e, uint32_t len_s_bytes = len_s_bits >> 3; uint32_t len_p_bytes = len_p_bits >> 3; - memcpy(scratch_ptr1, in, len_s_bytes); // skip Filler bits + memcpy(scratch_ptr1, in, len_s_bytes); // skip filler bits memcpy(&scratch_ptr1[len_s_bytes], &in[len_s_f_bytes], - len_p_bytes); // skip Filler bits + len_p_bytes); // skip filler bits } else { @@ -67,7 +68,7 @@ void ref_bit_selection(uint32_t z, uint32_t n, uint32_t e, // k0 depends on the redundancy version id. assert(n > 0); assert(e > 0); - assert(k0 >= 0 && k0 < n); + assert(k0 < n); assert(n % 2 == 0); for (uint32_t i = 0; i < (e + 7) / 8; i++) { @@ -241,8 +242,8 @@ bool test_ref_rate_matching() { return passed; } -int starting_position(armral_ldpc_graph_t bg, uint32_t rv, uint32_t n, - uint32_t ncb, uint32_t z) { +uint32_t starting_position(armral_ldpc_graph_t bg, uint32_t rv, uint32_t n, + uint32_t ncb, uint32_t z) { // Starting position k0 of different redundancy versions // given as Table 5.4.2.1-2 in 3GPP TS 38.212, simplified // using the assumption N_cb = 66 * Z_c (base graph 1) or @@ -377,7 +378,7 @@ bool test_ldpc_rate_matching( if (bg == LDPC_BASE_GRAPH_2) { g = 10 * z; } - // cosider single layer, single CB . + // Consider single layer, single CB. uint32_t num_res = qm * (mod == ARMRAL_MOD_QPSK ? 144 : 32); // 12 symbols or 3 symbols diff --git a/test/LDPC/rate_recovery/main.cpp b/test/LDPC/rate_recovery/main.cpp index 1993a3d9567bf5916ffc235f91fb0cbd1a70a080..499d98bf3e41b99e63a6b5b5e2cb4bdaa99bf232 100644 --- a/test/LDPC/rate_recovery/main.cpp +++ b/test/LDPC/rate_recovery/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "int8_utils.hpp" @@ -18,7 +18,7 @@ void ref_undo_selection(uint32_t z, uint32_t n, uint32_t e, const int8_t *in, int8_t *out) { // performs the inverse of the bit selection as specified by // section 5.4.2.1 in 3GPP TS 38.212 - assert(k0 >= 0 && k0 < n); + assert(k0 < n); assert(e > 0); // As we aggregate LLRs, for a single message, out should be zero on entry. 
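Editor's note: both the rate-matching and rate-recovery tests above derive the starting position k0 from Table 5.4.2.1-2 in 3GPP TS 38.212, as their comments state. For reference, a minimal sketch of that table under the full-buffer assumption the comments mention (N_cb = 66 * Z_c for base graph 1, N_cb = 50 * Z_c for base graph 2) could look as follows; the helper name `k0_full_buffer` is hypothetical and is not part of this patch:

```cpp
#include <cstdint>

// Hypothetical sketch: k0 per Table 5.4.2.1-2 of 3GPP TS 38.212, assuming
// ncb == 66 * z (base graph 1) or ncb == 50 * z (base graph 2). Under that
// assumption, floor(num * ncb / (den * z)) * z reduces to num * z, where
// num/den are the table coefficients for the given redundancy version rv.
static uint32_t k0_full_buffer(bool base_graph_2, uint32_t rv, uint32_t z) {
  static const uint32_t bg1_num[4] = {0, 17, 33, 56}; // denominator 66
  static const uint32_t bg2_num[4] = {0, 13, 25, 43}; // denominator 50
  return (base_graph_2 ? bg2_num[rv] : bg1_num[rv]) * z;
}
```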
@@ -122,8 +122,8 @@ void ref_undo_interleave(uint32_t e, uint32_t qm, const int8_t *in, } } -int starting_position(armral_ldpc_graph_t bg, uint32_t rv, uint32_t n, - uint32_t ncb, uint32_t z) { +uint32_t starting_position(armral_ldpc_graph_t bg, uint32_t rv, uint32_t n, + uint32_t ncb, uint32_t z) { // Duplicate of function with same name in rate_matching // Starting position k0 of different redundancy versions @@ -363,7 +363,9 @@ bool test_ldpc_rate_recovery( } if (!passed) { + // GCOVR_EXCL_START printf("[%s] one or more tests failed!\n", name); + // GCOVR_EXCL_STOP } return passed; } diff --git a/test/MatrixInv/batch/main.cpp b/test/MatrixInv/batch/main.cpp index 22e039908864aa9e239beac349241a9265a1e52b..74b1fcdc61d2f969e73421bed25ce155321a7113 100644 --- a/test/MatrixInv/batch/main.cpp +++ b/test/MatrixInv/batch/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "cf32_utils.hpp" @@ -26,17 +26,17 @@ static void reference_parallel_matinv_block(uint32_t m, printf(" > Check ref matrix %u/%u\n", batch + 1, batch_size); // allocate temporary matrices - armral_cmplx_f32_t mat[m * m]; - armral_cmplx_f32_t res[m * m]; + std::vector<armral_cmplx_f32_t> mat(m * m); + std::vector<armral_cmplx_f32_t> res(m * m); // unpack matrix - unpack_data(batch, batch_size, a, mat, m * m); + unpack_data(batch, batch_size, a, mat.data(), m * m); // run inversion on each matrix (sequentially) - reference_matinv_block(m, mat, res); + reference_matinv_block(m, mat.data(), res.data()); // pack result - pack_data(batch, batch_size, res, b, m * m); + pack_data(batch, batch_size, res.data(), b, m * m); } } diff --git a/test/MatrixInv/single/main.cpp b/test/MatrixInv/single/main.cpp index 3280e11c230040f73b5d4f144295419fa48ebb97..9d7e3f935d8a0a6655b37035aff6f273e10f911c 100644 --- a/test/MatrixInv/single/main.cpp +++ b/test/MatrixInv/single/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "cf32_utils.hpp" diff --git a/test/MatrixMult/batch/ArmSolve/main.cpp b/test/MatrixMult/batch/ArmSolve/main.cpp index 5b09a82b12bc4cd75ff31d442192a20f86893975..4ce72015f84188378fee65ba6947e28382ba621a 100644 --- a/test/MatrixMult/batch/ArmSolve/main.cpp +++ b/test/MatrixMult/batch/ArmSolve/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cf32_utils.hpp" #include "cs16_utils.hpp" diff --git a/test/MatrixMult/batch/MatrixVectorMult16/main.cpp b/test/MatrixMult/batch/MatrixVectorMult16/main.cpp index c8b14c09456382aed2d55e6bb647190c0489bacb..c8e851027812cd1597c19d4636ddb314c6ac87b8 100644 --- a/test/MatrixMult/batch/MatrixVectorMult16/main.cpp +++ b/test/MatrixMult/batch/MatrixVectorMult16/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "matrix_utils.hpp" diff --git a/test/MatrixMult/batch/MatrixVectorMult32/main.cpp b/test/MatrixMult/batch/MatrixVectorMult32/main.cpp index 48720507f845d0853a76e48102bbd5214c731a54..41f3c73157c9bb88a79c015e5dc5b301c1268a04 100644 --- a/test/MatrixMult/batch/MatrixVectorMult32/main.cpp +++ b/test/MatrixMult/batch/MatrixVectorMult32/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library -
Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cf32_utils.hpp" diff --git a/test/MatrixMult/single/MatrixMult16/main.cpp b/test/MatrixMult/single/MatrixMult16/main.cpp index 98fed6d86895eb70f81993692f27f0c9efe21178..e882b7d522feaed18477e9e3c25aea585015f86e 100644 --- a/test/MatrixMult/single/MatrixMult16/main.cpp +++ b/test/MatrixMult/single/MatrixMult16/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "reference_linalg.hpp" diff --git a/test/MatrixMult/single/MatrixMult32/main.cpp b/test/MatrixMult/single/MatrixMult32/main.cpp index 1f7b66b4dd88e51a06b53a310aace0863ffd2d99..a77c7f808222c5137cf6be444a199bea06f9e9bd 100644 --- a/test/MatrixMult/single/MatrixMult32/main.cpp +++ b/test/MatrixMult/single/MatrixMult32/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cf32_utils.hpp" #include "reference_linalg.hpp" diff --git a/test/MatrixMult/single/MatrixMultAAH32/main.cpp b/test/MatrixMult/single/MatrixMultAAH32/main.cpp index b6616bf061b2b9381379c3bf0782e38baee6acc7..83c4e01989e0bb870d2e2543e0e85a33ddb04343 100644 --- a/test/MatrixMult/single/MatrixMultAAH32/main.cpp +++ b/test/MatrixMult/single/MatrixMultAAH32/main.cpp @@ -1,8 +1,9 @@ /* Arm RAN Acceleration Library - Copyright 2023 Arm Limited and/or its affiliates + Copyright 2023-2024 Arm Limited and/or its affiliates */ #include "cf32_utils.hpp" +#include "matrix_utils.hpp" #include "reference_linalg.hpp" static bool run_matmul_aah_cf32_test(uint16_t m, uint16_t n) { diff --git a/test/MatrixMult/single/MatrixMultAHB32/main.cpp b/test/MatrixMult/single/MatrixMultAHB32/main.cpp index 69b4498cb6e0efb1cc6b116a3dd9b50f50f5ef03..c09b5b7814fc3daeb8a4cfb7fe3cc26fae70d466 100644 --- a/test/MatrixMult/single/MatrixMultAHB32/main.cpp +++ b/test/MatrixMult/single/MatrixMultAHB32/main.cpp @@ -1,12 +1,13 @@ /* Arm RAN Acceleration Library - Copyright 2023 Arm Limited and/or its affiliates + Copyright 2023-2024 Arm Limited and/or its affiliates */ #include #include #include "cf32_utils.hpp" +#include "matrix_utils.hpp" #include "reference_linalg.hpp" static bool run_matmul_ahb_cf32_test(uint16_t m, uint16_t n, uint16_t k) { diff --git a/test/MatrixMult/single/MatrixVectorMult16/main.cpp b/test/MatrixMult/single/MatrixVectorMult16/main.cpp index f9acbdff0a5f79a35dc4ad911a7659c62fde7f76..f6d987d3ae82e11a3c171926b9d4f6730bf82e28 100644 --- a/test/MatrixMult/single/MatrixVectorMult16/main.cpp +++ b/test/MatrixMult/single/MatrixVectorMult16/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "reference_linalg.hpp" diff --git a/test/MatrixMult/single/MatrixVectorMult32/main.cpp b/test/MatrixMult/single/MatrixVectorMult32/main.cpp index a60d3049c23b113d634e104e1dca1fecc9ceaf19..4283a28e5e5a8a9663e58c7663309804c937268a 100644 --- a/test/MatrixMult/single/MatrixVectorMult32/main.cpp +++ b/test/MatrixMult/single/MatrixVectorMult32/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cf32_utils.hpp" #include "reference_linalg.hpp" diff --git 
a/test/MatrixPseudoInv/direct/main.cpp b/test/MatrixPseudoInv/direct/main.cpp index ab578c62de83f2d9a70c4606af1967a500217b5a..74f99d8c64725f62dfca2de8a5798fb0e2c905b4 100644 --- a/test/MatrixPseudoInv/direct/main.cpp +++ b/test/MatrixPseudoInv/direct/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2023 Arm Limited and/or its affiliates + Copyright 2023-2024 Arm Limited and/or its affiliates */ #include "cf32_utils.hpp" #include "matrix_utils.hpp" @@ -10,9 +10,32 @@ #include static inline void -reference_pseudo_inverse_direct(uint32_t m, uint32_t n, float32_t lambda, - const armral_cmplx_f32_t *__restrict p_src, - armral_cmplx_f32_t *p_dst) { +reference_left_pseudo_inverse_direct(uint32_t m, uint32_t n, float32_t lambda, + const armral_cmplx_f32_t *__restrict p_src, + armral_cmplx_f32_t *p_dst) { + // Compute C = A^H * A + // We can use p_dst as an intermediate N-by-N array since it has size N-by-M, + // and N < M + auto *mat_aha = p_dst; + reference_matmul_aha_cf32(m, n, p_src, mat_aha); + + // Compute C + lambda * I + for (uint32_t i = 0; i < n; i++) { + uint32_t idx = i * n + i; + mat_aha[idx].re += lambda; + } + + // Compute B = C^(-1) + std::vector<armral_cmplx_f32_t> mat_inv(n * n); + reference_matinv_block(n, mat_aha, mat_inv.data()); + + // Compute B * A^H + reference_matmul_bah_cf32(m, n, p_src, mat_inv.data(), p_dst); +} + +static inline void reference_right_pseudo_inverse_direct( + uint32_t m, uint32_t n, float32_t lambda, + const armral_cmplx_f32_t *__restrict p_src, armral_cmplx_f32_t *p_dst) { // Compute C = A * A^H // We can use p_dst as an intermediate M-by-M array since it has size N-by-M, // and N >= M @@ -33,6 +56,16 @@ reference_pseudo_inverse_direct(uint32_t m, uint32_t n, float32_t lambda, reference_matmul_ahb_cf32(m, n, m, p_src, mat_inv.data(), p_dst); } +static inline void +reference_pseudo_inverse_direct(uint32_t m, uint32_t n, float32_t lambda, + const armral_cmplx_f32_t *__restrict p_src, + armral_cmplx_f32_t *p_dst) { + if (m > n) { + return reference_left_pseudo_inverse_direct(m, n, lambda, p_src, p_dst); + } + return reference_right_pseudo_inverse_direct(m, n, lambda, p_src, p_dst); +} + template static bool run_pseudo_inverse_direct_cf32_test( const char *name, uint32_t m, uint32_t n, float32_t lambda, @@ -56,19 +89,30 @@ bool run_all_tests(char const *test_name, char const *function_name, bool passed = true; const std::tuple<uint32_t, uint32_t, float32_t> params[] = { - {2, 5, -0.968591}, {2, 84, 0.191647}, {2, 67, 0.0}, - {3, 18, -1.218053}, {3, 138, 1.597186}, {3, 161, 0.0}, - {4, 20, -0.474817}, {4, 105, 0.944802}, {4, 94, 0.0}, - {8, 35, -1.991369}, {8, 200, -1.244298}, {8, 190, 0.0}, - {16, 32, 0.809352}, {16, 80, 1.810591}, {16, 117, 0.0}}; - for (const auto &[m, n, l] : params) { - printf("[%s] m=%d, n=%d, l=%f\n", function_name, m, n, l); - passed &= run_pseudo_inverse_direct_cf32_test(function_name, m, n, l, + {2, 5, -0.968591}, {2, 84, 0.191647}, {2, 2, 1.457848}, + {2, 67, 0.0}, {3, 18, -1.218053}, {3, 138, 1.597186}, + {3, 3, -1.2435186}, {3, 161, 0.0}, {4, 20, -0.474817}, + {4, 105, 0.944802}, {4, 4, 1.645646}, {4, 94, 0.0}, + {8, 35, -1.991369}, {8, 200, -1.244298}, {8, 8, 1.445767}, + {8, 190, 0.0}, {16, 32, 0.809352}, {16, 80, 1.810591}, + {16, 16, -0.426745}, {16, 117, 0.0}}; + for (const auto &[dim1, dim2, l] : params) { + printf("[%s] m=%d, n=%d, l=%f\n", function_name, dim1, dim2, l); + passed &= run_pseudo_inverse_direct_cf32_test(function_name, dim1, dim2, l, pseudo_inverse_under_test); + + // There is no need to test the square input cases again + if (dim1 != dim2)
{ + printf("[%s] m=%d, n=%d, l=%f\n", function_name, dim2, dim1, l); + passed &= run_pseudo_inverse_direct_cf32_test( + function_name, dim2, dim1, l, pseudo_inverse_under_test); + } } if (!passed) { + // GCOVR_EXCL_START printf("[%s] one or more tests failed!\n", test_name); + // GCOVR_EXCL_STOP } return passed; @@ -77,18 +121,22 @@ int main() { bool passed = true; + // Tests for pseudo-inverse passed &= run_all_tests("PseudoInverseDirect", "armral_cmplx_pseudo_inverse_direct_f32", armral_cmplx_pseudo_inverse_direct_f32); - passed &= run_all_tests( - "PseudoInverseDirectNoAlloc", - "armral_cmplx_pseudo_inverse_direct_f32_noalloc", - [](uint32_t m, auto... args) { - std::vector<uint8_t> buffer(m * m * sizeof(armral_cmplx_f32_t) + 3); - return armral_cmplx_pseudo_inverse_direct_f32_noalloc(m, args..., - buffer.data()); - }); + // Tests for non-allocating pseudo-inverse + passed &= + run_all_tests("PseudoInverseDirectNoAlloc", + "armral_cmplx_pseudo_inverse_direct_f32_noalloc", + [](uint32_t m, uint32_t n, auto... args) { + uint32_t size = m > n ? n : m; + std::vector<uint8_t> buffer( + size * size * sizeof(armral_cmplx_f32_t) + 3); + return armral_cmplx_pseudo_inverse_direct_f32_noalloc( + m, n, args..., buffer.data()); + }); exit(passed ? EXIT_SUCCESS : EXIT_FAILURE); } diff --git a/test/Modulation/main.cpp b/test/Modulation/main.cpp index f931ae41a76762943e0d1b9b8cbe3d81f6bed63f..0cb0a3ad89f10fdbfbac25dd68edf7d240a1b9d6 100644 --- a/test/Modulation/main.cpp +++ b/test/Modulation/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "cs16_utils.hpp" diff --git a/test/MuLaw/Compression/main.cpp b/test/MuLaw/Compression/main.cpp index 9eba4e6c1d5e97cb7f99e37a338cb0add13c2390..c1b8e6c4a4a840eac9a2976290ea1c1758edae2e 100644 --- a/test/MuLaw/Compression/main.cpp +++ b/test/MuLaw/Compression/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "cs16_utils.hpp" diff --git a/test/MuLaw/Decompression/main.cpp b/test/MuLaw/Decompression/main.cpp index f3f6a1716dc3dd919df7ed86d93694ae293c40cc..067af2dabb8fb6461eeb0159d7abc750202587b7 100644 --- a/test/MuLaw/Decompression/main.cpp +++ b/test/MuLaw/Decompression/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "cs16_utils.hpp" diff --git a/test/ORanBlockScaling/Compression/main.cpp b/test/ORanBlockScaling/Compression/main.cpp index ddf6d1af5cafdf192d4d227cd9828349ce2c3c67..ac0356ce64e1057664ac78ffe53a1ddaeb375027 100644 --- a/test/ORanBlockScaling/Compression/main.cpp +++ b/test/ORanBlockScaling/Compression/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cs16_utils.hpp" #include "int8_utils.hpp" diff --git a/test/ORanBlockScaling/Decompression/main.cpp b/test/ORanBlockScaling/Decompression/main.cpp index 89631ba3718d5e6c08e1b828364672df7c42da3f..21744933a0b484259f908132cd141ba9a2d252b7 100644 --- a/test/ORanBlockScaling/Decompression/main.cpp +++ b/test/ORanBlockScaling/Decompression/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration
Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cs16_utils.hpp" #include "int8_utils.hpp" diff --git a/test/Polar/crc_attachment/main.cpp b/test/Polar/crc_attachment/main.cpp index 9673d91a15c469e742f5c5dc111861cd42d728a4..8d67cd49cd15fcdd77b407a944f39de441ec920e 100644 --- a/test/Polar/crc_attachment/main.cpp +++ b/test/Polar/crc_attachment/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "int8_utils.hpp" #include "polar_crc_attach_data.hpp" diff --git a/test/Polar/crc_attachment/polar_crc_attach_data.hpp b/test/Polar/crc_attachment/polar_crc_attach_data.hpp index 8c5dfeb24e3d7d805c05a982e8f92df1c3063557..cb8c986ddb2ce7a083167b86853d4a30ea01fa0a 100644 --- a/test/Polar/crc_attachment/polar_crc_attach_data.hpp +++ b/test/Polar/crc_attachment/polar_crc_attach_data.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/test/Polar/decoding/main.cpp b/test/Polar/decoding/main.cpp index 31ae785d9ee5e9436b8b5a27b07a06c72459b657..5b36846ddbd5b7de3ebe25cf5d4b493e00e2d21a 100644 --- a/test/Polar/decoding/main.cpp +++ b/test/Polar/decoding/main.cpp @@ -1,12 +1,13 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cs16_utils.hpp" #include "int8_utils.hpp" #include #include +template<bool test_noalloc> static bool run_polar_decoding_test(uint32_t n, uint32_t e, uint32_t k, uint32_t n_pc, uint32_t n_pc_wm, uint32_t l) { @@ -69,13 +70,16 @@ static bool run_polar_decoding_test(uint32_t n, uint32_t e, uint32_t k, armral_polar_subchannel_deinterleave( k, frozen_mask.data(), out.data() + i * n / 8, data_deint.data()); -#ifdef ARMRAL_TESTING_NOALLOC - std::vector<uint8_t> buffer(armral_polar_crc_check_noalloc_buffer_size(k)); - bool crc_pass = - armral_polar_crc_check_noalloc(data_deint.data(), k, buffer.data()); -#else - bool crc_pass = armral_polar_crc_check(data_deint.data(), k); -#endif + bool crc_pass; + if constexpr (test_noalloc) { + std::vector<uint8_t> buffer( + armral_polar_crc_check_noalloc_buffer_size(k)); + crc_pass = + armral_polar_crc_check_noalloc(data_deint.data(), k, buffer.data()); + } else { + crc_pass = armral_polar_crc_check(data_deint.data(), k); + } + if (crc_pass) { return check_results_u8("POLAR DECODING", data_deint.data(), in.data(), (k + 7) / 8); @@ -94,13 +98,16 @@ int main(int argc, char **argv) { bool passed = true; for (auto l : {1, 2, 4, 8}) { for (auto n : {32, 64, 128, 256, 512, 1024}) { - for (int k = 25; k <= n; k += 7) { + for (int k = 25; k <= n; k += 11) { // test e >= n to check repetition doesn't affect the frozen mask.
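Editor's note: the polar decoding test above replaces the old `ARMRAL_TESTING_NOALLOC` preprocessor switch with a `bool` template parameter, so one test binary exercises both the allocating and the buffer-passing CRC check. A minimal sketch of the `if constexpr` pattern in isolation, using placeholder names rather than ArmRAL APIs:

```cpp
#include <cstdint>
#include <vector>

// Placeholder stand-ins for an allocating routine and its _noalloc variant.
inline uint32_t work_alloc(uint32_t k) { return k; }
inline uint32_t work_noalloc(uint32_t k, void *buffer) {
  (void)buffer; // the real routine would use caller-provided scratch space
  return k;
}
inline uint32_t work_buffer_size(uint32_t k) { return k + 8; }

// The branch not taken is discarded at compile time, so both variants can
// be instantiated side by side from the same test driver:
//   work<false>(k);  work<true>(k);
template<bool use_noalloc>
uint32_t work(uint32_t k) {
  if constexpr (use_noalloc) {
    std::vector<uint8_t> buffer(work_buffer_size(k));
    return work_noalloc(k, buffer.data());
  } else {
    return work_alloc(k);
  }
}
```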
- for (int e = k; e <= n + 11; e += 11) { - passed &= run_polar_decoding_test(n, e, k, 0, 0, l); + for (int e = k; e <= n + 17; e += 17) { + passed &= run_polar_decoding_test<false>(n, e, k, 0, 0, l); + passed &= run_polar_decoding_test<true>(n, e, k, 0, 0, l); if (k + 3 <= n && k + 3 <= e) { - passed &= run_polar_decoding_test(n, e, k, 3, 0, l); - passed &= run_polar_decoding_test(n, e, k, 3, 1, l); + passed &= run_polar_decoding_test<false>(n, e, k, 3, 0, l); + passed &= run_polar_decoding_test<false>(n, e, k, 3, 1, l); + passed &= run_polar_decoding_test<true>(n, e, k, 3, 0, l); + passed &= run_polar_decoding_test<true>(n, e, k, 3, 1, l); } } } diff --git a/test/Polar/encoding/main.cpp b/test/Polar/encoding/main.cpp index 22220d84f1c57c9ed28c3d8de854650959acc877..7c1d9ac2dbf51b3501a57267e5132c7725f356d5 100644 --- a/test/Polar/encoding/main.cpp +++ b/test/Polar/encoding/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cs16_utils.hpp" #include "int8_utils.hpp" diff --git a/test/Polar/frozen/main.cpp b/test/Polar/frozen/main.cpp index 041c188fca5b824619fee2a1af96303d4590cef3..5be4671fac38eebb6ac2446c8f16a6e75e3b627f 100644 --- a/test/Polar/frozen/main.cpp +++ b/test/Polar/frozen/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "int8_utils.hpp" diff --git a/test/Polar/rate_matching/main.cpp b/test/Polar/rate_matching/main.cpp index 00376cea9e030035ab231ab31e4febc8a13b9762..6afd323d704b83d6e7c6c17516603359efc76d33 100644 --- a/test/Polar/rate_matching/main.cpp +++ b/test/Polar/rate_matching/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "int8_utils.hpp" diff --git a/test/Polar/rate_recovery/main.cpp b/test/Polar/rate_recovery/main.cpp index 5d985ffb9f3f52d21f4a3dff8ff6e133c1d288b7..fe3ce7d4069852f11a0177b4999cde83e8ce9313 100644 --- a/test/Polar/rate_recovery/main.cpp +++ b/test/Polar/rate_recovery/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "int8_utils.hpp" @@ -15,15 +15,19 @@ static bool check_llrs_equal(const int8_t *result, const int8_t *expected, bool passed = true; for (uint32_t i = 0; i < n_values; ++i) { if (result[i] != expected[i]) { + // GCOVR_EXCL_START printf("Sample %u: LLR calculated = %d != LLR expected " "= %d --> ERROR \n", i, result[i], expected[i]); passed = false; + // GCOVR_EXCL_STOP } } if (!passed) { + // GCOVR_EXCL_START printf("Check failed!\n"); + // GCOVR_EXCL_STOP } else { printf("Check successful!\n"); } diff --git a/test/Polar/subchannel_deinterleave/main.cpp b/test/Polar/subchannel_deinterleave/main.cpp index 77ba43fe73e06b132b2148061a51314758d3b8bb..59b3ef8e890fdd0b45561c86c7829cffe4e865dc 100644 --- a/test/Polar/subchannel_deinterleave/main.cpp +++ b/test/Polar/subchannel_deinterleave/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "int8_utils.hpp" diff --git a/test/Polar/subchannel_interleave/main.cpp b/test/Polar/subchannel_interleave/main.cpp index
eb6b31fce8a041a945125bc36125fcdad5f200a8..bfc6e55d6277d9abe4480203b89f3e152ff1f583 100644 --- a/test/Polar/subchannel_interleave/main.cpp +++ b/test/Polar/subchannel_interleave/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "int8_utils.hpp" diff --git a/test/SVD/main.cpp b/test/SVD/main.cpp index 7b1a5f9b3e3397238cbd19226200d30e43b001c6..a1427907c19d2b14f0c4f4555e0e7b265b09917c 100644 --- a/test/SVD/main.cpp +++ b/test/SVD/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "svd_sample_data.h" @@ -141,7 +141,9 @@ bool run_all_tests(char const *name, SVDFunction svd_function) { } if (!passed) { + // GCOVR_EXCL_START printf("[%s] one or more tests failed!\n", name); + // GCOVR_EXCL_STOP } return passed; } diff --git a/test/SVD/svd_sample_data.h b/test/SVD/svd_sample_data.h index e2cfab3a05df9ff7dd2a98bdaf4b12e9b881205f..0add7527886551250b129dfc5bb486270aac7049 100644 --- a/test/SVD/svd_sample_data.h +++ b/test/SVD/svd_sample_data.h @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/test/SVD/svd_test.hpp b/test/SVD/svd_test.hpp index 293ed4d975da3cf777c275a2c2a123462674261b..a3ca48541b71b0f4d93dcf296ac70b46fab059fb 100644 --- a/test/SVD/svd_test.hpp +++ b/test/SVD/svd_test.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once @@ -160,7 +160,7 @@ static inline void householder_qr(const int m, const int n, cf32_t *a, } // Apply implicitly Q to an input matrix C of the same dimension -// as the marix A that has been factorized into QR or bidiagonalisation. +// as the matrix A that has been factorized into QR or bidiagonalisation. static inline void apply_q(int m, int n, const cf32_t *a, const cf32_t *tau, cf32_t *c) { if (m < n) { @@ -171,7 +171,7 @@ static inline void apply_q(int m, int n, const cf32_t *a, const cf32_t *tau, } std::vector<cf32_t> q(m * n); memcpy(q.data(), a, m * n * sizeof(cf32_t)); - column_major_matrix_view q_mat{q, m}; + column_major_matrix_view q_mat{q.data(), m}; column_major_matrix_view c_mat{c, m}; for (int i = n - 1; i >= 0; i--) { q_mat(i, i) = 1.0F; @@ -212,7 +212,7 @@ static inline std::vector<cf32_t> get_q(const int m, const int n, // GCOVR_EXCL_STOP } std::vector<cf32_t> q = a; - column_major_matrix_view q_mat{q, m}; + column_major_matrix_view q_mat{q.data(), m}; // Accumulate reflectors from right to left // Q = H1 * H2....* Hn. They are applied to identity.
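Editor's note: as the comment above says, `get_q` accumulates the Householder reflectors produced by the QR factorization from right to left onto the identity. Spelling that out, with v_i the i-th reflector vector and tau_i its scalar factor (standard Householder form, stated here for reference rather than taken from the patch):

```latex
H_i = I - \tau_i \, v_i v_i^{H}, \qquad
Q = H_1 H_2 \cdots H_n = H_1 \bigl( H_2 \cdots ( H_n \, I ) \bigr).
```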
for (int i = n - 1; i >= 0; i--) { @@ -352,7 +352,7 @@ static inline void generate_svd_matrix(const int m, const int n, std::vector<cf32_t> q2 = get_q(n, n, a2, tau2); // multiply left orthogonal matrix by S - column_major_matrix_view q2_mat{q2, n}; + column_major_matrix_view q2_mat{q2.data(), n}; for (int i = 0; i < n; i++) { for (int j = 0; j < n; j++) { q2_mat(i, j) *= s[i]; @@ -360,7 +360,7 @@ } // Apply Q1 to S*Q2, but first copy Q2 in an m * n matrix std::vector<cf32_t> a_cmplx(m * n); - column_major_matrix_view q2_mat_mn{a_cmplx, m}; + column_major_matrix_view q2_mat_mn{a_cmplx.data(), m}; for (int i = 0; i < n; i++) { for (int j = 0; j < n; j++) { q2_mat_mn(i, j) = q2_mat(i, j); @@ -731,7 +731,7 @@ static inline int qr_svd_cf32(const bool gen_singular_vect, const int m, const int n, std::vector<cf32_t> &a, std::vector<float> &s, std::vector<cf32_t> &u, std::vector<cf32_t> &vt) { - column_major_matrix_view a_mat{a, m}; + column_major_matrix_view a_mat{a.data(), m}; // Perform the QR factorization of A. std::vector<cf32_t> tau(n); @@ -739,7 +739,7 @@ static inline int qr_svd_cf32(const bool gen_singular_vect, const int m, // Extract the R. std::vector<cf32_t> r(n * n); - column_major_matrix_view r_mat{r, n}; + column_major_matrix_view r_mat{r.data(), n}; for (int i = 0; i < n; i++) { for (int j = i; j < n; j++) { r_mat(i, j) = a_mat(i, j); @@ -758,8 +758,8 @@ static inline int qr_svd_cf32(const bool gen_singular_vect, const int m, // Copy u1 in u // Initialise u to zero in case it is not. u.assign(u.size(), 0.0F); - column_major_matrix_view u_mat{u, m}; - column_major_matrix_view u1_mat{u1, n}; + column_major_matrix_view u_mat{u.data(), m}; + column_major_matrix_view u1_mat{u1.data(), n}; for (int i = 0; i < n; i++) { for (int j = 0; j < n; j++) { u_mat(i, j) = u1_mat(i, j); @@ -786,7 +786,7 @@ static inline bool check_orthogonality(const int m, const int n, cf32_t *q) { // Build an identity matrix Id std::vector<cf32_t> a(n * n); - column_major_matrix_view a_mat{a, n}; + column_major_matrix_view a_mat{a.data(), n}; for (int i = 0; i < n; i++) { a_mat(i, i) = 1.0F; } @@ -826,7 +826,7 @@ static inline bool check_qr_decomposition(int m, int n, const cf32_t *aref, // Extract R, allocate m-by-n memory for // the multiplication by A later std::vector<cf32_t> r(m * n); - column_major_matrix_view r_mat{r, m}; + column_major_matrix_view r_mat{r.data(), m}; for (int i = 0; i < n; i++) { for (int j = i; j < n; j++) { r_mat(i, j) = a_mat(i, j); @@ -838,7 +838,7 @@ // Copy Aref std::vector<cf32_t> c(m * n); memcpy(c.data(), aref, m * n * sizeof(cf32_t)); - column_major_matrix_view c_mat{c, m}; + column_major_matrix_view c_mat{c.data(), m}; // Compute Aref = Aref - QR for (int i = 0; i < m; i++) { @@ -907,7 +907,7 @@ static inline bool check_bidiag_decomposition(int m, int n, const cf32_t *aref, get_p(m, n, a, taup, p.data()); // Build explicitly the n-by-n bidiagonal matrix B std::vector<cf32_t> b(n * n); - column_major_matrix_view b_mat{b, n}; + column_major_matrix_view b_mat{b.data(), n}; for (int i = 0; i < n - 1; i++) { b_mat(i, i) = d[i]; b_mat(i, i + 1) = e[i]; @@ -924,7 +924,7 @@ static inline bool check_bidiag_decomposition(int m, int n, const cf32_t *aref, // Compute Aref - Q * B * VT column_major_matrix_view aref_mat{aref, m}; - column_major_matrix_view c_mat{c, m}; + column_major_matrix_view c_mat{c.data(), m}; for (int i = 0; i < m; i++) { for (int j = 0; j < n; j++) { c_mat(i, j) -= aref_mat(i, j); @@ -980,7 +980,7 @@ static inline bool
check_svd_decomposition(int m, int n, const cf32_t *a, // U1 = U * S std::vector<cf32_t> u1(m * n); column_major_matrix_view u_mat{u, m}; - column_major_matrix_view u1_mat{u1, m}; + column_major_matrix_view u1_mat{u1.data(), m}; for (int i = 0; i < m; i++) { for (int j = 0; j < n; j++) { u1_mat(i, j) = u_mat(i, j) * s[j]; diff --git a/test/Scrambling/main.cpp b/test/Scrambling/main.cpp index ecb40148893211467a9572f8dbaac1e500de72f3..777276f53062d7d6a0f063c4cbe04443d5c251aa 100644 --- a/test/Scrambling/main.cpp +++ b/test/Scrambling/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "bit_utils.hpp" diff --git a/test/SeqGenerator/main.cpp b/test/SeqGenerator/main.cpp index f7e3e4956ddaef8d8c34afb3523156fca1887971..8bb2f614f4a3e69899a4399d2c3c38dac3a10db3 100644 --- a/test/SeqGenerator/main.cpp +++ b/test/SeqGenerator/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cs16_utils.hpp" #include "int8_utils.hpp" diff --git a/test/Turbo/decoding/main.cpp b/test/Turbo/decoding/main.cpp index a46ea21369171bb850c003eb05c63478a27aaf18..027056ed5b2768b88f916b0a7a52d26ec0a975fd 100644 --- a/test/Turbo/decoding/main.cpp +++ b/test/Turbo/decoding/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" @@ -104,7 +104,6 @@ int main(int argc, char **argv) { passed &= run_turbo_decoding_parameter_test(); // Check decoder decodes correctly - for (auto k : valid_ks) { passed &= run_turbo_decoding_test("TurboDecoding", k, armral_turbo_decode_block); diff --git a/test/Turbo/encoding/main.cpp b/test/Turbo/encoding/main.cpp index fcad110fc275c61803ba6fee01cd992b259dc79b..b945f22727afb5669639ab84b82aa5ae7da1c6a2 100644 --- a/test/Turbo/encoding/main.cpp +++ b/test/Turbo/encoding/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" diff --git a/test/Turbo/encoding/reference_turbo_encoder.hpp b/test/Turbo/encoding/reference_turbo_encoder.hpp index d625df69b095462bfc14e8145035811f4f29db3c..aec3668e73f782d509e85a84e4bf42729a3e7035 100644 --- a/test/Turbo/encoding/reference_turbo_encoder.hpp +++ b/test/Turbo/encoding/reference_turbo_encoder.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/test/Turbo/rate_matching/main.cpp b/test/Turbo/rate_matching/main.cpp index db8d2f621eb009d15d978301c54d53b4d0302c78..be9c29f4bfca60a57d9b6e0254c522df742bab32 100644 --- a/test/Turbo/rate_matching/main.cpp +++ b/test/Turbo/rate_matching/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "int8_utils.hpp" diff --git a/test/Turbo/rate_recovery/main.cpp b/test/Turbo/rate_recovery/main.cpp index b27b4c1563835b6fcc028b5b0c6dba832d064f8a..36c748b9d7da78707ba82e1940f025ec59b3075d 100644 --- a/test/Turbo/rate_recovery/main.cpp +++ b/test/Turbo/rate_recovery/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library -
Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "armral.h" #include "int8_utils.hpp" diff --git a/test/Turbo/rate_recovery/rate_recovery_data.hpp b/test/Turbo/rate_recovery/rate_recovery_data.hpp index 7cec34250750971da22e838ebb531690f258f4ec..0117f1481118fcf05c2c412a3436a35d42afc8c0 100644 --- a/test/Turbo/rate_recovery/rate_recovery_data.hpp +++ b/test/Turbo/rate_recovery/rate_recovery_data.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/test/Turbo/turbo_test_data.hpp b/test/Turbo/turbo_test_data.hpp index 9efa245a0ee1db9bdcaa3a9c28f2905c1d198e2f..cc47d69638e1c42eccdb309f021fa9cf4c18cbfa 100644 --- a/test/Turbo/turbo_test_data.hpp +++ b/test/Turbo/turbo_test_data.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/test/VectorDotProd/vecDot16/main.cpp b/test/VectorDotProd/vecDot16/main.cpp index 44b800a945c38272fd0d909c8a262aff5443be43..08126313dfae1a3c56b11242fcf3aa34f50f4818 100644 --- a/test/VectorDotProd/vecDot16/main.cpp +++ b/test/VectorDotProd/vecDot16/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cs16_utils.hpp" #include "qint64.hpp" diff --git a/test/VectorDotProd/vecDot16_2/main.cpp b/test/VectorDotProd/vecDot16_2/main.cpp index 913f3a15fcb33ab1033b035983ed1963cd34c1a7..764b191e8956066bae74f6a80997b5f243be0804 100644 --- a/test/VectorDotProd/vecDot16_2/main.cpp +++ b/test/VectorDotProd/vecDot16_2/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cs16_utils.hpp" #include "qint64.hpp" @@ -40,7 +40,7 @@ int main(int argc, char **argv) { 1, 2, 3, 4, 5, 7, 8, 15, 16, 32, 64, 100, 128, 151, 256, 512, 1024, }; bool passed = true; - for (auto &n : params) { + for (const auto &n : params) { passed &= run_vec_dot_test(n); } exit(passed ? EXIT_SUCCESS : EXIT_FAILURE); diff --git a/test/VectorDotProd/vecDot16_2_32bit/main.cpp b/test/VectorDotProd/vecDot16_2_32bit/main.cpp index 63a6fd2106b8a26220fa014eba7df314e4269a2b..8fa6efb56b6d5a2bef30a9e940e804d32e01e1a7 100644 --- a/test/VectorDotProd/vecDot16_2_32bit/main.cpp +++ b/test/VectorDotProd/vecDot16_2_32bit/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cs16_utils.hpp" #include "qint64.hpp" @@ -44,7 +44,7 @@ int main(int argc, char **argv) { 1, 2, 3, 4, 5, 7, 8, 15, 16, 32, 64, 100, 128, 151, 256, 512, 1024, }; bool passed = true; - for (auto &n : params) { + for (const auto &n : params) { passed &= run_vec_dot_test(n); } exit(passed ? 
EXIT_SUCCESS : EXIT_FAILURE); diff --git a/test/VectorDotProd/vecDot16_32bit/main.cpp b/test/VectorDotProd/vecDot16_32bit/main.cpp index 30ae98b9b1a9ed6227570942db1bf6a9e097a8d6..f400649380073fc2075d8bd17c5b669643aec7fa 100644 --- a/test/VectorDotProd/vecDot16_32bit/main.cpp +++ b/test/VectorDotProd/vecDot16_32bit/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cs16_utils.hpp" #include "qint64.hpp" diff --git a/test/VectorDotProd/vecDot32/main.cpp b/test/VectorDotProd/vecDot32/main.cpp index 7229866b7c07c3ad491d3bc4364464210b86518c..beca6f0a43d55e64c410a5d74834311126bba96f 100644 --- a/test/VectorDotProd/vecDot32/main.cpp +++ b/test/VectorDotProd/vecDot32/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cf32_utils.hpp" @@ -29,7 +29,7 @@ int main(int argc, char **argv) { 1, 2, 3, 4, 5, 7, 8, 15, 16, 32, 64, 100, 128, 151, 256, 512, 1024, }; bool passed = true; - for (auto &n : params) { + for (const auto &n : params) { passed &= run_vec_dot_test(n); } exit(passed ? EXIT_SUCCESS : EXIT_FAILURE); diff --git a/test/VectorDotProd/vecDot32_2/main.cpp b/test/VectorDotProd/vecDot32_2/main.cpp index ca50a1595f666a625c74b5edda193e9a78213ed6..b40850c6a3e676588b941ba3609f4361c274661a 100644 --- a/test/VectorDotProd/vecDot32_2/main.cpp +++ b/test/VectorDotProd/vecDot32_2/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cf32_utils.hpp" @@ -38,7 +38,7 @@ int main(int argc, char **argv) { 1, 2, 3, 4, 5, 7, 8, 15, 16, 32, 64, 100, 128, 151, 256, 512, 1024, }; bool passed = true; - for (auto &n : params) { + for (const auto &n : params) { passed &= run_vec_dot_test(n); } exit(passed ? 
EXIT_SUCCESS : EXIT_FAILURE); diff --git a/test/XRanBlockFloat/Compression/main.cpp b/test/XRanBlockFloat/Compression/main.cpp index 14a9e545548101daa06351fb600befb38c9a5f7e..43824bd810e59c0376068de9f9d5450b069e8291 100644 --- a/test/XRanBlockFloat/Compression/main.cpp +++ b/test/XRanBlockFloat/Compression/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cs16_utils.hpp" #include "int8_utils.hpp" diff --git a/test/XRanBlockFloat/Decompression/main.cpp b/test/XRanBlockFloat/Decompression/main.cpp index e50737bd537eaef748921fba755e5d1f235fbdb3..4506d3bbf9afb7a6103cd4cddcd7b5623b5d02e0 100644 --- a/test/XRanBlockFloat/Decompression/main.cpp +++ b/test/XRanBlockFloat/Decompression/main.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "cs16_utils.hpp" #include "int8_utils.hpp" diff --git a/utils/bit_utils.hpp b/utils/bit_utils.hpp index fc85ac20d2dc21820c63d94bfdee24a6887c3882..0de9b1c46cb4350c5ea0f1f53ff2e3e5e37e7f20 100644 --- a/utils/bit_utils.hpp +++ b/utils/bit_utils.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/utils/cf32_utils.hpp b/utils/cf32_utils.hpp index 7e41eb1330bee56180965573f007212d04145410..41f2c4f52ace05165b40b8a1bfb93f0f482fc3cf 100644 --- a/utils/cf32_utils.hpp +++ b/utils/cf32_utils.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/utils/cs16_utils.hpp b/utils/cs16_utils.hpp index 0d64613256c9c33904cf3aded982498c6b88421f..6824f3cb75b918b4f521cd3fc8790eb25e364cfa 100644 --- a/utils/cs16_utils.hpp +++ b/utils/cs16_utils.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/utils/fft_utils.hpp b/utils/fft_utils.hpp index 5de9ef289f31d74639bf329af93ca43868d3fea6..c99371ccf3a8825a947e279646ecf889db77a899 100644 --- a/utils/fft_utils.hpp +++ b/utils/fft_utils.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/utils/int8_utils.hpp b/utils/int8_utils.hpp index 430baf0a31ea5b3626d04812e66c7a77dcd6e28e..ec5d103c52a1398cfb360e293b8f4fbfb910d6c6 100644 --- a/utils/int8_utils.hpp +++ b/utils/int8_utils.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/utils/matrix_utils.hpp b/utils/matrix_utils.hpp index 3b27451ebd50c52b15656aa3a0e4b2b9c50cbe6b..d15e31ec8985a85b5fd395b9739a50508722c856 100644 --- a/utils/matrix_utils.hpp +++ b/utils/matrix_utils.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once @@ -8,20 +8,6 @@ #include "reference_linalg.hpp" #include "rng.hpp" -/* - * Multiply a vector by a uniform scaling factor. 
- * - * This is explicitly noinline since it avoids a compiler bug with GCC 8.2.0 - * where the code is incorrectly inlined into gen_hermitian_matrix. - */ -static inline void __attribute__((noinline)) -cscal(uint32_t n, armral_cmplx_f32_t *a, armral_cmplx_f32_t s) { - for (unsigned i = 0; i < n; ++i) { - a[i].re *= s.re; - a[i].im *= s.im; - } -} - /* * Generate random values, the resulting matrix will have linearly independent * columns with probability almost 1. @@ -261,8 +247,8 @@ static inline bool check_results_identity(const armral_cmplx_f32_t *mat, uint32_t m, int verbose = 0) { bool passed = true; // Init arrays - armral_cmplx_f32_t id[m * m]; - armral_cmplx_f32_t mm[m * m]; + std::vector<armral_cmplx_f32_t> id(m * m); + std::vector<armral_cmplx_f32_t> mm(m * m); for (unsigned i = 0; i < m; ++i) { for (unsigned j = 0; j < m; ++j) { if (i == j) { @@ -280,248 +266,23 @@ static inline bool check_results_identity(const armral_cmplx_f32_t *mat, { std::vector<std::complex<double>> mm64(m * m); reference_zgemm(m, m, m, 1.0F, m64, inv_m64, 0.0F, mm64); - convert_vector_to_cf32_array(m * m, mm64, mm); - passed &= check_results_mat_inv("MM^{-1} - Id", (float *)mm, (float *)id, - 2 * m * m, (float)m, (float)m, verbose); + convert_vector_to_cf32_array(m * m, mm64, mm.data()); + passed &= check_results_mat_inv("MM^{-1} - Id", (float *)mm.data(), + (float *)id.data(), 2 * m * m, (float)m, + (float)m, verbose); } // MM^{-1} { std::vector<std::complex<double>> mm64(m * m); reference_zgemm(m, m, m, 1.0F, inv_m64, m64, 0.0F, mm64); - convert_vector_to_cf32_array(m * m, mm64, mm); - passed &= check_results_mat_inv("M^{-1}M - Id", (float *)mm, (float *)id, - 2 * m * m, (float)m, (float)m, verbose); + convert_vector_to_cf32_array(m * m, mm64, mm.data()); + passed &= check_results_mat_inv("M^{-1}M - Id", (float *)mm.data(), + (float *)id.data(), 2 * m * m, (float)m, + (float)m, verbose); } return passed; } -/* - * Reorder matrices to allow easy access to blocks. - */ -static unsigned zorder_y_of(unsigned index) { - unsigned y = 0; - for (unsigned b = 0, k = 0; (1U << b) <= index; b += 2, k++) { - y += static_cast<unsigned>((index & (1U << b)) != 0) << k; - } - return y; -} - -static unsigned zorder_x_of(unsigned index) { - return zorder_y_of(index >> 1); -} - -/* - * Convert from z-order to row-major. - */ -static std::vector<std::complex<double>> -zorder_to_rowmajor(uint32_t m, const std::vector<std::complex<double>> &z) { - std::vector<std::complex<double>> a(m * m); - for (unsigned i = 0; i < m; ++i) { - for (unsigned j = 0; j < m; ++j) { - unsigned ijx = zorder_x_of(i * m + j); - unsigned ijy = zorder_y_of(i * m + j); - a[ijx * m + ijy] = z[i * m + j]; - } - } - return a; -} - -/* - * Convert from row-major to z-order. - */ -static std::vector<std::complex<double>> -rowmajor_to_zorder(uint32_t m, const std::vector<std::complex<double>> &a) { - std::vector<std::complex<double>> z(m * m); - for (unsigned i = 0; i < m; ++i) { - for (unsigned j = 0; j < m; ++j) { - unsigned ijx = zorder_x_of(i * m + j); - unsigned ijy = zorder_y_of(i * m + j); - z[i * m + j] = a[ijx * m + ijy]; - } - } - return z; - } - -/* - * General matrix multiplication on matrices stored in z-order.
- */ -static void reference_zgemm_zorder(uint32_t m, const double alpha, - const std::vector<std::complex<double>> &a, - const std::vector<std::complex<double>> &b, - const double beta, - std::vector<std::complex<double>> &c) { - // Convert to row-major - auto a64 = zorder_to_rowmajor(m, a); - auto b64 = zorder_to_rowmajor(m, b); - auto c64 = zorder_to_rowmajor(m, c); - - // Evaluate double precision matrix multiply - reference_zgemm(m, m, m, alpha, a64, b64, beta, c64); - - // Convert back to original order - c = rowmajor_to_zorder(m, c64); -} - -static std::vector<std::complex<double>> -reference_zgeinv_2x2(uint32_t m, const std::vector<std::complex<double>> &mat) { - std::vector<std::complex<double>> inv_m(m * m); - // Inverse 2x2 matrix using analytic expression - std::complex<double> rdet = 1.0 / (mat[0] * mat[3] - mat[1] * mat[2]); - inv_m[0] = +rdet * mat[3]; - inv_m[1] = -rdet * mat[1]; - inv_m[2] = -rdet * mat[2]; - inv_m[3] = +rdet * mat[0]; - return inv_m; -} - -static std::vector<std::complex<double>> -reference_zgeinv_3x3(uint32_t m, const std::vector<std::complex<double>> &mat) { - std::vector<std::complex<double>> inv_m(m * m); - auto a0 = mat[0]; - auto a1 = mat[1]; - auto a2 = mat[2]; - auto a3 = mat[4]; - auto a4 = mat[5]; - auto a5 = mat[8]; - - auto c1 = mat[3]; - auto c2 = mat[6]; - auto c4 = mat[7]; - - auto adj00 = a3 * a5 - a4 * c4; - auto adj11 = a0 * a5 - a2 * c2; - auto adj22 = a0 * a3 - a1 * c1; - - auto adj10 = c1 * a5 - c2 * a4; - auto adj20 = c1 * c4 - c2 * a3; - auto adj01 = a1 * a5 - c4 * a2; - auto adj21 = a0 * c4 - c2 * a1; - auto adj02 = a1 * a4 - a3 * a2; - auto adj12 = a0 * a4 - c1 * a2; - - // Compute cofactors (apply negative signs) - adj01 = -adj01; - adj10 = -adj10; - adj12 = -adj12; - adj21 = -adj21; - - // Determinant: A_{0:} * adj(A)_{:0} - auto inv_det = 1.0 / (a0 * adj00 + a1 * adj10 + a2 * adj20); - - // Write into output array - inv_m[0] = adj00 * inv_det; - inv_m[1] = adj01 * inv_det; - inv_m[2] = adj02 * inv_det; - inv_m[3] = adj10 * inv_det; - inv_m[4] = adj11 * inv_det; - inv_m[5] = adj12 * inv_det; - inv_m[6] = adj20 * inv_det; - inv_m[7] = adj21 * inv_det; - inv_m[8] = adj22 * inv_det; - return inv_m; -} - -/* - * Matrix Inversion using blockwise approach (recursive implementation).
- * - * M = [A B] M^{-1} = [X^{-1} -A^{-1}BU^{-1}] - * [C D] [-D^{-1}CX^{-1} U^{-1} ] - */ -static std::vector<std::complex<double>> -reference_zgeinv(uint32_t m, const std::vector<std::complex<double>> &mat) { - if (m == 2) { - return reference_zgeinv_2x2(m, mat); - } - std::vector<std::complex<double>> inv_m(m * m); - // Compute each block separately using reference matrix inversion (recursive - // process) - unsigned mm = m / 2; - std::vector<std::complex<double>> a(mat.begin() + 0 * mm * mm, - mat.begin() + 1 * mm * mm); - std::vector<std::complex<double>> b(mat.begin() + 1 * mm * mm, - mat.begin() + 2 * mm * mm); - std::vector<std::complex<double>> c(mat.begin() + 2 * mm * mm, - mat.begin() + 3 * mm * mm); - std::vector<std::complex<double>> d(mat.begin() + 3 * mm * mm, - mat.begin() + 4 * mm * mm); - - // Inverse of A and D - auto inv_a = reference_zgeinv(mm, a); - auto inv_d = reference_zgeinv(mm, d); - - // M00^{-1} = X^{-1} = (A - BD^{-1}C)^{-1} - std::vector<std::complex<double>> y(mm * mm); - auto x = a; - reference_zgemm_zorder(mm, 1.0, inv_d, c, 0.0, y); - reference_zgemm_zorder(mm, -1.0, b, y, 1.0, x); - auto inv_m00 = reference_zgeinv(mm, x); - - // M10^{-1} = -D^{-1}C X^{-1} - std::vector<std::complex<double>> inv_m10(mm * mm); - reference_zgemm_zorder(mm, -1.0, y, inv_m00, 0.0, inv_m10); - - // M11^{-1} = U^{-1} = (D - CA^{-1}B)^{-1} - std::vector<std::complex<double>> v(mm * mm); - auto u = d; - reference_zgemm_zorder(mm, 1.0, inv_a, b, 0.0, v); - reference_zgemm_zorder(mm, -1.0, c, v, 1.0, u); - auto inv_m11 = reference_zgeinv(mm, u); - - // M01 = -A^{-1}B U^{-1} - std::vector<std::complex<double>> inv_m01(mm * mm); - reference_zgemm_zorder(mm, -1.0, v, inv_m11, 0.0, inv_m01); - - // Set inverse matrix block per block - inv_m.clear(); - inv_m.insert(inv_m.end(), inv_m00.begin(), inv_m00.end()); - inv_m.insert(inv_m.end(), inv_m01.begin(), inv_m01.end()); - inv_m.insert(inv_m.end(), inv_m10.begin(), inv_m10.end()); - inv_m.insert(inv_m.end(), inv_m11.begin(), inv_m11.end()); - - return inv_m; -} - -static inline std::vector<std::complex<double>> -reference_zgeinv_small(uint32_t m, - const std::vector<std::complex<double>> &mat) { - if (m == 2) { - return reference_zgeinv_2x2(m, mat); - } - if (m == 3) { - return reference_zgeinv_3x3(m, mat); - } - // GCOVR_EXCL_START - assert(false && "Small matrix inverse only defined for m = 2 or m = 3"); - // GCOVR_EXCL_STOP - return {}; -} - -/* - * Run reference Matrix Inversion based on blockwise approach.
- */ -static inline void reference_matinv_block(uint32_t m, - const armral_cmplx_f32_t *a, - armral_cmplx_f32_t *b) { - - // Init double precision input matrix (use z-order for easy access to blocks) - auto a_tmp = convert_cf32_array_to_vector(m * m, a); - - // Bypass z-ordering for small cases - if (m == 2 || m == 3) { - auto b_tmp = reference_zgeinv_small(m, a_tmp); - convert_vector_to_cf32_array(m * m, b_tmp, b); - } else { - auto a64 = rowmajor_to_zorder(m, a_tmp); - - // Evaluate double precision inverse - auto b64 = reference_zgeinv(m, a64); - - // Round back to single precision - auto b_tmp = zorder_to_rowmajor(m, b64); - convert_vector_to_cf32_array(m * m, b_tmp, b); - } -} - /* * Unpack data from batched format into a contiguous array */ @@ -566,3 +327,24 @@ static inline void print_cmplx_mat(const std::string &ref, uint32_t m, printf("]\n"); } } + +/* + * Return the number of floating-point operations required to calculate a length-n + * complex dot product + */ +static inline uint32_t cmplx_dot_nflops(uint32_t n) { + // A complex multiplication requires 6 floating-point operations + uint32_t op_mul = 6; + // A complex multiply-accumulate requires 8 floating-point operations + uint32_t op_mla = 8; + + uint32_t nflops = 0; + if (n > 0) { + // The cost of multiplying the first two vector entries together + nflops += op_mul; + // The cost of multiplying the remaining (n-1) vector entries + // and accumulating into the dot product + nflops += (n - 1) * op_mla; + } + return nflops; +} diff --git a/utils/qint64.hpp b/utils/qint64.hpp index efb5abaf0684f7574f82bc646c2f5c7496e1fd80..8922edf5d2c174ba35d27bebfccff385b6b91b98 100644 --- a/utils/qint64.hpp +++ b/utils/qint64.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once diff --git a/utils/reference_linalg.hpp b/utils/reference_linalg.hpp index 0a05f100f95837305b59040a25bc175d34f1734e..0960d440ef3843528436fc30658e76e89e7dfa57 100644 --- a/utils/reference_linalg.hpp +++ b/utils/reference_linalg.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once @@ -12,6 +12,20 @@ #include #include +/* + * Multiply a vector by a uniform scaling factor. + * + * This is explicitly noinline since it avoids a compiler bug with GCC 8.2.0 + * where the code is incorrectly inlined into gen_hermitian_matrix. + */ +static inline void __attribute__((noinline)) +cscal(uint32_t n, armral_cmplx_f32_t *a, armral_cmplx_f32_t s) { + for (unsigned i = 0; i < n; ++i) { + a[i].re *= s.re; + a[i].im *= s.im; + } +} + /* * ZGEMM: General complex double matrix multiplication C = beta*C + alpha*A*B */ @@ -31,6 +45,207 @@ static inline void reference_zgemm(uint16_t m, uint16_t n, uint16_t p, } } +/* + * Reorder matrices to allow easy access to blocks. + */ +static unsigned zorder_y_of(unsigned index) { + unsigned y = 0; + for (unsigned b = 0, k = 0; (1U << b) <= index; b += 2, k++) { + y += static_cast<unsigned>((index & (1U << b)) != 0) << k; + } + return y; +} + +static unsigned zorder_x_of(unsigned index) { + return zorder_y_of(index >> 1); +} + +/* + * Convert from z-order to row-major.
+ */ +static std::vector<std::complex<double>> +zorder_to_rowmajor(uint32_t m, const std::vector<std::complex<double>> &z) { + std::vector<std::complex<double>> a(m * m); + for (unsigned i = 0; i < m; ++i) { + for (unsigned j = 0; j < m; ++j) { + unsigned ijx = zorder_x_of(i * m + j); + unsigned ijy = zorder_y_of(i * m + j); + a[ijx * m + ijy] = z[i * m + j]; + } + } + return a; +} + +/* + * Convert from row-major to z-order. + */ +static std::vector<std::complex<double>> +rowmajor_to_zorder(uint32_t m, const std::vector<std::complex<double>> &a) { + std::vector<std::complex<double>> z(m * m); + for (unsigned i = 0; i < m; ++i) { + for (unsigned j = 0; j < m; ++j) { + unsigned ijx = zorder_x_of(i * m + j); + unsigned ijy = zorder_y_of(i * m + j); + z[i * m + j] = a[ijx * m + ijy]; + } + } + return z; +} + +/* + * General matrix multiplication on matrices stored in z-order. + */ +static void reference_zgemm_zorder(uint32_t m, const double alpha, + const std::vector<std::complex<double>> &a, + const std::vector<std::complex<double>> &b, + const double beta, + std::vector<std::complex<double>> &c) { + // Convert to row-major + auto a64 = zorder_to_rowmajor(m, a); + auto b64 = zorder_to_rowmajor(m, b); + auto c64 = zorder_to_rowmajor(m, c); + + // Evaluate double precision matrix multiply + reference_zgemm(m, m, m, alpha, a64, b64, beta, c64); + + // Convert back to original order + c = rowmajor_to_zorder(m, c64); +} + +static std::vector<std::complex<double>> +reference_zgeinv_2x2(uint32_t m, const std::vector<std::complex<double>> &mat) { + std::vector<std::complex<double>> inv_m(m * m); + // Inverse 2x2 matrix using analytic expression + std::complex<double> rdet = 1.0 / (mat[0] * mat[3] - mat[1] * mat[2]); + inv_m[0] = +rdet * mat[3]; + inv_m[1] = -rdet * mat[1]; + inv_m[2] = -rdet * mat[2]; + inv_m[3] = +rdet * mat[0]; + return inv_m; +} + +static std::vector<std::complex<double>> +reference_zgeinv_3x3(uint32_t m, const std::vector<std::complex<double>> &mat) { + std::vector<std::complex<double>> inv_m(m * m); + auto a0 = mat[0]; + auto a1 = mat[1]; + auto a2 = mat[2]; + auto a3 = mat[4]; + auto a4 = mat[5]; + auto a5 = mat[8]; + + auto c1 = mat[3]; + auto c2 = mat[6]; + auto c4 = mat[7]; + + auto adj00 = a3 * a5 - a4 * c4; + auto adj11 = a0 * a5 - a2 * c2; + auto adj22 = a0 * a3 - a1 * c1; + + auto adj10 = c1 * a5 - c2 * a4; + auto adj20 = c1 * c4 - c2 * a3; + auto adj01 = a1 * a5 - c4 * a2; + auto adj21 = a0 * c4 - c2 * a1; + auto adj02 = a1 * a4 - a3 * a2; + auto adj12 = a0 * a4 - c1 * a2; + + // Compute cofactors (apply negative signs) + adj01 = -adj01; + adj10 = -adj10; + adj12 = -adj12; + adj21 = -adj21; + + // Determinant: A_{0:} * adj(A)_{:0} + auto inv_det = 1.0 / (a0 * adj00 + a1 * adj10 + a2 * adj20); + + // Write into output array + inv_m[0] = adj00 * inv_det; + inv_m[1] = adj01 * inv_det; + inv_m[2] = adj02 * inv_det; + inv_m[3] = adj10 * inv_det; + inv_m[4] = adj11 * inv_det; + inv_m[5] = adj12 * inv_det; + inv_m[6] = adj20 * inv_det; + inv_m[7] = adj21 * inv_det; + inv_m[8] = adj22 * inv_det; + return inv_m; +} + +/* + * Matrix Inversion using blockwise approach (recursive implementation).
+ * + * M = [A B] M^{-1} = [X^{-1} -A^{-1}BU^{-1}] + * [C D] [-D^{-1}CX^{-1} U^{-1} ] + */ +static std::vector<std::complex<double>> +reference_zgeinv(uint32_t m, const std::vector<std::complex<double>> &mat) { + if (m == 2) { + return reference_zgeinv_2x2(m, mat); + } + std::vector<std::complex<double>> inv_m(m * m); + // Compute each block separately using reference matrix inversion (recursive + // process) + unsigned mm = m / 2; + std::vector<std::complex<double>> a(mat.begin() + 0 * mm * mm, + mat.begin() + 1 * mm * mm); + std::vector<std::complex<double>> b(mat.begin() + 1 * mm * mm, + mat.begin() + 2 * mm * mm); + std::vector<std::complex<double>> c(mat.begin() + 2 * mm * mm, + mat.begin() + 3 * mm * mm); + std::vector<std::complex<double>> d(mat.begin() + 3 * mm * mm, + mat.begin() + 4 * mm * mm); + + // Inverse of A and D + auto inv_a = reference_zgeinv(mm, a); + auto inv_d = reference_zgeinv(mm, d); + + // M00^{-1} = X^{-1} = (A - BD^{-1}C)^{-1} + std::vector<std::complex<double>> y(mm * mm); + auto x = a; + reference_zgemm_zorder(mm, 1.0, inv_d, c, 0.0, y); + reference_zgemm_zorder(mm, -1.0, b, y, 1.0, x); + auto inv_m00 = reference_zgeinv(mm, x); + + // M10^{-1} = -D^{-1}C X^{-1} + std::vector<std::complex<double>> inv_m10(mm * mm); + reference_zgemm_zorder(mm, -1.0, y, inv_m00, 0.0, inv_m10); + + // M11^{-1} = U^{-1} = (D - CA^{-1}B)^{-1} + std::vector<std::complex<double>> v(mm * mm); + auto u = d; + reference_zgemm_zorder(mm, 1.0, inv_a, b, 0.0, v); + reference_zgemm_zorder(mm, -1.0, c, v, 1.0, u); + auto inv_m11 = reference_zgeinv(mm, u); + + // M01 = -A^{-1}B U^{-1} + std::vector<std::complex<double>> inv_m01(mm * mm); + reference_zgemm_zorder(mm, -1.0, v, inv_m11, 0.0, inv_m01); + + // Set inverse matrix block per block + inv_m.clear(); + inv_m.insert(inv_m.end(), inv_m00.begin(), inv_m00.end()); + inv_m.insert(inv_m.end(), inv_m01.begin(), inv_m01.end()); + inv_m.insert(inv_m.end(), inv_m10.begin(), inv_m10.end()); + inv_m.insert(inv_m.end(), inv_m11.begin(), inv_m11.end()); + + return inv_m; +} + +static inline std::vector<std::complex<double>> +reference_zgeinv_small(uint32_t m, + const std::vector<std::complex<double>> &mat) { + if (m == 2) { + return reference_zgeinv_2x2(m, mat); + } + if (m == 3) { + return reference_zgeinv_3x3(m, mat); + } + // GCOVR_EXCL_START + assert(false && "Small matrix inverse only defined for m = 2 or m = 3"); + // GCOVR_EXCL_STOP + return {}; +} + /* * Converting between armral_cmplx_f32_t and std::complex */ @@ -117,6 +332,26 @@ static inline void reference_matmul_cf32(uint16_t m, uint16_t n, uint16_t p, } } +/* + * Reference conjugate transpose matrix multiplication (C=B * A^H) on cf32 input + * matrices + */ +static inline void reference_matmul_bah_cf32( + uint16_t m, uint16_t n, const armral_cmplx_f32_t *__restrict p_src_a, + const armral_cmplx_f32_t *__restrict p_src_b, armral_cmplx_f32_t *p_dst) { + for (uint16_t i = 0; i < n; i++) { + for (uint16_t j = 0; j < m; j++) { + std::complex<double> dot = 0.; + for (uint16_t k = 0; k < n; k++) { + auto ah_jk = complex_convert(p_src_a[j * n + k]); + auto b_ik = complex_convert(p_src_b[i * n + k]); + dot += b_ik * std::conj(ah_jk); + } + p_dst[i * m + j] = complex_convert(dot); + } + } +} + /* * Reference conjugate transpose matrix multiplication (C=A^H * B) on cf32 input * matrices @@ -179,22 +414,51 @@ reference_matmul_aah_cf32(uint16_t m, uint16_t n, } /* - * Return the number of floating-point operatins required to calculate a length-n - * complex dot product + * Reference matrix multiplication (C=A^H*A) on a cf32 input matrix */ -static inline uint32_t cmplx_dot_nflops(uint32_t n) { - // A complex multiplication requires 6 floating-point operations - uint32_t op_mul = 6; - // A complex multiply-accumulate requires 8 floating-point operations - uint32_t op_mla = 8;
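Editor's note: the recursive `reference_zgeinv` added above implements the standard blockwise inversion identity; written out, with the Schur complements X and U that the code forms as `x` and `u`:

```latex
M = \begin{bmatrix} A & B \\ C & D \end{bmatrix}, \qquad
X = A - B D^{-1} C, \qquad
U = D - C A^{-1} B,
\qquad
M^{-1} = \begin{bmatrix}
  X^{-1} & -A^{-1} B \, U^{-1} \\
  -D^{-1} C \, X^{-1} & U^{-1}
\end{bmatrix}.
```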
+static inline void +reference_matmul_aha_cf32(uint16_t m, uint16_t n, + const armral_cmplx_f32_t *__restrict p_src, + armral_cmplx_f32_t *p_dst) { + for (uint16_t i = 0; i < n; i++) { + for (uint16_t j = 0; j < n; j++) { + std::complex<double> dot = 0.; + for (uint16_t k = 0; k < m; k++) { + uint32_t ah_idx = k * n + i; + uint32_t a_idx = k * n + j; + dot += std::conj(complex_convert(p_src[ah_idx])) * + complex_convert(p_src[a_idx]); + } + if (i == j) { + dot.imag(0.); + } + p_dst[i * n + j] = complex_convert(dot); + } + } +} + +/* + * Run reference Matrix Inversion based on blockwise approach. + */ +static inline void reference_matinv_block(uint32_t m, + const armral_cmplx_f32_t *a, + armral_cmplx_f32_t *b) { + + // Init double precision input matrix (use z-order for easy access to blocks) + auto a_tmp = convert_cf32_array_to_vector(m * m, a); + + // Bypass z-ordering for small cases + if (m == 2 || m == 3) { + auto b_tmp = reference_zgeinv_small(m, a_tmp); + convert_vector_to_cf32_array(m * m, b_tmp, b); + } else { + auto a64 = rowmajor_to_zorder(m, a_tmp); + + // Evaluate double precision inverse + auto b64 = reference_zgeinv(m, a64); - uint32_t nflops = 0; - if (n > 0) { - // The cost of multiplying the first two vector entries together - nflops += op_mul; - // The cost of multiplying the remaining (n-1) vector entries - // and accumulating into the dot product - nflops += (n - 1) * op_mla; + // Round back to single precision + auto b_tmp = zorder_to_rowmajor(m, b64); + convert_vector_to_cf32_array(m * m, b_tmp, b); } - return nflops; } diff --git a/utils/rng.cpp b/utils/rng.cpp index 7b16c7240110c86efd3b555cfe78d307f5d16c95..e14d97d04617c89840ac0e22e542411c7d066972 100644 --- a/utils/rng.cpp +++ b/utils/rng.cpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #include "rng.hpp" diff --git a/utils/rng.hpp b/utils/rng.hpp index c94d4e4b8d17db374fd47bed8242f89fb2f2bcd2..a6e09dc48d74d226a90a72ee64adcbcda32ac377 100644 --- a/utils/rng.hpp +++ b/utils/rng.hpp @@ -1,6 +1,6 @@ /* Arm RAN Acceleration Library - Copyright 2020-2023 Arm Limited and/or its affiliates + Copyright 2020-2024 Arm Limited and/or its affiliates */ #pragma once
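Editor's note: the `reference_matmul_aha_cf32` and `reference_matmul_bah_cf32` helpers added above are the building blocks of the left/right regularized pseudo-inverse split introduced in test/MatrixPseudoInv/direct/main.cpp. For an m-by-n input A, the two reference paths compute:

```latex
A^{+} =
\begin{cases}
  \left( A^{H} A + \lambda I \right)^{-1} A^{H}, & m > n \quad \text{(left pseudo-inverse)} \\
  A^{H} \left( A A^{H} + \lambda I \right)^{-1}, & m \le n \quad \text{(right pseudo-inverse)}
\end{cases}
```

Both cases yield an n-by-m result, which is why `p_dst` can double as the square intermediate workspace in either branch.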