diff --git a/bench/LDPC/Encoding/bench.py b/bench/LDPC/Encoding/bench.py
index ae0a7084db980b6ad739f7e86f5c51968f063625..7bfc65b139da749611e610bbdbef6d2a183f3488 100755
--- a/bench/LDPC/Encoding/bench.py
+++ b/bench/LDPC/Encoding/bench.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python3
 # Arm RAN Acceleration Library
 # Copyright 2020-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
-
 import json
 import itertools
 from pathlib import Path
@@ -12,14 +11,13 @@ def get_path(x): return x if Path(x).is_file() else os.path.join("armral", x)
 
 
 exe_name = get_path("bench_ldpc_encoding")
-
 j = {
     "exe_name": exe_name,
     "cases": []
 }
-
 base_graphs = [1, 2]
-lifting_sizes = [2, 11, 16, 30, 36, 52, 112, 160, 208, 384]
+lifting_sizes = [2, 11, 16, 18, 30, 36, 52, 112, 160, 208, 384]
+len_filler_bits = [0, 0, 0, 76, 0, 0, 0, 72, 0, 0, 0]
 
 # PERF TESTS for ldpc_encoder, all base graphs and lifting sets
 # target_reps are the number of repetitions to get something
@@ -28,10 +26,10 @@ lifting_sizes = [2, 11, 16, 30, 36, 52, 112, 160, 208, 384]
 # according to the lifting size.
 target_reps = 150000
 
-for bg, z in itertools.product(base_graphs, lifting_sizes):
+for bg, (z, f) in itertools.product(base_graphs, zip(lifting_sizes, len_filler_bits)):
     case = {
-        "name": "ldpc_encoding_bg{}_z{}".format(bg, z),
-        "args": "{} {}".format(bg, z),
+        "name": "ldpc_encoding_bg{}_z{}_f{}".format(bg, z, f),
+        "args": "{} {} {}".format(bg, z, f),
         "reps": target_reps * 2 // z
     }
     j["cases"].append(case)
diff --git a/bench/LDPC/Encoding/main.cpp b/bench/LDPC/Encoding/main.cpp
index 26488f562fa3deb16fbd69e09bd6900213221f8c..1c3c310f8c7cd4414783a6672f8d6a15de6dac3a 100644
--- a/bench/LDPC/Encoding/main.cpp
+++ b/bench/LDPC/Encoding/main.cpp
@@ -8,10 +8,12 @@
 
 namespace {
 
-void run_ldpc_encoding_perf(armral_ldpc_graph_t bg, uint32_t z, uint32_t reps) {
-  printf("[LDPC ENCODING] - base graph = %u, lifting size = %u, number of "
+void run_ldpc_encoding_perf(armral_ldpc_graph_t bg, uint32_t z,
+                            uint32_t len_filler_bits, uint32_t reps) {
+  printf("[LDPC ENCODING] - base graph = %u, lifting size = %u, filler bits "
+         "length = %u, number of "
          "repetitions = %u\n",
-         (uint32_t)bg, z, reps);
+         (uint32_t)bg, z, len_filler_bits, reps);
 
   const auto *graph = armral_ldpc_get_base_graph(bg);
 
@@ -26,14 +28,16 @@ void run_ldpc_encoding_perf(armral_ldpc_graph_t bg, uint32_t z, uint32_t reps) {
 #ifdef ARMRAL_BENCH_NOALLOC
   // Benchmark only added for interest. This is not expected to show any major
   // performance difference.
-  auto buffer_size = armral_ldpc_encode_block_noalloc_buffer_size(bg, z);
+  auto buffer_size =
+      armral_ldpc_encode_block_noalloc_buffer_size(bg, z, len_filler_bits);
   std::vector<uint8_t> buffer(buffer_size);
   for (uint32_t r = 0; r < reps; ++r) {
-    armral_ldpc_encode_block_noalloc(in_ptr, bg, z, out_ptr, buffer.data());
+    armral_ldpc_encode_block_noalloc(in_ptr, bg, z, len_filler_bits, out_ptr,
+                                     buffer.data());
   }
 #else
   for (uint32_t r = 0; r < reps; ++r) {
-    armral_ldpc_encode_block(in_ptr, bg, z, out_ptr);
+    armral_ldpc_encode_block(in_ptr, bg, z, len_filler_bits, out_ptr);
   }
 #endif
 }
@@ -41,21 +45,25 @@ void run_ldpc_encoding_perf(armral_ldpc_graph_t bg, uint32_t z, uint32_t reps) {
 } // anonymous namespace
 
 int main(int argc, char **argv) {
-  if (argc != 4) {
+  if (argc != 5) {
     // base_graph:  integer representing the base graph to use.
     //              This gets converted into the enum representing the base
     //              graph 1 -> LDPC_BASE_GRAPH_1 2 -> LDPC_BASE_GRAPH_2
     // lifting_size:  The lifting size Z to use in the block encoding
+    // len_filler_bits:  Filler bits length, as per section 5.2.2 of TS 38.212
     // num_reps:      The number of times to repeat the encoding, so as to get a
     //                stable performance number
-    printf("Usage: %s base_graph lifting_size num_reps\n", argv[0]);
+    printf("Usage: %s base_graph lifting_size len_filler_bits num_reps\n",
+           argv[0]);
     exit(EXIT_FAILURE);
   }
+
   auto bg = (armral_ldpc_graph_t)(atoi(argv[1]) - 1);
   auto z = (uint32_t)atoi(argv[2]);
-  auto reps = (uint32_t)atoi(argv[3]);
+  auto len_filler_bits = (uint32_t)atoi(argv[3]);
+  auto reps = (uint32_t)atoi(argv[4]);
 
-  run_ldpc_encoding_perf(bg, z, reps);
+  run_ldpc_encoding_perf(bg, z, len_filler_bits, reps);
 
   return EXIT_SUCCESS;
 }
diff --git a/include/armral.h b/include/armral.h
index e4b975c56dd71e4b2d47934a8295696ffa61276b..ab3a27dfccae925bf08372ddf45432480dcd81d1 100644
--- a/include/armral.h
+++ b/include/armral.h
@@ -3301,6 +3301,10 @@ armral_ldpc_get_base_graph(armral_ldpc_graph_t bg);
  *                            the block.
  * @param[in] z               The lifting size. Valid values of the lifting
  *                            size are described in table 5.3.2-1 in TS 38.212.
+ * @param[in] len_filler_bits The number of filler bits. As per TS 38.212,
+ *                            section 5.2.2, filler bits insertion is needed to
+ *                            ensure that the code block segments have a valid
+ *                            length and are a multiple of the lifting size.
  * @param[out] data_out       The codeword to be transmitted. `data_out` has the
  *                            first two columns for the base graphs punctured,
  *                            and contains the information and calculated
@@ -3309,6 +3313,7 @@ armral_ldpc_get_base_graph(armral_ldpc_graph_t bg);
  */
 armral_status armral_ldpc_encode_block(const uint8_t *data_in,
                                        armral_ldpc_graph_t bg, uint32_t z,
+                                       uint32_t len_filler_bits,
                                        uint8_t *data_out);
 
 /**
@@ -3352,6 +3357,10 @@ armral_status armral_ldpc_encode_block(const uint8_t *data_in,
  *                            the block.
  * @param[in] z               The lifting size. Valid values of the lifting
  *                            size are described in table 5.3.2-1 in TS 38.212.
+ * @param[in] len_filler_bits The number of filler bits. As per TS 38.212,
+ *                            section 5.2.2, filler bits insertion is needed to
+ *                            ensure that the code block segments have a valid
+ *                            length and are a multiple of the lifting size.
  * @param[out] data_out       The codeword to be transmitted. `data_out` has the
  *                            first two columns for the base graphs punctured,
  *                            and contains the information and calculated
@@ -3361,25 +3370,31 @@ armral_status armral_ldpc_encode_block(const uint8_t *data_in,
  */
 armral_status armral_ldpc_encode_block_noalloc(const uint8_t *data_in,
                                                armral_ldpc_graph_t bg,
-                                               uint32_t z, uint8_t *data_out,
-                                               void *buffer);
+                                               uint32_t z,
+                                               uint32_t len_filler_bits,
+                                               uint8_t *data_out, void *buffer);
 
 /**
  * Calculates the required buffer size in bytes to encode a single code block
  * using LDPC for a given base graph and lifting size `z`.
  *
- * @param[in] bg  Identifier for the base graph to use for
- *                encoding. TS 38.212 defines two base graphs in
- *                table 5.3.2-2 and 5.3.2-3. The base graph, in
- *                combination with the lifting size `z`, determines
- *                the block size and the graph to use for encoding
- *                the block.
- * @param[in] z   The lifting size. Valid values of the lifting
- *                size are described in table 5.3.2-1 in TS 38.212.
+ * @param[in] bg              Identifier for the base graph to use for
+ *                            encoding. TS 38.212 defines two base graphs in
+ *                            table 5.3.2-2 and 5.3.2-3. The base graph, in
+ *                            combination with the lifting size `z`, determines
+ *                            the block size and the graph to use for encoding
+ *                            the block.
+ * @param[in] z               The lifting size. Valid values of the lifting
+ *                            size are described in table 5.3.2-1 in TS 38.212.
+ * @param[in] len_filler_bits The number of filler bits. As per TS 38.212,
+ *                            section 5.2.2, filler bits insertion is needed to
+ *                            ensure that the code block segments have a valid
+ *                            length and are a multiple of the lifting size.
  * @return    The required buffer size in bytes.
  */
 uint32_t armral_ldpc_encode_block_noalloc_buffer_size(armral_ldpc_graph_t bg,
-                                                      uint32_t z);
+                                                      uint32_t z,
+                                                      uint32_t len_filler_bits);
 
 /**
  * Performs decoding of LDPC using a layered min-sum algorithm. This is an
@@ -3512,33 +3527,42 @@ uint32_t armral_ldpc_decode_block_noalloc_buffer_size(armral_ldpc_graph_t bg,
  * multiple of the modulation order (i.e. the number of bits per modulation
  * symbol).
  *
- * @param[in]  bg   The type of base graph for which rate matching is to be
- *                  performed.
- * @param[in]  z    The lifting size. Valid values of the lifting size are
- *                  described in table 5.3.2-1 in TS 38.212.
- * @param[in]  e    The number of bits in the rate-matched message. This is
- *                  assumed to be a multiple of the number of bits per
- *                  modulation symbol.
- * @param[in]  nref The soft buffer size for limited buffer rate matching.
- *                  `nref` is defined in TS 38.212 section 5.4.2.1.
- * @param[in]  rv   Redundancy version used in rate matching. Must be in the
- *                  set `{0, 1, 2, 3}`. The effect of choosing different
- *                  redundancy versions is described in table 5.4.2.1-2 of TS
- *                  38.212.
- * @param[in]  mod  The type of modulation to perform. Required to perform
- *                  bit-interleaving, as described in section 5.4.2 of TS
- *                  38.212.
- * @param[in]  src  Input array. This is assumed to be the output of LDPC
- *                  encoding. This contains `66 * z` bits in the case that base
- *                  graph 1 is used, and `50 * z` bits in the case that base
- *                  graph 2 is used.
- * @param[out] dst  Contains `e` bits of data, which is the rate-matched data
- *                  ready to be passed to modulation.
+ * @param[in]  bg             The type of base graph for which rate matching is
+ *                            to be performed.
+ * @param[in]  z              The lifting size. Valid values of the lifting size
+ *                            are described in table 5.3.2-1 in TS 38.212.
+ * @param[in]  e              The number of bits in the rate-matched message.
+ *                            This is assumed to be a multiple of the number of
+ *                            bits per modulation symbol.
+ * @param[in]  nref           The soft buffer size for limited buffer rate
+ *                            matching. `nref` is defined in TS 38.212
+ *                            section 5.4.2.1.
+ * @param[in] len_filler_bits The number of filler bits. As per TS 38.212,
+ *                            section 5.2.2, filler bits insertion is needed to
+ *                            ensure that the code block segments have a valid
+ *                            length and are a multiple of the lifting size.
+ * @param[in]  k              codeblock size, the number of bits to encode as
+ *                            per section 5.3.2 of TS 38.212.
+ * @param[in]  rv             Redundancy version used in rate matching. Must be
+ *                            in the set `{0, 1, 2, 3}`. The effect of choosing
+ *                            different redundancy versions is described in
+ *                            table 5.4.2.1-2 of TS 38.212.
+ * @param[in]  mod            The type of modulation to perform. Required to
+ *                            perform bit-interleaving, as described in
+ *                            section 5.4.2 of TS 38.212.
+ * @param[in]  src            Input array. This is assumed to be the output of
+ *                            LDPC encoding. This contains `66 * z` bits in the
+ *                            case that base graph 1 is used, and `50 * z` bits
+ *                            in the case that base graph 2 is used.
+ * @param[out] dst            Contains `e` bits of data, which is the
+ *                            rate-matched data ready to be passed to
+ *                            modulation.
  * @return     An `armral_status` value that indicates success or failure.
  */
 armral_status armral_ldpc_rate_matching(armral_ldpc_graph_t bg, uint32_t z,
-                                        uint32_t e, uint32_t nref, uint32_t rv,
-                                        armral_modulation_type mod,
+                                        uint32_t e, uint32_t nref,
+                                        uint32_t len_filler_bits, uint32_t k,
+                                        uint32_t rv, armral_modulation_type mod,
                                         const uint8_t *src, uint8_t *dst);
 
 /**
@@ -3563,35 +3587,44 @@ armral_status armral_ldpc_rate_matching(armral_ldpc_graph_t bg, uint32_t z,
  * This function takes a pre-allocated buffer (`buffer`) to use internally.
  * This variant will not call any system memory allocators.
  *
- * The buffer must be at least `e * sizeof(uint8_t)` bytes.
- *
- * @param[in]  bg      The type of base graph for which rate matching is to be
- *                     performed.
- * @param[in]  z       The lifting size. Valid values of the lifting size are
- *                     described in table 5.3.2-1 in TS 38.212.
- * @param[in]  e       The number of bits in the rate-matched message. This is
- *                     assumed to be a multiple of the number of bits per
- *                     modulation symbol.
- * @param[in]  nref    The soft buffer size for limited buffer rate matching.
- *                     `nref` is defined in TS 38.212 section 5.4.2.1.
- * @param[in]  rv      Redundancy version used in rate matching. Must be in the
- *                     set `{0, 1, 2, 3}`. The effect of choosing different
- *                     redundancy versions is described in table 5.4.2.1-2 of TS
- *                     38.212.
- * @param[in]  mod     The type of modulation to perform. Required to perform
- *                     bit-interleaving, as described in section 5.4.2 of TS
- *                     38.212.
- * @param[in]  src     Input array. This is assumed to be the output of LDPC
- *                     encoding. This contains `66 * z` bits in the case that
- *                     base graph 1 is used, and `50 * z` bits in the case that
- *                     base graph 2 is used.
- * @param[out] dst     Contains `e` bits of data, which is the rate-matched data
- *                     ready to be passed to modulation.
- * @param[in]  buffer  Workspace buffer to be used internally.
+ * The buffer must be at least `((2 * z * 66) + e) * sizeof(uint8_t)` bytes.
+ *
+ * @param[in]  bg             The type of base graph for which rate matching is
+ *                            to be performed.
+ * @param[in]  z              The lifting size. Valid values of the lifting size
+ *                            are described in table 5.3.2-1 in TS 38.212.
+ * @param[in]  e              The number of bits in the rate-matched message.
+ *                            This is assumed to be a multiple of the number of
+ *                            bits per modulation symbol.
+ * @param[in]  nref           The soft buffer size for limited buffer rate
+ *                            matching. `nref` is defined in TS 38.212
+ *                            section 5.4.2.1.
+ * @param[in] len_filler_bits The number of filler bits. As per TS 38.212,
+ *                            section 5.2.2, filler bits insertion is needed to
+ *                            ensure that the code block segments have a valid
+ *                            length and are a multiple of the lifting size.
+ * @param[in]  k              codeblock size, the number of bits to encode as
+ *                            per section 5.3.2 of TS 38.212.
+ * @param[in]  rv             Redundancy version used in rate matching. Must be
+ *                            in the set `{0, 1, 2, 3}`. The effect of choosing
+ *                            different redundancy versions is described in
+ *                            table 5.4.2.1-2 of TS 38.212.
+ * @param[in]  mod            The type of modulation to perform. Required to
+ *                            perform bit-interleaving, as described in
+ *                            section 5.4.2 of TS 38.212.
+ * @param[in]  src            Input array. This is assumed to be the output of
+ *                            LDPC encoding. This contains `66 * z` bits in the
+ *                            case that base graph 1 is used, and `50 * z` bits
+ *                            in the case that base graph 2 is used.
+ * @param[out] dst            Contains `e` bits of data, which is the
+ *                            rate-matched data ready to be passed to
+ *                            modulation.
+ * @param[in]  buffer         Workspace buffer to be used internally.
  * @return     An `armral_status` value that indicates success or failure.
  */
 armral_status armral_ldpc_rate_matching_noalloc(
-    armral_ldpc_graph_t bg, uint32_t z, uint32_t e, uint32_t nref, uint32_t rv,
+    armral_ldpc_graph_t bg, uint32_t z, uint32_t e, uint32_t nref,
+    uint32_t len_filler_bits, uint32_t k, uint32_t rv,
     armral_modulation_type mod, const uint8_t *src, uint8_t *dst, void *buffer);
 
 /**
@@ -3618,36 +3651,45 @@ armral_status armral_ldpc_rate_matching_noalloc(
  * approximation to LLRs. The LLRs calculated from the rate-recovery are summed
  * to existing LLRs in the output array.
  *
- * @param[in]  bg      The type of base graph for which rate recovery is to be
- *                     performed.
- * @param[in]  z       The lifting size. Valid values of the lifting size are
- *                     described in table 5.3.2-1 in TS 38.212.
- * @param[in]  e       The number of LLRs in the demodulated message. Assumed
- *                     to be a multiple of the number of bits per modulation
- *                     symbol.
- * @param[in]  nref    The soft buffer size for limited buffer rate matching.
- *                     `nref` is defined in TS 38.212 section 5.4.2.1.
- * @param[in]  rv      Redundancy version used in rate recovery. Must be in the
- *                     set `{0, 1, 2, 3}`. The effect of choosing different
- *                     redundancy versions is described in table 5.4.2.1-2 of
- *                     TS 38.212.
- * @param[in]  mod     The type of modulation which was performed. Required to
- *                     perform bit-deinterleaving as the inverse of the
- *                     bit-interleaving described in section 5.4.2 of TS
- *                     38.212.
- * @param[in]  src     Input array of a total of `e` 8-bit LLRs. This is the
- *                     output after demodulation.
- * @param[in,out] dst  On entry, contains the current approximation to LLRs.
- *                     If no approximation of the LLRs is known, all entries
- *                     must be set to zero. The array has length `66 * z` for
- *                     base graph 1, and `50 * z` for base graph 2. On exit,
- *                     updated rate-recovered 8-bit LLRs, which are ready to be
- *                     passed to decoding.
+ * @param[in]     bg           The type of base graph for which rate recovery
+ *                             is to be performed.
+ * @param[in]     z            The lifting size. Valid values of the lifting
+ *                             size are described in table 5.3.2-1 in
+ *                             TS 38.212.
+ * @param[in]     e            The number of LLRs in the demodulated message.
+ *                             Assumed to be a multiple of the number of bits
+ *                             per modulation symbol.
+ * @param[in]    nref          The soft buffer size for limited buffer rate
+ *                             matching. `nref` is defined in TS 38.212
+ *                             section 5.4.2.1.
+ * @param[in]  len_filler_bits The number of filler bits. As per TS 38.212,
+ *                             section 5.2.2, filler bits insertion is needed to
+ *                             ensure that the code block segments have a valid
+ *                             length and are a multiple of the lifting size.
+ * @param[in]     k            codeblock size, the number of bits to encode as
+ *                             per section 5.3.2 of TS 38.212.
+ * @param[in]     rv           Redundancy version used in rate recovery. Must be
+ *                             in the set `{0, 1, 2, 3}`. The effect of choosing
+ *                             different redundancy versions is described in
+ *                             table 5.4.2.1-2 of TS 38.212.
+ * @param[in]     mod          The type of modulation that was performed. Used
+ *                             for bit-deinterleaving, the inverse of the
+ *                             bit-interleaving in section 5.4.2 of TS 38.212.
+ * @param[in]     src          Input array of a total of `e` 8-bit LLRs.
+ *                             This is the output after demodulation.
+ * @param[in,out] dst          On entry, contains the current approximation to
+ *                             LLRs. If no approximation of the LLRs is known,
+ *                             all entries must be set to zero. The array has
+ *                             length `66 * z` for base graph 1, and `50 * z`
+ *                             for base graph 2. On exit, updated rate-recovered
+ *                             8-bit LLRs, which are ready to be passed to
+ *                             decoding.
  * @return     An `armral_status` value that indicates success or failure.
  */
 armral_status armral_ldpc_rate_recovery(armral_ldpc_graph_t bg, uint32_t z,
-                                        uint32_t e, uint32_t nref, uint32_t rv,
-                                        armral_modulation_type mod,
+                                        uint32_t e, uint32_t nref,
+                                        uint32_t len_filler_bits, uint32_t k,
+                                        uint32_t rv, armral_modulation_type mod,
                                         const int8_t *src, int8_t *dst);
 
 /**
@@ -3679,38 +3721,46 @@ armral_status armral_ldpc_rate_recovery(armral_ldpc_graph_t bg, uint32_t z,
  * This function takes a pre-allocated buffer (`buffer`) to use internally.
  * This variant will not call any system memory allocators.
  *
- * The buffer must be at least `e * sizeof(uint8_t)` bytes.
- *
- * @param[in]  bg      The type of base graph for which rate recovery is to be
- *                     performed.
- * @param[in]  z       The lifting size. Valid values of the lifting size are
- *                     described in table 5.3.2-1 in TS 38.212.
- * @param[in]  e       The number of LLRs in the demodulated message. Assumed
- *                     to be a multiple of the number of bits per modulation
- *                     symbol.
- * @param[in]  nref    The soft buffer size for limited buffer rate matching.
- *                     `nref` is defined in TS 38.212 section 5.4.2.1.
- * @param[in]  rv      Redundancy version used in rate recovery. Must be in the
- *                     set `{0, 1, 2, 3}`. The effect of choosing different
- *                     redundancy versions is described in table 5.4.2.1-2 of
- *                     TS 38.212.
- * @param[in]  mod     The type of modulation which was performed. Required to
- *                     perform bit-deinterleaving as the inverse of the
- *                     bit-interleaving described in section 5.4.2 of TS
- *                     38.212.
- * @param[in]  src     Input array of a total of `e` 8-bit LLRs. This is the
- *                     output after demodulation.
- * @param[in,out] dst  On entry, contains the current approximation to LLRs.
- *                     If no approximation of the LLRs is known, all entries
- *                     must be set to zero. The array has length `66 * z` for
- *                     base graph 1, and `50 * z` for base graph 2. On exit,
- *                     updated rate-recovered 8-bit LLRs, which are ready to be
- *                     passed to decoding.
- * @param[in]  buffer  Workspace buffer to be used internally.
+ * The buffer must be at least `((z * 66) + e) * sizeof(uint8_t)` bytes.
+ *
+ * @param[in]     bg          The type of base graph for which rate recovery is
+ *                            to be performed.
+ * @param[in]     z           The lifting size. Valid values of the lifting size
+ *                            are described in table 5.3.2-1 in TS 38.212.
+ * @param[in]     e           The number of LLRs in the demodulated message.
+ *                            Assumed to be a multiple of the number of bits
+ *                            per modulation symbol.
+ * @param[in]    nref         The soft buffer size for limited buffer rate
+ *                            matching. `nref` is defined in TS 38.212
+ *                            section 5.4.2.1.
+ * @param[in] len_filler_bits The number of filler bits. As per TS 38.212,
+ *                            section 5.2.2, filler bits insertion is needed to
+ *                            ensure that the code block segments have a valid
+ *                            length and are a multiple of the lifting size.
+ * @param[in]     k           codeblock size, the number of bits to encode as
+ *                            per section 5.3.2 of TS 38.212.
+ * @param[in]     rv          Redundancy version used in rate recovery. Must be
+ *                            in the set `{0, 1, 2, 3}`. The effect of choosing
+ *                            different redundancy versions is described in
+ *                            table 5.4.2.1-2 of TS 38.212.
+ * @param[in]     mod         The type of modulation that was performed. Used
+ *                            for bit-deinterleaving, the inverse of the
+ *                            bit-interleaving in section 5.4.2 of TS 38.212.
+ * @param[in]     src         Input array of a total of `e` 8-bit LLRs.
+ *                            This is the output after demodulation.
+ * @param[in,out] dst         On entry, contains the current approximation to
+ *                            LLRs. If no approximation of the LLRs is known,
+ *                            all entries must be set to zero. The array has
+ *                            length `66 * z` for base graph 1, and `50 * z`
+ *                            for base graph 2. On exit, updated rate-recovered
+ *                            8-bit LLRs, which are ready to be passed to
+ *                            decoding.
+ * @param[in]     buffer      Workspace buffer to be used internally.
  * @return     An `armral_status` value that indicates success or failure.
  */
 armral_status armral_ldpc_rate_recovery_noalloc(
-    armral_ldpc_graph_t bg, uint32_t z, uint32_t e, uint32_t nref, uint32_t rv,
+    armral_ldpc_graph_t bg, uint32_t z, uint32_t e, uint32_t nref,
+    uint32_t len_filler_bits, uint32_t k, uint32_t rv,
     armral_modulation_type mod, const int8_t *src, int8_t *dst, void *buffer);
 /** @} end ldpc */
 
diff --git a/simulation/README.md b/simulation/README.md
index 1518db3625eb30d21988a081f6e16464f98792b5..8540d800c3fc0c1c3a8db527238adeabf535b4b2 100644
--- a/simulation/README.md
+++ b/simulation/README.md
@@ -247,7 +247,7 @@ You can run the `LDPC` coding Additive White Gaussian Noise (AWGN) simulation
 with the following parameters:
 
       ldpc_awgn -z lifting_size -b base_graph -m mod_type
-                [-r redundancy_version] [-u demod_ulp]
+                [-r redundancy_version] [-u demod_ulp] [-f len_filler_bits]
 
 For each value of the `Eb / N0` ratio used, a JSON record is written to stdout.
 The JSON record contains the following fields:
@@ -260,6 +260,7 @@ The JSON record contains the following fields:
         "Eb/N0": <eb_n0>,
         "snr": <snr>,
         "ulp": <demod_ulp>,
+        "len_filler_bits": <len_filler_bits>,
         "bler": <bler>,
         "ber": <ber>
       }
diff --git a/simulation/ldpc_awgn/ldpc_awgn.cpp b/simulation/ldpc_awgn/ldpc_awgn.cpp
index dd045ebe2b450132c8fbb668d7627d8693775f97..59324880a213717caf61f0ee05dc9c03136b7750 100644
--- a/simulation/ldpc_awgn/ldpc_awgn.cpp
+++ b/simulation/ldpc_awgn/ldpc_awgn.cpp
@@ -83,7 +83,7 @@ std::string print_valid_base_graph() {
 void usage(const char *exe_name) {
   std::cout
       << "Usage: " << exe_name << " -z lifting_size -b base_graph -m mod_type "
-      << "[-r redundancy_version] [-u demod_ulp]\n\n"
+      << "[-r redundancy_version] [-u demod_ulp] [-f len_filler_bits]\n\n"
       << "The arguments required by " << exe_name << " are:\n\n"
       << "  <lifting_size>       Lifting size. Supported values are:\n"
       << print_valid_lifting("\t\t\t")
@@ -102,11 +102,16 @@ void usage(const char *exe_name) {
       << "                       the symbol amplitudes are multiplied by a\n"
       << "                       scaling factor of 0x1p15/<demod_ulp>.\n"
       << "                       Default value is 128.\n"
+      << "  <len_filler_bits>    Filler bits length used to simulate case\n"
+      << "                       where transport block length is not multiple\n"
+      << "                       of Lifting size .\n"
+      << "                       Default length is 0.\n"
       << std::endl;
 }
 
 struct ldpc_example_data {
   uint32_t len_in;
+  uint32_t len_filler_bits;
   uint32_t len_encoded;
   uint32_t len_rate_matched;
   uint32_t len_out;
@@ -126,9 +131,11 @@ struct ldpc_example_data {
   uint8_t *data_decoded_bytes;
 
   ldpc_example_data(uint32_t z, armral_modulation_type mod,
-                    const armral_ldpc_base_graph_t *graph) {
+                    const armral_ldpc_base_graph_t *graph,
+                    uint16_t filler_bits_len) {
     mod_type = mod;
     len_in = z * graph->nmessage_bits;
+    len_filler_bits = filler_bits_len;
     len_encoded = z * graph->ncodeword_bits;
     len_out = len_encoded + 2 * z;
     data_in = SNEW(uint8_t, (len_in + 7) / 8);
@@ -167,7 +174,7 @@ int run_check(random_state *state, uint32_t z, armral_ldpc_graph_t bg,
               ldpc_example_data *data) {
   // Init data
   memset(data->data_in, 0, (data->len_in + 7) / 8 * sizeof(uint8_t));
-  for (uint32_t i = 0; i < data->len_in; ++i) {
+  for (uint32_t i = 0; i < (data->len_in - data->len_filler_bits); ++i) {
     uint8_t bit =
         static_cast<uint8_t>(linear_congruential_generator{}.one<bool>(state));
     uint16_t byte_ind = i / 8;
@@ -179,7 +186,8 @@ int run_check(random_state *state, uint32_t z, armral_ldpc_graph_t bg,
   }
 
   // Run ldpc encoding for a single block
-  armral_ldpc_encode_block(data->data_in, bg, z, data->data_encoded);
+  armral_ldpc_encode_block(data->data_in, bg, z, data->len_filler_bits,
+                           data->data_encoded);
   // To make it easier to compare the bits, convert the bit array to a byte
   // array
   bits_to_bytes(data->len_encoded, data->data_encoded,
@@ -187,9 +195,9 @@ int run_check(random_state *state, uint32_t z, armral_ldpc_graph_t bg,
 
   // Rate match data_encoded to create an array of length e bits from
   // num_mod_symbols * bit_per_symbol bits.
-  armral_ldpc_rate_matching(bg, z, data->len_rate_matched, data->nref, rv,
-                            data->mod_type, data->data_encoded,
-                            data->data_matched);
+  armral_ldpc_rate_matching(
+      bg, z, data->len_rate_matched, data->nref, data->len_filler_bits,
+      data->len_in, rv, data->mod_type, data->data_encoded, data->data_matched);
 
   // Run modulation
   armral_modulation(data->num_mod_symbols * data->bit_per_symbol,
@@ -208,7 +216,8 @@ int run_check(random_state *state, uint32_t z, armral_ldpc_graph_t bg,
   memset(data->data_recovered, 0, data->len_encoded);
 
   // Rate recovery inverses rate matching
-  armral_ldpc_rate_recovery(bg, z, data->len_rate_matched, data->nref, rv,
+  armral_ldpc_rate_recovery(bg, z, data->len_rate_matched, data->nref,
+                            data->len_filler_bits, data->len_in, rv,
                             data->mod_type, data->data_demod_soft,
                             data->data_recovered);
 
@@ -244,10 +253,11 @@ int run_check(random_state *state, uint32_t z, armral_ldpc_graph_t bg,
 struct sim_result {
   sim_result(uint32_t n_in, armral_ldpc_graph_t bg_in,
              armral_modulation_type mod, uint32_t rv_in, double ebn0_in,
-             double snr_in, uint16_t ulp_in, uint32_t nb, uint32_t nm,
-             uint32_t num_messages)
+             double snr_in, uint16_t ulp_in, uint16_t filler_bits_len,
+             uint32_t nb, uint32_t nm, uint32_t num_messages)
     : n(n_in), bg((int)bg_in + 1), mod_type(armral_simulation::mod_to_str(mod)),
       rv(rv_in), ebn0(ebn0_in), snr(snr_in), ulp(ulp_in),
+      len_filler_bits(filler_bits_len),
       bler(static_cast<double>(nm) / num_messages),
       ber(static_cast<double>(nb) / (num_messages * n_in)) {}
 
@@ -258,6 +268,7 @@ struct sim_result {
   double ebn0;
   double snr;
   uint16_t ulp;
+  uint16_t len_filler_bits;
   double bler;
   double ber;
 
@@ -267,7 +278,8 @@ struct sim_result {
     s.setf(std::ios::fixed, std::ios::floatfield);
     s << "{\"n\": " << n << ", \"bg\": " << bg << ", \"mod_type\": \""
       << mod_type << "\", \"rv\": " << rv << ", \"Eb/N0\": " << ebn0
-      << ", \"snr\": " << snr << ", \"ulp\": " << ulp << ", \"bler\": " << bler
+      << ", \"snr\": " << snr << ", \"ulp\": " << ulp
+      << ",  \"len_filler_bits\": " << len_filler_bits << ",\"bler\": " << bler
       << ", \"ber\": " << ber << "}";
     return std::move(s).str();
   }
@@ -275,7 +287,7 @@ struct sim_result {
 
 bool run_snr(uint32_t z, armral_modulation_type mod_type,
              armral_ldpc_graph_t bg, uint32_t rv, uint16_t ulp,
-             double ebn0_db) {
+             uint16_t len_filler_bits, double ebn0_db) {
   const auto *graph = armral_ldpc_get_base_graph(bg);
   // Compute SNR in dB
   int bits_per_symb = armral_simulation::bits_per_symbol(mod_type);
@@ -306,7 +318,7 @@ bool run_snr(uint32_t z, armral_modulation_type mod_type,
     uint64_t nr = 1e4;
 #pragma omp parallel reduction(+ : nb, num_message_errors)
     {
-      ldpc_example_data data(z, mod_type, graph);
+      ldpc_example_data data(z, mod_type, graph, len_filler_bits);
 #pragma omp for
       for (uint64_t r = 0; r < nr; ++r) {
         auto state = random_state::from_seeds({r, nr_total});
@@ -322,8 +334,8 @@ bool run_snr(uint32_t z, armral_modulation_type mod_type,
       static_cast<double>(num_message_errors) / nr_total;
 
   // Write out data in JSON format
-  std::cout << sim_result(n, bg, mod_type, rv, ebn0_db, snr_db, ulp, nb,
-                          num_message_errors, nr_total)
+  std::cout << sim_result(n, bg, mod_type, rv, ebn0_db, snr_db, ulp,
+                          len_filler_bits, nb, num_message_errors, nr_total)
                    .to_str()
             << std::endl;
 
@@ -341,13 +353,14 @@ int main(int argc, char **argv) {
   bool is_bg_set = false;
   uint32_t rv = 0;
   uint16_t ulp = 0;
+  uint16_t len_filler_bits = 0;
   armral_modulation_type mod_type = ARMRAL_MOD_256QAM;
   bool is_mod_set = false;
   bool print_usage = false;
 
   // Parse arguments
   int option;
-  while ((option = getopt(argc, argv, "z:b:m:r:u:")) != -1) {
+  while ((option = getopt(argc, argv, "z:b:m:r:u:f:")) != -1) {
     switch (option) {
     case 'z':
       z = (uint32_t)atoi(optarg);
@@ -367,6 +380,9 @@ int main(int argc, char **argv) {
     case 'u':
       ulp = (uint16_t)atoi(optarg);
       break;
+    case 'f':
+      len_filler_bits = (uint16_t)atoi(optarg);
+      break;
     default:
       print_usage = true;
     }
@@ -413,7 +429,8 @@ int main(int argc, char **argv) {
     ulp = 128;
   }
 
-  for (double ebn0 = -2; run_snr(z, mod_type, bg, rv, ulp, ebn0); ebn0 += 0.5) {
+  for (double ebn0 = -2;
+       run_snr(z, mod_type, bg, rv, ulp, len_filler_bits, ebn0); ebn0 += 0.5) {
   }
 
   return 0;
diff --git a/src/UpperPHY/LDPC/ldpc_encoder.cpp b/src/UpperPHY/LDPC/ldpc_encoder.cpp
index 7ea8335cb1b1d0cda76c2b943dbccb45dd78fbe2..20ee82b97f15f73e7dd518cd5bc799a08a8fea13 100644
--- a/src/UpperPHY/LDPC/ldpc_encoder.cpp
+++ b/src/UpperPHY/LDPC/ldpc_encoder.cpp
@@ -1756,15 +1756,14 @@ inline void calc_hdsm_rhs(uint32_t z, const uint8_t *parity_hdsm,
 
 template<typename Allocator>
 armral_status ldpc_encode_block(const uint8_t *data_in, armral_ldpc_graph_t bg,
-                                uint32_t z, uint8_t *data_out,
-                                Allocator &allocator) {
+                                uint32_t z, uint32_t len_filler_bits,
+                                uint8_t *data_out, Allocator &allocator) {
 
   // Get a pointer to the graph to be working with
   const auto *graph = armral_ldpc_get_base_graph(bg);
   assert(graph);
 
-  auto bytes_in =
-      allocate_uninitialized<uint8_t>(allocator, z * graph->nmessage_bits);
+  auto bytes_in = allocate_zeroed<uint8_t>(allocator, z * graph->nmessage_bits);
   auto parity_hdsm = allocate_zeroed<uint8_t>(allocator, 4 * z);
   auto codeword =
       allocate_zeroed<uint8_t>(allocator, (graph->ncodeword_bits + 2) * z);
@@ -1773,8 +1772,9 @@ armral_status ldpc_encode_block(const uint8_t *data_in, armral_ldpc_graph_t bg,
     return ARMRAL_SUCCESS;
   }
 
-  // Cast the bits to bytes for easier handling of data
-  bits_to_bytes(z * graph->nmessage_bits, data_in, bytes_in.get());
+  // Expand only the non-filler bits; the zeroed tail of bytes_in acts as filler
+  bits_to_bytes(z * graph->nmessage_bits - len_filler_bits, data_in,
+                bytes_in.get());
 
   // Get the lifting set index
   auto lsi = armral_ldpc::get_ldpc_lifting_index(z);
@@ -1819,23 +1819,27 @@ armral_status ldpc_encode_block(const uint8_t *data_in, armral_ldpc_graph_t bg,
 
 armral_status armral_ldpc_encode_block(const uint8_t *data_in,
                                        armral_ldpc_graph_t bg, uint32_t z,
+                                       uint32_t len_filler_bits,
                                        uint8_t *data_out) {
   heap_allocator allocator{};
-  return ldpc_encode_block(data_in, bg, z, data_out, allocator);
+  return ldpc_encode_block(data_in, bg, z, len_filler_bits, data_out,
+                           allocator);
 }
 
-armral_status armral_ldpc_encode_block_noalloc(const uint8_t *data_in,
-                                               armral_ldpc_graph_t bg,
-                                               uint32_t z, uint8_t *data_out,
-                                               void *buffer) {
+armral_status
+armral_ldpc_encode_block_noalloc(const uint8_t *data_in, armral_ldpc_graph_t bg,
+                                 uint32_t z, uint32_t len_filler_bits,
+                                 uint8_t *data_out, void *buffer) {
   buffer_bump_allocator allocator{buffer};
-  return ldpc_encode_block(data_in, bg, z, data_out, allocator);
+  return ldpc_encode_block(data_in, bg, z, len_filler_bits, data_out,
+                           allocator);
 }
 
-uint32_t armral_ldpc_encode_block_noalloc_buffer_size(armral_ldpc_graph_t bg,
-                                                      uint32_t z) {
+uint32_t
+armral_ldpc_encode_block_noalloc_buffer_size(armral_ldpc_graph_t bg, uint32_t z,
+                                             uint32_t len_filler_bits) {
   counting_allocator allocator{};
-  (void)ldpc_encode_block(nullptr, bg, z, nullptr, allocator);
+  (void)ldpc_encode_block(nullptr, bg, z, len_filler_bits, nullptr, allocator);
   return allocator.required_bytes();
 }
 
diff --git a/src/UpperPHY/LDPC/ldpc_rate_matching.cpp b/src/UpperPHY/LDPC/ldpc_rate_matching.cpp
index 541077b1f1515f4b96798e05db669ff2ce2caa15..b14ea72a0a99839b75fcdf2a802174c9571d114b 100644
--- a/src/UpperPHY/LDPC/ldpc_rate_matching.cpp
+++ b/src/UpperPHY/LDPC/ldpc_rate_matching.cpp
@@ -3,28 +3,56 @@
     Copyright 2020-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
 */
 #include "armral.h"
+#include "bit_utils.hpp"
 #include "utils/allocators.hpp"
-
 #include <cassert>
 #include <cmath>
 #include <cstring>
 
 namespace armral::ldpc {
 
-static void bit_selection(uint32_t n, uint32_t e, uint32_t k0,
-                          const uint8_t *in, uint8_t *out) {
+static void bit_selection(uint32_t z, uint32_t n, uint32_t e,
+                          uint32_t len_filler_bits, uint32_t k, uint32_t k0,
+                          const uint8_t *in, uint8_t *out,
+                          uint8_t *scratch_buf1, uint8_t *scratch_buf2) {
+  const uint8_t *in_bits = in;
   // bit selection as specified by section 5.4.2.1 in 3GPP TS 38.212
-  // The filler bits (null values) are not relevant.
-  // k0 depends on the redundancy version id.
+  // Strip the filler bits so that only systematic + parity bits get selected.
+  if (len_filler_bits > 0) {
+    // Layout of in (n bits): [systematic | filler | parity]
+    uint32_t len_s_f_bits = k - z * 2; // length of systematic & filler bits
+    uint32_t len_s_bits =
+        len_s_f_bits - len_filler_bits;     // length of systematic bits
+    uint32_t len_p_bits = n - len_s_f_bits; // length of parity bits
 
+    if (len_s_bits % 8 == 0 && len_s_f_bits % 8 == 0 && len_p_bits % 8 == 0) {
+      uint32_t len_s_f_bytes = len_s_f_bits >> 3;
+      uint32_t len_s_bytes = len_s_bits >> 3;
+      uint32_t len_p_bytes = len_p_bits >> 3;
+      memcpy((void *)scratch_buf1, in, len_s_bytes);
+      memcpy((void *)&scratch_buf1[len_s_bytes], &in[len_s_f_bytes],
+             len_p_bytes);
+    } else {
+      bits_to_bytes(n, (const uint8_t *)in, (uint8_t *)scratch_buf1);
+      memcpy(scratch_buf2, scratch_buf1, len_s_bits);
+      memcpy(&scratch_buf2[len_s_bits], &scratch_buf1[len_s_f_bits],
+             len_p_bits);
+      bytes_to_bits((n - len_filler_bits), (const uint8_t *)scratch_buf2,
+                    (uint8_t *)scratch_buf1);
+    }
+    // From here on the selection reads the compacted copy, not the input.
+    in_bits = scratch_buf1;
+  }
+
+  // k0 depends on the redundancy version id.
   // Zero out last byte in case e is not an integer multiple of 8.
   uint32_t num_bytes = (e + 7) / 8;
   out[num_bytes - 1] = 0;
 
   uint32_t src_bit = k0;
-  for (uint32_t k = 0; k < e; ++k) {
-    uint32_t bit = (in[src_bit / 8] >> (7 - src_bit % 8)) & 1;
-    out[k / 8] |= (bit << (7 - (k % 8)));
+  for (uint32_t i = 0; i < e; ++i) {
+    uint32_t bit = (in_bits[src_bit / 8] >> (7 - src_bit % 8)) & 1;
+    out[i / 8] |= (bit << (7 - (i % 8)));
     src_bit++;
     src_bit = src_bit % n;
   }
@@ -57,35 +85,41 @@ static int starting_position(armral_ldpc_graph_t bg, uint32_t rv, uint32_t n,
     return 0;
   }
   if (rv == 1) {
-    return ((17 * z - (int)bg * 4 * z) * (ncb / n));
+    return (17 * z - (int)bg * 4 * z) * (ncb / n);
   }
   if (rv == 2) {
-    return ((33 * z - (int)bg * 8 * z) * (ncb / n));
+    return (33 * z - (int)bg * 8 * z) * (ncb / n);
   }
   if (rv == 3) {
-    return ((56 * z - (int)bg * 13 * z) * (ncb / n));
+    return (56 * z - (int)bg * 13 * z) * (ncb / n);
   }
   return 0;
 }
 
 template<typename Allocator>
 armral_status rate_matching(armral_ldpc_graph_t bg, uint32_t z, uint32_t e,
-                            uint32_t nref, uint32_t rv,
-                            armral_modulation_type mod, const uint8_t *src,
-                            uint8_t *dst, Allocator &allocator) {
+                            uint32_t nref, uint32_t len_filler_bits, uint32_t k,
+                            uint32_t rv, armral_modulation_type mod,
+                            const uint8_t *src, uint8_t *dst,
+                            Allocator &allocator) {
   auto selected = allocate_zeroed<uint8_t>(allocator, e);
+  uint32_t n = (bg == LDPC_BASE_GRAPH_2) ? 50 * z : 66 * z;
+
   // Map the modulation type onto the modulation order.
   uint32_t qm = 2 + (uint32_t)mod * 2;
-  uint32_t n = z;
   uint32_t ncb = 0;
-  bg == LDPC_BASE_GRAPH_2 ? n *= 50 : n *= 66;
   if (nref != 0) {
     ncb = (n > nref) ? nref : n;
   } else {
     ncb = n;
   }
+  // Scratch space for bit_selection's filler-bit removal (n bytes each).
+  auto scratch_buf1 = allocate_zeroed<uint8_t>(allocator, n);
+  auto scratch_buf2 = allocate_zeroed<uint8_t>(allocator, n);
+  // NOTE(review): selection now wraps at n, not ncb -- confirm for nref != 0.
   uint32_t k0 = starting_position(bg, rv, n, ncb, z);
-  bit_selection(ncb, e, k0, src, selected.get());
+  bit_selection(z, n, e, len_filler_bits, k, k0, src, selected.get(),
+                scratch_buf1.get(), scratch_buf2.get());
   bit_interleave(e, qm, selected.get(), dst);
   return ARMRAL_SUCCESS;
 }
@@ -93,21 +127,21 @@ armral_status rate_matching(armral_ldpc_graph_t bg, uint32_t z, uint32_t e,
 } // namespace armral::ldpc
 
 armral_status armral_ldpc_rate_matching(armral_ldpc_graph_t bg, uint32_t z,
-                                        uint32_t e, uint32_t nref, uint32_t rv,
-                                        armral_modulation_type mod,
+                                        uint32_t e, uint32_t nref,
+                                        uint32_t len_filler_bits, uint32_t k,
+                                        uint32_t rv, armral_modulation_type mod,
                                         const uint8_t *src, uint8_t *dst) {
   heap_allocator allocator{};
-  return armral::ldpc::rate_matching(bg, z, e, nref, rv, mod, src, dst,
-                                     allocator);
+  return armral::ldpc::rate_matching(bg, z, e, nref, len_filler_bits, k, rv,
+                                     mod, src, dst, allocator);
 }
 
-armral_status armral_ldpc_rate_matching_noalloc(armral_ldpc_graph_t bg,
-                                                uint32_t z, uint32_t e,
-                                                uint32_t nref, uint32_t rv,
-                                                armral_modulation_type mod,
-                                                const uint8_t *src,
-                                                uint8_t *dst, void *buffer) {
+armral_status armral_ldpc_rate_matching_noalloc(
+    armral_ldpc_graph_t bg, uint32_t z, uint32_t e, uint32_t nref,
+    uint32_t len_filler_bits, uint32_t k, uint32_t rv,
+    armral_modulation_type mod, const uint8_t *src, uint8_t *dst,
+    void *buffer) {
   buffer_bump_allocator allocator{buffer};
-  return armral::ldpc::rate_matching(bg, z, e, nref, rv, mod, src, dst,
-                                     allocator);
+  return armral::ldpc::rate_matching(bg, z, e, nref, len_filler_bits, k, rv,
+                                     mod, src, dst, allocator);
 }
diff --git a/src/UpperPHY/LDPC/ldpc_rate_recovery.cpp b/src/UpperPHY/LDPC/ldpc_rate_recovery.cpp
index 4c3ac5fc0973f1caf9d430f5e3019ccbbdcae3fb..803e814d9540faefdb08eba1ce3198476adb79c5 100644
--- a/src/UpperPHY/LDPC/ldpc_rate_recovery.cpp
+++ b/src/UpperPHY/LDPC/ldpc_rate_recovery.cpp
@@ -11,24 +11,46 @@
 
 namespace armral::ldpc {
 
-void undo_selection(uint32_t n, uint32_t e, uint32_t k0, const int8_t *in,
-                    int8_t *out) {
+void undo_selection(uint32_t z, uint32_t n, uint32_t e,
+                    uint32_t len_filler_bits, uint32_t k, uint32_t k0,
+                    const int8_t *in, int8_t *out, int8_t *scratch_llrs) {
   // performs the inverse of the bit selection as specified by
   // section 5.4.2.1 in 3GPP TS 38.212
 
   assert(k0 >= 0 && k0 < n);
   assert(e > 0);
 
-  for (uint32_t k = 0; k < e; k++) {
-    int32_t sum = (int32_t)out[(k0 + k) % n] + (int32_t)in[k];
+  int8_t *out_llrs;
+  // With filler bits, accumulate into scratch first and expand into out later
+  if (len_filler_bits > 0) {
+    out_llrs = scratch_llrs;
+  } else {
+    out_llrs = out;
+  }
+
+  for (uint32_t i = 0; i < e; i++) {
+    int32_t sum = (int32_t)out_llrs[(k0 + i) % n] + (int32_t)in[i];
     if (sum < (int32_t)INT8_MIN) {
-      out[(k0 + k) % n] = INT8_MIN;
+      out_llrs[(k0 + i) % n] = INT8_MIN;
     } else if (sum > (int32_t)INT8_MAX) {
-      out[(k0 + k) % n] = INT8_MAX;
+      out_llrs[(k0 + i) % n] = INT8_MAX;
     } else {
-      out[(k0 + k) % n] = (int8_t)sum;
+      out_llrs[(k0 + i) % n] = (int8_t)sum;
     }
   }
+
+  // Re-insert filler bits as zero LLRs; this overwrites all n entries of out
+  if (len_filler_bits > 0) {
+    uint32_t len_s_f_bits = k - z * 2; // length of systematic & filler bits
+    uint32_t len_s_bits =
+        len_s_f_bits - len_filler_bits;     // length of systematic bits
+    uint32_t len_p_bits = n - len_s_f_bits; // length of parity bits
+
+    memcpy(out, scratch_llrs, len_s_bits);
+    memset(&out[len_s_bits], 0, len_filler_bits);
+    memcpy(&out[len_s_bits + len_filler_bits], &scratch_llrs[len_s_bits],
+           len_p_bits);
+  }
 }
 
 void undo_interleave(uint32_t e, uint32_t qm, const int8_t *in, int8_t *out) {
@@ -57,24 +79,26 @@ static int starting_position(armral_ldpc_graph_t bg, uint32_t rv, uint32_t n,
     return 0;
   }
   if (rv == 1) {
-    return ((17 * z - (int)bg * 4 * z) * (ncb / n));
+    return (17 * z - (int)bg * 4 * z) * (ncb / n);
   }
   if (rv == 2) {
-    return ((33 * z - (int)bg * 8 * z) * (ncb / n));
+    return (33 * z - (int)bg * 8 * z) * (ncb / n);
   }
   if (rv == 3) {
-    return ((56 * z - (int)bg * 13 * z) * (ncb / n));
+    return (56 * z - (int)bg * 13 * z) * (ncb / n);
   }
   return 0;
 }
 
 template<typename Allocator>
 armral_status rate_recovery(armral_ldpc_graph_t bg, uint32_t z, uint32_t e,
-                            uint32_t nref, uint32_t rv,
-                            armral_modulation_type mod, const int8_t *src,
-                            int8_t *dst, Allocator &allocator) {
-  auto llrs = allocate_uninitialized<int8_t>(allocator, e);
+                            uint32_t nref, uint32_t len_filler_bits, uint32_t k,
+                            uint32_t rv, armral_modulation_type mod,
+                            const int8_t *src, int8_t *dst,
+                            Allocator &allocator) {
+  auto llrs = allocate_zeroed<int8_t>(allocator, e);
   uint32_t n = (bg == LDPC_BASE_GRAPH_2) ? 50 * z : 66 * z;
+  auto scratch_llrs = allocate_zeroed<int8_t>(allocator, n);
   uint32_t ncb = 0;
   if (nref != 0) {
     ncb = (n > nref) ? nref : n;
@@ -84,25 +108,28 @@ armral_status rate_recovery(armral_ldpc_graph_t bg, uint32_t z, uint32_t e,
   uint32_t k0 = starting_position(bg, rv, n, ncb, z);
   uint32_t qm = 2 + (uint32_t)mod * 2;
   undo_interleave(e, qm, src, llrs.get());
-  undo_selection(ncb, e, k0, llrs.get(), dst);
+  undo_selection(z, n, e, len_filler_bits, k, k0, llrs.get(), dst,
+                 scratch_llrs.get());
   return ARMRAL_SUCCESS;
 }
 
 } // namespace armral::ldpc
 
 armral_status armral_ldpc_rate_recovery(armral_ldpc_graph_t bg, uint32_t z,
-                                        uint32_t e, uint32_t nref, uint32_t rv,
-                                        armral_modulation_type mod,
+                                        uint32_t e, uint32_t nref,
+                                        uint32_t len_filler_bits, uint32_t k,
+                                        uint32_t rv, armral_modulation_type mod,
                                         const int8_t *src, int8_t *dst) {
   heap_allocator allocator{};
-  return armral::ldpc::rate_recovery(bg, z, e, nref, rv, mod, src, dst,
-                                     allocator);
+  return armral::ldpc::rate_recovery(bg, z, e, nref, len_filler_bits, k, rv,
+                                     mod, src, dst, allocator);
 }
 
 armral_status armral_ldpc_rate_recovery_noalloc(
-    armral_ldpc_graph_t bg, uint32_t z, uint32_t e, uint32_t nref, uint32_t rv,
+    armral_ldpc_graph_t bg, uint32_t z, uint32_t e, uint32_t nref,
+    uint32_t len_filler_bits, uint32_t k, uint32_t rv,
     armral_modulation_type mod, const int8_t *src, int8_t *dst, void *buffer) {
   buffer_bump_allocator allocator{buffer};
-  return armral::ldpc::rate_recovery(bg, z, e, nref, rv, mod, src, dst,
-                                     allocator);
+  return armral::ldpc::rate_recovery(bg, z, e, nref, len_filler_bits, k, rv,
+                                     mod, src, dst, allocator);
 }
diff --git a/test/LDPC/decoding/main.cpp b/test/LDPC/decoding/main.cpp
index aa297f4509bd4a7ca3898a68a449a9c0bc618f99..809d1148948d516e9d4d11afe2f12843aad0ae1e 100644
--- a/test/LDPC/decoding/main.cpp
+++ b/test/LDPC/decoding/main.cpp
@@ -115,9 +115,10 @@ bool run_ldpc_decoding_test(uint32_t its, uint32_t z, armral_ldpc_graph_t bg,
 
   uint32_t encoded_len = z * graph->ncodeword_bits;
   auto encoded = allocate_random_u8((encoded_len + 7) / 8);
-
+  uint32_t len_filler_bits = 0;
   // Encode the data
-  armral_ldpc_encode_block(to_encode.data(), bg, z, encoded.data());
+  armral_ldpc_encode_block(to_encode.data(), bg, z, len_filler_bits,
+                           encoded.data());
 
   // run modulation
   armral_modulation_type mod_type = ARMRAL_MOD_16QAM;
diff --git a/test/LDPC/encoding/ldpc_encoding_test_data.h b/test/LDPC/encoding/ldpc_encoding_test_data.h
index e6d294e3b1022898ebbabd67da584dfc4cb36719..792bcb807601ab988dfb468d28d51691357d3186 100644
--- a/test/LDPC/encoding/ldpc_encoding_test_data.h
+++ b/test/LDPC/encoding/ldpc_encoding_test_data.h
@@ -14,6 +14,7 @@ struct ldpc_test_param_t {
   uint32_t lifting_size;
   uint32_t length;
   armral_ldpc_graph_t graph_type;
+  uint32_t length_of_filler_bits;
 };
 
 std::vector<ldpc_test_param_t> ldpc_tests = {{
@@ -21,100 +22,145 @@ std::vector<ldpc_test_param_t> ldpc_tests = {{
                                                  2,
                                                  2 * 22,
                                                  LDPC_BASE_GRAPH_1,
+                                                 0,
                                              },
                                              {
                                                  // index set 1, base graph 1
                                                  3,
                                                  3 * 22,
                                                  LDPC_BASE_GRAPH_1,
+                                                 0,
                                              },
                                              {
                                                  // index set 2, base graph 1
                                                  5,
                                                  5 * 22,
                                                  LDPC_BASE_GRAPH_1,
+                                                 0,
                                              },
                                              {
                                                  // index set 3, base graph 1
                                                  7,
                                                  7 * 22,
                                                  LDPC_BASE_GRAPH_1,
+                                                 0,
                                              },
                                              {
                                                  // index set 4, base graph 1
                                                  9,
                                                  9 * 22,
                                                  LDPC_BASE_GRAPH_1,
+                                                 0,
                                              },
                                              {
                                                  // index set 5, base graph 1
                                                  11,
                                                  11 * 22,
                                                  LDPC_BASE_GRAPH_1,
+                                                 0,
                                              },
                                              {
                                                  // index set 6, base graph 1
                                                  13,
                                                  13 * 22,
                                                  LDPC_BASE_GRAPH_1,
+                                                 0,
                                              },
                                              {
-                                                 // index set 6, base graph 1
+                                                 // index set 7, base graph 1
                                                  208,
                                                  208 * 22,
                                                  LDPC_BASE_GRAPH_1,
+                                                 0,
                                              },
                                              {
-                                                 // index set 7, base graph 1
+                                                 // index set 8, base graph 1
                                                  15,
                                                  15 * 22,
                                                  LDPC_BASE_GRAPH_1,
+                                                 0,
+                                             },
+                                             {
+                                                 // index set 9, base graph 1
+                                                 18,
+                                                 18 * 22,
+                                                 LDPC_BASE_GRAPH_1,
+                                                 76,
+                                             },
+                                             {
+                                                 // index set 10, base graph 1
+                                                 112,
+                                                 112 * 22,
+                                                 LDPC_BASE_GRAPH_1,
+                                                 72,
                                              },
                                              {
                                                  // index set 0, base graph 2
                                                  2,
                                                  2 * 10,
                                                  LDPC_BASE_GRAPH_2,
+                                                 0,
                                              },
                                              {
                                                  // index set 1, base graph 2
                                                  3,
                                                  3 * 10,
                                                  LDPC_BASE_GRAPH_2,
+                                                 0,
                                              },
                                              {
                                                  // index set 2, base graph 2
                                                  5,
                                                  5 * 10,
                                                  LDPC_BASE_GRAPH_2,
+                                                 0,
                                              },
                                              {
                                                  // index set 3, base graph 2
                                                  7,
                                                  7 * 10,
                                                  LDPC_BASE_GRAPH_2,
+                                                 0,
                                              },
                                              {
                                                  // index set 4, base graph 2
                                                  9,
                                                  9 * 10,
                                                  LDPC_BASE_GRAPH_2,
+                                                 0,
                                              },
                                              {
                                                  // index set 5, base graph 2
                                                  11,
                                                  11 * 10,
                                                  LDPC_BASE_GRAPH_2,
+                                                 0,
                                              },
                                              {
                                                  // index set 6, base graph 2
                                                  13,
                                                  13 * 10,
                                                  LDPC_BASE_GRAPH_2,
+                                                 0,
                                              },
                                              {
                                                  // index set 7, base graph 2
                                                  15,
                                                  15 * 10,
                                                  LDPC_BASE_GRAPH_2,
+                                                 0,
+                                             },
+                                             {
+                                                 // index set 8, base graph 2
+                                                 18,
+                                                 18 * 10,
+                                                 LDPC_BASE_GRAPH_2,
+                                                 36,
+                                             },
+                                             {
+                                                 // index set 9, base graph 2
+                                                 112,
+                                                 112 * 10,
+                                                 LDPC_BASE_GRAPH_2,
+                                                 76,
                                              }};
diff --git a/test/LDPC/encoding/main.cpp b/test/LDPC/encoding/main.cpp
index 7415eb57d2f0fb5c16178b3abbde6e1df3dcb9a8..2c2ba9b4a65b4c0ddc69a2f03ec1a0931c61df94 100644
--- a/test/LDPC/encoding/main.cpp
+++ b/test/LDPC/encoding/main.cpp
@@ -8,8 +8,8 @@
 #include "int8_utils.hpp"
 #include "ldpc_coding.hpp"
 #include "ldpc_encoding_test_data.h"
-
 #include <cstdlib>
+#include <cstring>
 
 namespace {
 
@@ -179,7 +179,8 @@ inline void set_remaining_bits(armral_ldpc_graph_t bg, uint32_t z, uint32_t lsi,
 
 std::vector<uint8_t> armral_ldpc_encode_block_ref(const uint8_t *data_in,
                                                   armral_ldpc_graph_t bg,
-                                                  uint32_t z) {
+                                                  uint32_t z,
+                                                  uint32_t len_filler_bits) {
 
   // Get a pointer to the graph to be working with
   const auto *graph = armral_ldpc_get_base_graph(bg);
@@ -284,10 +285,12 @@ bool check_bytes_equal(const std::vector<uint8_t> &enc,
     return false;
     // GCOVR_EXCL_STOP
   }
+
   for (uint32_t i = 0; i < enc.size(); ++i) {
     if (enc[i] != expected[i]) {
       // GCOVR_EXCL_START
       std::cout << "LDPC: z=" << tc.lifting_size << ", length=" << tc.length
+                << ", filler bits length=" << tc.length_of_filler_bits
                 << ", base_graph=" << (int)tc.graph_type + 1
                 << ". Bit at position " << i << " is " << (int)enc[i]
                 << ", but expected " << (int)expected[i] << std::endl;
@@ -308,28 +311,43 @@ bool test_ldpc_encode_block(
   for (auto &tc : ldpc_tests) {
     // Generate some random data to encode. This should be in a single
     // block
-    auto data_in = gen_random_bits(tc.length);
+
+    uint32_t len = tc.length;
+
+    if ((tc.length_of_filler_bits > 0) &&
+        ((tc.length_of_filler_bits % 8) != 0)) {
+      len += 8;
+    }
+
+    auto data_in = gen_random_bits(len);
+
+    // Zero the end portion of the input vector that holds the filler bits
+    memset(&(data_in[(tc.length - tc.length_of_filler_bits + 7) / 8]), 0,
+           (tc.length_of_filler_bits + 7) / 8);
 
     const auto *bg = armral_ldpc_get_base_graph(tc.graph_type);
 
     std::vector<uint8_t> encoding((tc.lifting_size * bg->ncodeword_bits + 7) /
                                   8);
+
     ldpc_encode_block_under_test(data_in.data(), tc.graph_type, tc.lifting_size,
-                                 encoding.data());
+                                 tc.length_of_filler_bits, encoding.data());
 
     auto encoding_bytes =
         bits_to_bytes(tc.lifting_size * bg->ncodeword_bits, encoding.data());
 
     // Compute full block data by reference implementation
     // and check its validity
-    auto encoding_ref = armral_ldpc_encode_block_ref(
-        data_in.data(), tc.graph_type, tc.lifting_size);
+    auto encoding_ref =
+        armral_ldpc_encode_block_ref(data_in.data(), tc.graph_type,
+                                     tc.lifting_size, tc.length_of_filler_bits);
 
     // Check the validity of the reference implementation: H*c = 0
     // Can be removed once the code is stable
     passed &= perform_parity_check(encoding_ref.data(), tc.lifting_size,
                                    tc.graph_type);
     assert(passed && "LDPC encoding: Invalid reference implementation");
+
     // Puncture solution from reference and compared with computed
     const std::vector<uint8_t> punctured = {
         encoding_ref.begin() + 2 * tc.lifting_size, encoding_ref.end()};
@@ -351,11 +369,12 @@ int main(int argc, char **argv) {
   passed &= test_ldpc_encode_block(
       "LDPCEncodeBlockNoAlloc",
       [](uint8_t const *data_in, armral_ldpc_graph_t bg, uint32_t z,
-         uint8_t *data_out) {
-        auto buffer_size = armral_ldpc_encode_block_noalloc_buffer_size(bg, z);
+         uint32_t len_filler_bits, uint8_t *data_out) {
+        auto buffer_size = armral_ldpc_encode_block_noalloc_buffer_size(
+            bg, z, len_filler_bits);
         std::vector<uint8_t> buffer(buffer_size);
-        return armral_ldpc_encode_block_noalloc(data_in, bg, z, data_out,
-                                                buffer.data());
+        return armral_ldpc_encode_block_noalloc(data_in, bg, z, len_filler_bits,
+                                                data_out, buffer.data());
       });
 
   exit(passed ? EXIT_SUCCESS : EXIT_FAILURE);
diff --git a/test/LDPC/rate_matching/main.cpp b/test/LDPC/rate_matching/main.cpp
index 3f695a1f8e5161380d90094b037da640719c7c5f..52bc1280dc4f816972555bcec9fbb0dadc7892aa 100644
--- a/test/LDPC/rate_matching/main.cpp
+++ b/test/LDPC/rate_matching/main.cpp
@@ -3,8 +3,8 @@
     Copyright 2020-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
 */
 #include "armral.h"
+#include "bit_utils.hpp"
 #include "int8_utils.hpp"
-
 #include <algorithm>
 #include <cassert>
 #include <cmath>
@@ -12,25 +12,61 @@
 #include <vector>
 
 namespace {
-void ref_bit_selection(uint32_t n, uint32_t e, uint32_t k0, const uint8_t *in,
-                       uint8_t *out) {
+void ref_bit_selection(uint32_t z, uint32_t n, uint32_t e,
+                       uint32_t len_filler_bits, uint32_t k, uint32_t k0,
+                       const uint8_t *in, uint8_t *out) {
+
+  const uint8_t *in_bits = in;
+  std::vector<uint8_t> scratch_buf1(n);
+  std::vector<uint8_t> scratch_buf2(n);
+  auto *scratch_ptr1 = scratch_buf1.data();
+  auto *scratch_ptr2 = scratch_buf2.data();
+
   // bit selection as specified by section 5.4.2.1 in 3GPP TS 38.212
-  // The filler bits (null values) are not relevant.
-  // k0 depends on the redundancy version id.
+  // remove Filler bits
+  if (len_filler_bits > 0) {
+
+    uint32_t len_s_f_bits = k - z * 2; // length of systematic & filler bits
+    uint32_t len_s_bits =
+        len_s_f_bits - len_filler_bits;     // length of systematic bits
+    uint32_t len_p_bits = n - len_s_f_bits; // length of parity bits
+
+    if (len_filler_bits % 8 == 0) {
+      uint32_t len_s_f_bytes = len_s_f_bits >> 3;
+      uint32_t len_s_bytes = len_s_bits >> 3;
+      uint32_t len_p_bytes = len_p_bits >> 3;
+
+      memcpy(scratch_ptr1, in, len_s_bytes); // skip Filler bits
+      memcpy(&scratch_ptr1[len_s_bytes], &in[len_s_f_bytes],
+             len_p_bytes); // skip Filler bits
+
+    } else {
+
+      bits_to_bytes(n, (const uint8_t *)in, (uint8_t *)scratch_ptr1);
+      memcpy(scratch_ptr2, scratch_ptr1, len_s_bits);
+      memcpy(&scratch_ptr2[len_s_bits], &scratch_ptr1[len_s_f_bits],
+             len_p_bits);
+      bytes_to_bits((n - len_filler_bits), (const uint8_t *)scratch_ptr2,
+                    scratch_ptr1);
+    }
+
+    in_bits = scratch_ptr1;
+  }
 
+  // k0 depends on the redundancy version id.
   assert(n > 0);
   assert(e > 0);
   assert(k0 >= 0 && k0 < n);
   assert(n % 2 == 0);
 
-  for (uint32_t k = 0; k < (e + 7) / 8; k++) {
-    out[k] = 0;
+  for (uint32_t i = 0; i < (e + 7) / 8; i++) {
+    out[i] = 0;
   }
 
   int src_bit = k0;
-  for (uint32_t k = 0; k < e; ++k) {
-    uint32_t bit = (in[src_bit / 8] >> (7 - src_bit % 8)) & 1;
-    out[k / 8] |= (bit << (7 - (k % 8)));
+  for (uint32_t i = 0; i < e; ++i) {
+    uint32_t bit = (in_bits[src_bit / 8] >> (7 - src_bit % 8)) & 1;
+    out[i / 8] |= (bit << (7 - (i % 8)));
     src_bit++;
     src_bit = src_bit % n;
   }
@@ -69,73 +105,88 @@ bool test_ref_rate_matching() {
   // Test bit selection for k0 = 0.
   // Copy the first 30 bits. Expect the final 2 bits
   // to be pruned.
-  ref_bit_selection(30, 30, 0, in, out);
+  ref_bit_selection(0, 30, 30, 0, 0, 0, in, out);
   for (int i = 0; i < 3; i++) {
     passed &= (in[i] == out[i]);
   }
   passed &= (out[3] == 0xA8);
 
   // Expect the first 23 bits of the input sequence.
-  ref_bit_selection(30, 23, 0, in, out);
+  ref_bit_selection(0, 30, 23, 0, 0, 0, in, out);
   passed &= (out[0] == in[0]);
   passed &= (out[1] == in[1]);
   passed &= (out[2] == 0xFE);
 
   // Expect the first 26 bits of the input sequence.
-  ref_bit_selection(30, 26, 0, in, out);
+  ref_bit_selection(0, 30, 26, 0, 0, 0, in, out);
   passed &= (out[0] == in[0]);
   passed &= (out[1] == in[1]);
   passed &= (out[2] == in[2]);
   passed &= (out[3] == 0x80);
 
   // Expect repetition of the first 8 bits.
-  ref_bit_selection(16, 24, 0, in, out);
+  ref_bit_selection(0, 16, 24, 0, 0, 0, in, out);
   passed &= (out[0] == in[0]);
   passed &= (out[1] == in[1]);
   passed &= (out[2] == in[0]);
 
   // Expect input repeated twice.
-  ref_bit_selection(16, 32, 0, in, out);
+  ref_bit_selection(0, 16, 32, 0, 0, 0, in, out);
   passed &= (out[0] == in[0]) && (out[2] == in[0]);
   passed &= (out[1] == in[1]) && (out[3] == in[1]);
 
   // Expect repetition of the first 6 bits.
-  ref_bit_selection(16, 22, 0, in, out);
+  ref_bit_selection(0, 16, 22, 0, 0, 0, in, out);
   passed &= (out[0] == in[0]);
   passed &= (out[1] == in[1]);
   passed &= (out[2] == 0xA4);
 
   // A7 = 1010 01  11
   // Expect repetition of 1010 01
-  ref_bit_selection(6, 16, 0, in, out);
+  ref_bit_selection(0, 6, 16, 0, 0, 0, in, out);
   passed &= (out[0] == 0xA6);
   passed &= (out[1] == 0x9A);
 
   // A7F = 1010 0111 11
   // Expect repetition of 1010 0111 1110 1001
-  ref_bit_selection(10, 16, 0, in, out);
+  ref_bit_selection(0, 10, 16, 0, 0, 0, in, out);
   passed &= (out[0] == in[0]);
   passed &= (out[1] == 0xE9);
 
   // rv_id = 1, bg = 1, N_cb = 32, Z_C = 2 gives k0 = 16
-  ref_bit_selection(30, 30, 16, in, out);
+  ref_bit_selection(0, 30, 30, 0, 0, 16, in, out);
   passed &= (out[0] == in[2]);
   passed &= (out[1] == 0xAA);
   passed &= (out[2] == 0x9F);
   passed &= (out[3] == 0xFC);
 
-  ref_bit_selection(30, 23, 16, in, out);
+  ref_bit_selection(0, 30, 23, 0, 0, 16, in, out);
   passed &= (out[0] == in[2]);
   passed &= (out[1] == 0xAA);
   passed &= (out[2] == 0x9E);
 
   // Expect input repeated twice.
-  ref_bit_selection(24, 32, 16, in, out);
+  ref_bit_selection(0, 24, 32, 0, 0, 16, in, out);
   passed &= (out[0] == in[2]);
   passed &= (out[1] == in[0]);
   passed &= (out[2] == in[1]);
   passed &= (out[3] == in[2]);
 
+  // rv_id = 0, z = 7, n = 350, e = 328, len_filler_bits = 16, k = 70
+  uint8_t in_filler[] = {0x22, 0x35, 0x72, 0xd4, 0xb5, 0x00, 0x00, 0x9a, 0x32,
+                         0xd0, 0x45, 0x6d, 0x18, 0x10, 0xfa, 0xf8, 0xa4, 0x5e,
+                         0x8c, 0x88, 0x1f, 0x8a, 0xf6, 0x66, 0xad, 0xc8, 0xb0,
+                         0xc8, 0xe6, 0xca, 0x5c, 0x4e, 0x0a, 0x59, 0x47, 0x33,
+                         0xb8, 0x61, 0x0c, 0x6c, 0x8c, 0xa8, 0xa8, 0xb0};
+  uint8_t out_filler[328 >> 3] = {0};
+  ref_bit_selection(7, 350, 328, 16, 70, 0, in_filler, out_filler);
+
+  for (uint16_t i = 0, j = 0; j < (328 >> 3); i++) {
+    if ((i != 5) && (i != 6)) {
+      passed &= (in_filler[i] == out_filler[j++]);
+    }
+  }
+
   // Test bit interleaving.
   // in:  1010 0111 1111
   // out = 1011 0011 1111
@@ -244,7 +295,8 @@ uint32_t num_bit_per_symbol(armral_modulation_type mod_type) {
 }
 
 void armral_ref_rate_matching(armral_ldpc_graph_t bg, uint32_t z, uint32_t e,
-                              uint32_t nref, uint32_t rv,
+                              uint32_t nref, uint32_t len_filler_bits,
+                              uint32_t k, uint32_t rv,
                               armral_modulation_type mod, const uint8_t *src,
                               uint8_t *dst) {
   uint32_t ncb;
@@ -258,7 +310,7 @@ void armral_ref_rate_matching(armral_ldpc_graph_t bg, uint32_t z, uint32_t e,
   }
   uint32_t k0 = starting_position(bg, rv, n, ncb, z);
   std::vector<uint8_t> selected(e);
-  ref_bit_selection(ncb, e, k0, src, selected.data());
+  ref_bit_selection(z, n, e, len_filler_bits, k, k0, src, selected.data());
   ref_bit_interleave(e, qm, selected.data(), dst);
 }
 
@@ -272,6 +324,7 @@ bool test_ldpc_rate_matching(
   // Arbitrary subset of lifting sizes given in Table 5.3.2-1.
   const uint32_t lifting_size_list[] = {2, 3, 5, 11, 28, 32, 104};
   const uint32_t rv_list[] = {0, 1, 2, 3};
+  const uint32_t filler_bits_list[] = {0, 28, 32};
   uint32_t lifting_size_list_len =
       sizeof(lifting_size_list) / sizeof(lifting_size_list[0]);
   uint32_t lifting_size_min = *std::min_element(
@@ -280,43 +333,52 @@ bool test_ldpc_rate_matching(
       lifting_size_list, lifting_size_list + lifting_size_list_len);
   // Prepare nref_list for both BG1 and BG2 using lifting_size_list
   const uint32_t nref_list[2][3] = {
-      {0, (66 * lifting_size_min), (66 * lifting_size_max)}, //BG1
-      {0, (50 * lifting_size_min), (50 * lifting_size_max)}, //BG2
+      {0, (66 * lifting_size_min), (66 * lifting_size_max)}, // BG1
+      {0, (50 * lifting_size_min), (50 * lifting_size_max)}, // BG2
   };
 
   // Arbitrary subset of modulation schemes.
   const armral_modulation_type mod_list[] = {ARMRAL_MOD_QPSK, ARMRAL_MOD_16QAM};
   for (auto z : lifting_size_list) {
     for (auto bg : base_graph_list) {
-      // Default to base graph 1
-      uint32_t num_bits = src_length(bg, z);
-      uint32_t num_bytes = (num_bits + 7) / 8;
-      std::vector<uint8_t> src = allocate_random_u8(num_bytes, 0U, 1U);
-      for (auto mod : mod_list) {
-        uint32_t qm = num_bit_per_symbol(mod);
-        // Choose G as number of coded bits. The specs define
-        // 'G is the total number of coded bits available for transmission
-        // of the transpose block'.
-        // G should be revised once a better understanding of what
-        // values G attains has been obtained.
-        uint32_t g = 22 * z;
-        if (bg == LDPC_BASE_GRAPH_2) {
-          g = 11 * z;
+      for (auto len_filler_bits : filler_bits_list) {
+        if (z < 28) {
+          len_filler_bits = 0;
         }
-        // Choose e based on E_r = N_L * qm * ceil(G / (N_L * qm * C))
-        // where N_L = 1, and C = 1.
-        uint32_t e = qm * ((g + qm - 1) / qm);
-        num_bytes = (e + 7) / 8;
-        for (auto rv : rv_list) {
-          for (auto nref : nref_list[bg]) {
-            std::vector<uint8_t> ref = std::vector<uint8_t>(num_bytes);
-            std::vector<uint8_t> dst = std::vector<uint8_t>(num_bytes);
-            ldpc_rate_matching_under_test(bg, z, e, nref, rv, mod, src.data(),
-                                          dst.data());
-            armral_ref_rate_matching(bg, z, e, nref, rv, mod, src.data(),
-                                     ref.data());
-            passed &=
-                check_results_u8(test_name, dst.data(), ref.data(), num_bytes);
+        // Default to base graph 1
+        uint32_t num_bits = src_length(bg, z);
+        uint32_t num_bytes = (num_bits + 7) / 8;
+        std::vector<uint8_t> src_store = allocate_random_u8(num_bytes, 0U, 1U);
+        std::vector<uint8_t> src = std::vector<uint8_t>(num_bytes);
+        for (auto mod : mod_list) {
+          uint32_t qm = num_bit_per_symbol(mod);
+          // Choose G as number of coded bits. The specs define
+          // 'G is the total number of coded bits available for transmission
+          // of the transport block'.
+          // G should be revised once a better understanding of what
+          // values G attains has been obtained.
+          uint32_t g = 22 * z;
+          if (bg == LDPC_BASE_GRAPH_2) {
+            g = 10 * z;
+          }
+          // Choose e based on E_r = N_L * qm * ceil(G / (N_L * qm * C))
+          // where N_L = 1, and C = 1.
+          uint32_t e = qm * ((g + qm - 1) / qm);
+          num_bytes = (e + 7) / 8;
+          for (auto rv : rv_list) {
+            for (auto nref : nref_list[bg]) {
+              std::vector<uint8_t> ref = std::vector<uint8_t>(num_bytes);
+              std::vector<uint8_t> dst = std::vector<uint8_t>(num_bytes);
+              // Restore all of src; num_bytes now holds the output size.
+              memcpy(src.data(), src_store.data(), src_store.size());
+              ldpc_rate_matching_under_test(bg, z, e, nref, len_filler_bits, g,
+                                            rv, mod, src.data(), dst.data());
+              memcpy(src.data(), src_store.data(), src_store.size());
+              armral_ref_rate_matching(bg, z, e, nref, len_filler_bits, g, rv,
+                                       mod, src.data(), ref.data());
+              passed &= check_results_u8(test_name, dst.data(), ref.data(),
+                                         num_bytes);
+            }
           }
         }
       }
@@ -335,7 +397,7 @@ int main(int argc, char **argv) {
   passed &= test_ldpc_rate_matching(
       "ldpc_rate_matching_noalloc",
       [](armral_ldpc_graph_t bg, uint32_t z, uint32_t e, auto... args) {
-        std::vector<uint8_t> buffer(e);
+        std::vector<uint8_t> buffer(e + 2 * z * 66);
         return armral_ldpc_rate_matching_noalloc(bg, z, e, args...,
                                                  buffer.data());
       });
diff --git a/test/LDPC/rate_recovery/main.cpp b/test/LDPC/rate_recovery/main.cpp
index 5d7eb463d241e085d8465124bab62b6614d0aef5..88a67093cf473a7e44734c7c84de32a0f4f97b2f 100644
--- a/test/LDPC/rate_recovery/main.cpp
+++ b/test/LDPC/rate_recovery/main.cpp
@@ -13,8 +13,9 @@
 
 namespace {
 
-void ref_undo_selection(uint32_t n, uint32_t e, uint32_t k0, const int8_t *in,
-                        int8_t *out) {
+void ref_undo_selection(uint32_t z, uint32_t n, uint32_t e,
+                        uint32_t len_filler_bits, uint32_t k, uint32_t k0,
+                        const int8_t *in, int8_t *out, int8_t *scratch_llrs) {
   // performs the inverse of the bit selection as specified by
   // section 5.4.2.1 in 3GPP TS 38.212
   assert(k0 >= 0 && k0 < n);
@@ -25,16 +26,38 @@ void ref_undo_selection(uint32_t n, uint32_t e, uint32_t k0, const int8_t *in,
     assert(out[i] == 0);
   }
 
-  for (uint32_t k = 0; k < e; k++) {
-    int32_t sum = (int32_t)out[(k0 + k) % n] + (int32_t)in[k];
+  int8_t *out_llrs;
+
+  if (len_filler_bits > 0) {
+    out_llrs = scratch_llrs;
+  } else {
+    out_llrs = out;
+  }
+
+  for (uint32_t i = 0; i < e; i++) {
+    int32_t sum = (int32_t)out_llrs[(k0 + i) % n] + (int32_t)in[i];
     if (sum < (int32_t)INT8_MIN) {
-      out[(k0 + k) % n] = INT8_MIN;
+      out_llrs[(k0 + i) % n] = INT8_MIN;
     } else if (sum > (int32_t)INT8_MAX) {
-      out[(k0 + k) % n] = INT8_MAX;
+      out_llrs[(k0 + i) % n] = INT8_MAX;
     } else {
-      out[(k0 + k) % n] = (int8_t)sum;
+      out_llrs[(k0 + i) % n] = (int8_t)sum;
     }
   }
+
+  // insert Filler bits
+  if (len_filler_bits > 0) {
+
+    uint32_t len_s_f_bits = k - z * 2; // length of systematic & filler bits
+    uint32_t len_s_bits =
+        len_s_f_bits - len_filler_bits;     // length of systematic bits
+    uint32_t len_p_bits = n - len_s_f_bits; // length of parity bits
+
+    memcpy(out, scratch_llrs, len_s_bits);
+    memset(&out[len_s_bits], 0, len_filler_bits);
+    memcpy(&out[len_s_bits + len_filler_bits], &scratch_llrs[len_s_bits],
+           len_p_bits);
+  }
 }
 
 void ref_undo_interleave(uint32_t e, uint32_t qm, const int8_t *in,
@@ -112,7 +135,8 @@ uint32_t num_bit_per_symbol(armral_modulation_type mod_type) {
 }
 
 void armral_ref_rate_recovery(armral_ldpc_graph_t bg, uint32_t z, uint32_t e,
-                              uint32_t nref, uint32_t rv,
+                              uint32_t nref, uint32_t len_filler_bits,
+                              uint32_t k, uint32_t rv,
                               armral_modulation_type mod, const int8_t *src,
                               int8_t *dst) {
   uint32_t ncb = 0;
@@ -127,8 +151,10 @@ void armral_ref_rate_recovery(armral_ldpc_graph_t bg, uint32_t z, uint32_t e,
   uint32_t k0 = starting_position(bg, rv, n, ncb, z);
   uint32_t qm = num_bit_per_symbol(mod);
   std::vector<int8_t> llrs(e);
+  std::vector<int8_t> scratch_llrs(n);
   ref_undo_interleave(e, qm, src, llrs.data());
-  ref_undo_selection(ncb, e, k0, llrs.data(), dst);
+  ref_undo_selection(z, n, e, len_filler_bits, k, k0, llrs.data(), dst,
+                     scratch_llrs.data());
 }
 
 bool test_ref_rate_recovery() {
@@ -140,7 +166,7 @@ bool test_ref_rate_recovery() {
   uint32_t k0 = 16;
   auto in = allocate_random_i8(e);
   std::vector<int8_t> out(n);
-  ref_undo_selection(n, e, k0, in.data(), out.data());
+  ref_undo_selection(0, n, e, 0, 0, k0, in.data(), out.data(), NULL);
   passed &= std::equal(out.begin() + k0, out.begin() + n, in.begin());
   passed &= std::equal(out.begin(), out.begin() + k0, in.begin() + k0);
 
@@ -149,7 +175,7 @@ bool test_ref_rate_recovery() {
   n = 32;
   k0 = 16;
   memset(out.data(), 0, n * sizeof(int8_t));
-  ref_undo_selection(n, e, k0, in.data(), out.data());
+  ref_undo_selection(0, n, e, 0, 0, k0, in.data(), out.data(), NULL);
   passed &= std::equal(out.begin() + k0, out.begin() + n, in.begin());
   passed &= std::equal(out.begin(), out.begin() + (e - k0), in.begin() + k0);
   passed &= std::all_of(out.begin() + (e - k0), out.begin() + k0,
@@ -172,7 +198,7 @@ bool test_ref_rate_recovery() {
     in[16 + i] = in[16 + i] / 2;
   }
   memset(out.data(), 0, n * sizeof(int8_t));
-  ref_undo_selection(n, e, k0, in.data(), out.data());
+  ref_undo_selection(0, n, e, 0, 0, k0, in.data(), out.data(), NULL);
   passed &= (out[0] == INT8_MIN);
   passed &= (out[1] == INT8_MAX);
   for (uint32_t i = 2; i < 16; i++) {
@@ -180,6 +206,31 @@ bool test_ref_rate_recovery() {
     passed &= (out[i] == (int8_t)sol);
   }
 
+  // Test selection process with filler bits
+  e = 328;
+  n = 328;
+  k0 = 0;
+  uint32_t z = 7;
+  uint32_t f = 16;
+  auto in_filler = allocate_random_i8(e);
+  std::vector<int8_t> out_filler(n);
+  std::vector<int8_t> scratch_llrs(n);
+
+  ref_undo_selection(z, n, e, f, z * 10, k0, in_filler.data(),
+                     out_filler.data(), scratch_llrs.data());
+
+  for (uint16_t i = 0, j = 0; i < n; i++) {
+    if (i < (z * 10 - 2 * z - f)) {
+      passed &= (in_filler[j] == out_filler[i]);
+      j++;
+    } else if ((i >= (z * 10 - 2 * z - f)) && (i < (z * 10 - 2 * z))) {
+      passed &= (0 == out_filler[i]);
+    } else {
+      passed &= (in_filler[j] == out_filler[i]);
+      j++;
+    }
+  }
+
   // Test reversal of interleaving
   e = 16;
   std::iota(in.begin(), in.begin() + 16, 1);
@@ -201,7 +252,7 @@ bool test_ldpc_rate_recovery(
   const armral_ldpc_graph_t base_graph_list[] = {LDPC_BASE_GRAPH_1,
                                                  LDPC_BASE_GRAPH_2};
   // Arbitrary subset of lifting sizes given in Table 5.3.2-1.
-  const uint32_t lifting_size_list[] = {2, 3, 8};
+  const uint32_t lifting_size_list[] = {2, 3, 8, 11, 28, 32, 104};
   uint32_t lifting_size_list_len =
       sizeof(lifting_size_list) / sizeof(lifting_size_list[0]);
   uint32_t lifting_size_min = *std::min_element(
@@ -213,7 +264,7 @@ bool test_ldpc_rate_recovery(
       {0, (66 * lifting_size_min), (66 * lifting_size_max)}, //BG1
       {0, (50 * lifting_size_min), (50 * lifting_size_max)}, //BG2
   };
-
+  const uint32_t filler_bits_list[] = {0, 28, 36};
   // Arbitrary subset of modulation schemes.
   const armral_modulation_type mod_list[] = {ARMRAL_MOD_QPSK,
                                              ARMRAL_MOD_256QAM};
@@ -223,20 +274,26 @@ bool test_ldpc_rate_recovery(
   for (auto z : lifting_size_list) {
     for (auto bg : base_graph_list) {
       uint32_t n = (bg == LDPC_BASE_GRAPH_2) ? 50 * z : 66 * z;
-      for (auto mod : mod_list) {
-        uint32_t qm = num_bit_per_symbol(mod);
-        uint32_t g = (bg == LDPC_BASE_GRAPH_2) ? 11 * z : 22 * z;
-        uint32_t e = qm * ((g + qm - 1) / qm);
-        for (auto rv : rv_list) {
-          for (auto nref : nref_list[bg]) {
-            std::vector<int8_t> llrs_in = allocate_random_i8(e);
-            std::vector<int8_t> llrs_out(n);
-            std::vector<int8_t> llrs_ref(n);
-            armral_ref_rate_recovery(bg, z, e, nref, rv, mod, llrs_in.data(),
-                                     llrs_ref.data());
-            ldpc_rate_recovery_under_test(bg, z, e, nref, rv, mod,
-                                          llrs_in.data(), llrs_out.data());
-            passed &= (llrs_ref == llrs_out);
+      for (auto len_filler_bits : filler_bits_list) {
+        if (z < 28) {
+          len_filler_bits = 0;
+        }
+        for (auto mod : mod_list) {
+          uint32_t qm = num_bit_per_symbol(mod);
+          uint32_t g = (bg == LDPC_BASE_GRAPH_2) ? 10 * z : 22 * z;
+          uint32_t e = qm * ((g + qm - 1) / qm);
+          for (auto rv : rv_list) {
+            for (auto nref : nref_list[bg]) {
+              std::vector<int8_t> llrs_in = allocate_random_i8(e);
+              std::vector<int8_t> llrs_out(n);
+              std::vector<int8_t> llrs_ref(n);
+              armral_ref_rate_recovery(bg, z, e, nref, len_filler_bits, g, rv,
+                                       mod, llrs_in.data(), llrs_ref.data());
+              ldpc_rate_recovery_under_test(bg, z, e, nref, len_filler_bits, g,
+                                            rv, mod, llrs_in.data(),
+                                            llrs_out.data());
+              passed &= (llrs_ref == llrs_out);
+            }
           }
         }
       }
@@ -259,7 +316,7 @@ int main(int argc, char **argv) {
   passed &= test_ldpc_rate_recovery(
       "LDPCRateRecoveryNoAlloc",
       [](armral_ldpc_graph_t bg, uint32_t z, uint32_t e, auto... args) {
-        std::vector<uint8_t> buffer(e);
+        std::vector<uint8_t> buffer(e + z * 66);
         return armral_ldpc_rate_recovery_noalloc(bg, z, e, args...,
                                                  buffer.data());
       });