diff --git a/bench/UpperPHY/LDPC/Decoding/main.cpp b/bench/UpperPHY/LDPC/Decoding/main.cpp index 59840decef7bd3a1b98dde54e16ec7cdda5a4661..6a04de3e7d0c21c51b10051b3089a19165644e33 100755 --- a/bench/UpperPHY/LDPC/Decoding/main.cpp +++ b/bench/UpperPHY/LDPC/Decoding/main.cpp @@ -12,14 +12,15 @@ namespace { -void run_ldpc_decoding_perf(armral_ldpc_graph_t bg, uint32_t z, - uint32_t crc_idx, uint32_t num_its, +void run_ldpc_decoding_perf(uint32_t n, armral_ldpc_graph_t bg, uint32_t z, + uint32_t len_filler_bits, uint32_t num_its, + armral_ldpc_decode_options_t options, uint32_t num_reps) { printf("[LDPC DECODING] - base graph = %u, lifting size = %u, number of " "decoding " - "iterations = %u, index where CRC information begins = %u , number of " + "iterations = %u, with length of filler bits = %u , number of " "repetitions = %u\n", - (uint32_t)bg, z, num_its, crc_idx, num_reps); + (uint32_t)bg, z, num_its, len_filler_bits, num_reps); const auto *graph = armral_ldpc_get_base_graph(bg); @@ -31,18 +32,20 @@ void run_ldpc_decoding_perf(armral_ldpc_graph_t bg, uint32_t z, #ifdef ARMRAL_BENCH_NOALLOC auto buffer_size = - armral_ldpc_decode_block_noalloc_buffer_size(bg, z, crc_idx, num_its); + armral_ldpc_decode_block_noalloc_buffer_size(bg, z, num_its); std::vector buffer(buffer_size); for (uint32_t r = 0; r < num_reps; ++r) { buffer_bump_allocator allocator{buffer.data()}; armral::ldpc::decode_block( - llr_ptr, bg, z, crc_idx, num_its, out_ptr, allocator); + n, llr_ptr, bg, z, len_filler_bits, out_ptr, num_its, options, + allocator); } #else for (uint32_t r = 0; r < num_reps; ++r) { heap_allocator allocator{}; - armral::ldpc::decode_block(llr_ptr, bg, z, crc_idx, num_its, - out_ptr, allocator); + armral::ldpc::decode_block(n, llr_ptr, bg, z, + len_filler_bits, out_ptr, + num_its, options, allocator); } #endif } @@ -60,16 +63,25 @@ int main(int argc, char **argv) { // num_its - The number of iterations to perform in the decoding // num_reps - The number of times to repeat the decoding so as // to get a stable performance number - printf("Usage: %s base_graph z crc_idx num_its num_reps\n", argv[0]); + printf( + "Usage: %s n base_graph z len_filler_bits num_its options num_reps\n", + argv[0]); exit(EXIT_FAILURE); } auto bg = (armral_ldpc_graph_t)(atoi(argv[1]) - 1); auto z = (uint32_t)atoi(argv[2]); auto crc_idx = (uint32_t)atoi(argv[3]); + auto n = (uint32_t)((bg == 0) ? (z * 66) : (z * 50)); + auto nmessage_bits = bg == LDPC_BASE_GRAPH_1 ? z * 22 : z * 10; + auto len_filler_bits = (crc_idx != 0U) ? nmessage_bits - crc_idx - 24 : 0; auto num_its = (uint32_t)atoi(argv[4]); + auto options = + (armral_ldpc_decode_options_t)((crc_idx != 0U) + ? ARMRAL_LDPC_DEFAULT_OPTIONS + : ARMRAL_LDPC_CRC_NO); auto num_reps = (uint32_t)atoi(argv[5]); - run_ldpc_decoding_perf(bg, z, crc_idx, num_its, num_reps); + run_ldpc_decoding_perf(n, bg, z, len_filler_bits, num_its, options, num_reps); return EXIT_SUCCESS; } diff --git a/include/armral.h b/include/armral.h index ae446821c2515db1ab446cfc38dd7a512d4fcfe8..057087a99b692aaba9bb8c909a98795b5ca689df 100644 --- a/include/armral.h +++ b/include/armral.h @@ -3403,10 +3403,56 @@ typedef struct { } armral_ldpc_base_graph_t; /** - * \brief A constant which can be passed to `armral_ldpc_decode_block` when the - * input code block has no CRC attached. + * @enum armral_ldpc_decode_options_t + * @brief A constant which can be passed to `armral_ldpc_decode_block` relevant + * to the CRC operation type. + * + * This enumeration defines various options for LDPC decoding, + * including different CRC calculation methods, iteration-based CRC checks, and + * filler bit handling. + * + * Enumeration values: + * - `ARMRAL_LDPC_CRC_NO`: No CRC calculation. + * - `ARMRAL_LDPC_CRC_16`: CRC-16 checksum. + * - `ARMRAL_LDPC_CRC_24A`: CRC-24A checksum. + * - `ARMRAL_LDPC_CRC_24B`: CRC-24B checksum (default). + * - `ARMRAL_LDPC_CRC_EVERY_ITER`: Perform CRC check at every decoding iteration + * (default). + * - `ARMRAL_LDPC_CRC_END_ITER`: Perform CRC check only at the end of all + * decoding iterations. + * - `ARMRAL_LDPC_FILLER_BITS_IMPLICIT`: Implicit handling of filler bits. + * - `ARMRAL_LDPC_FILLER_BITS_EXPLICIT`: Explicit handling of filler bits + * (default). */ -#define ARMRAL_LDPC_NO_CRC 0 +typedef enum { + ARMRAL_LDPC_CRC_NO = 0, ///< No CRC calculation + ARMRAL_LDPC_CRC_16, ///< CRC-16 + ARMRAL_LDPC_CRC_24A, ///< CRC-24A + ARMRAL_LDPC_CRC_24B, ///< CRC-24B (default) + + ARMRAL_LDPC_CRC_EVERY_ITER = (1 << 2), ///< CRC every iteration (default) + ARMRAL_LDPC_CRC_END_ITER = (2 << 2), ///< CRC at end of iterations + + ARMRAL_LDPC_FILLER_BITS_IMPLICIT = (1 << 4), ///< Filler bits implicit + ///< Filler bits explicit (default) + ARMRAL_LDPC_FILLER_BITS_EXPLICIT = (2 << 4) +} armral_ldpc_decode_options_t; + +/** + * @brief Default options for ARMRAL LDPC processing. + * + * This macro defines the default configuration options for ARMRAL LDPC, + * combining the following flags: + * - `ARMRAL_LDPC_CRC_24B`: Enables 24-bit CRC checking. + * - `ARMRAL_LDPC_CRC_EVERY_ITER`: Performs CRC verification at every decoding + * iteration. + * - `ARMRAL_LDPC_FILLER_BITS_EXPLICIT`: Explicitly marks filler bits in + * decoding. + * + */ +#define ARMRAL_LDPC_DEFAULT_OPTIONS \ + (ARMRAL_LDPC_CRC_24B | ARMRAL_LDPC_CRC_EVERY_ITER | \ + ARMRAL_LDPC_FILLER_BITS_EXPLICIT) /** * Uses the identifier of a base graph to get the data structure that describes @@ -3562,29 +3608,46 @@ uint32_t armral_ldpc_encode_block_noalloc_buffer_size(armral_ldpc_graph_t bg, * (TS) 38.212. It is possible that there is no CRC data attached to the code * block, in which case `ARMRAL_LDPC_NO_CRC` can be passed. * + * @param[in] n The length of `llrs`. * @param[in] llrs The initial LLRs to use in the decoding. This is * typically the output after demodulation and rate * recovery. Supports 8 bit llrs in q1.7. * @param[in] bg The type of base graph to use for the decoding. * @param[in] z The lifting size. Valid values of the lifting size are * described in table 5.3.2-1 in TS 38.212. - * @param[in] crc_idx The index of the bit where the CRC attached to the code - * block begins. If there is no CRC attached, set this to - * `ARMRAL_LDPC_NO_CRC`. - * @param[in] num_its The maximum number of iterations of the LDPC decoder to - * run. The algorithm may terminate after fewer iterations - * if the current candidate codeword passes all the parity - * checks, or if it satisfies the CRC check. + * @param[in] len_filler_bits The number of filler bits. As per TS 38.212, + * section 5.2.2, filler bits insertion is needed to + * ensure that the code block segments have a valid + * length and are a multiple of the lifting size. + * Filler bits are used to calculate CRC internally. + * This is assumed to be a multiple of 8bits. * @param[out] data_out The decoded bits. These are of length `22 * z` for base * graph 1 and `10 * z` for base graph 2. It is assumed * that the array `data_out` is able to store this many * bits. + * @param[in] max_its The maximum number of iterations of the LDPC decoder to + * run. The algorithm may terminate after fewer iterations + * if the current candidate codeword passes all the parity + * checks, or if it satisfies the CRC check. + * @param[in] options It is an OR'd result of the below fields, + * CRC Type: + * ARMRAL_LDPC_CRC_NO + * ARMRAL_LDPC_CRC_16 + * ARMRAL_LDPC_CRC_24A + * ARMRAL_LDPC_CRC_24B (default) + * CRC Mode: + * ARMRAL_LDPC_CRC_EVERY_ITER (default) + * ARMRAL_LDPC_CRC_END_ITER + * Filler Bits: + * ARMRAL_LDPC_FILLER_BITS_IMPLICIT + * ARMRAL_LDPC_FILLER_BITS_EXPLICIT (default). * @return An `armral_status` value that indicates success or failure. */ -armral_status armral_ldpc_decode_block(const int8_t *llrs, +armral_status armral_ldpc_decode_block(uint32_t n, const int8_t *llrs, armral_ldpc_graph_t bg, uint32_t z, - uint32_t crc_idx, uint32_t num_its, - uint8_t *data_out); + uint32_t len_filler_bits, + uint8_t *data_out, uint32_t max_its, + armral_ldpc_decode_options_t options); /** * Non-allocating variant of \link armral_ldpc_decode_block \endlink. @@ -3612,31 +3675,46 @@ armral_status armral_ldpc_decode_block(const int8_t *llrs, * calling \link armral_ldpc_decode_block_noalloc_buffer_size \endlink * with identical inputs. * + * @param[in] n The length of `llrs`. * @param[in] llrs The initial LLRs to use in the decoding. This is * typically the output after demodulation and rate * recovery. Supports 8 bit llrs in q1.7. * @param[in] bg The type of base graph to use for the decoding. * @param[in] z The lifting size. Valid values of the lifting size are * described in table 5.3.2-1 in TS 38.212. - * @param[in] crc_idx The index of the bit where the CRC attached to the code - * block begins. If there is no CRC attached, set this to - * `ARMRAL_LDPC_NO_CRC`. - * @param[in] num_its The maximum number of iterations of the LDPC decoder to - * run. The algorithm may terminate after fewer iterations - * if the current candidate codeword passes all the parity - * checks, or if it satisfies the CRC check. + * @param[in] len_filler_bits The number of filler bits. As per TS 38.212, + * section 5.2.2, filler bits insertion is needed to + * ensure that the code block segments have a valid + * length and are a multiple of the lifting size. + * Filler bits are used to calculate CRC internally. + * This is assumed to be a multiple of 8bits. * @param[out] data_out The decoded bits. These are of length `22 * z` for base * graph 1 and `10 * z` for base graph 2. It is assumed * that the array `data_out` is able to store this many * bits. + * @param[in] max_its The maximum number of iterations of the LDPC decoder to + * run. The algorithm may terminate after fewer iterations + * if the current candidate codeword passes all the parity + * checks, or if it satisfies the CRC check. + * @param[in] options It is an OR'd result of the below fields, + * CRC Type: + * ARMRAL_LDPC_CRC_NO + * ARMRAL_LDPC_CRC_16 + * ARMRAL_LDPC_CRC_24A + * ARMRAL_LDPC_CRC_24B (default) + * CRC Mode: + * ARMRAL_LDPC_CRC_EVERY_ITER (default) + * ARMRAL_LDPC_CRC_END_ITER + * Filler Bits: + * ARMRAL_LDPC_FILLER_BITS_IMPLICIT + * ARMRAL_LDPC_FILLER_BITS_EXPLICIT (default). * @param[in] buffer Workspace buffer to be used internally. * @return An `armral_status` value that indicates success or failure. */ -armral_status armral_ldpc_decode_block_noalloc(const int8_t *llrs, - armral_ldpc_graph_t bg, - uint32_t z, uint32_t crc_idx, - uint32_t num_its, - uint8_t *data_out, void *buffer); +armral_status armral_ldpc_decode_block_noalloc( + uint32_t n, const int8_t *llrs, armral_ldpc_graph_t bg, uint32_t z, + uint32_t len_filler_bits, uint8_t *data_out, uint32_t max_its, + armral_ldpc_decode_options_t options, void *buffer); /** * Calculates the required buffer size in bytes needed to perform LDPC decoding @@ -3645,10 +3723,7 @@ armral_status armral_ldpc_decode_block_noalloc(const int8_t *llrs, * @param[in] bg The type of base graph to use for the decoding. * @param[in] z The lifting size. Valid values of the lifting size are * described in table 5.3.2-1 in TS 38.212. - * @param[in] crc_idx The index of the bit where the CRC attached to the code - * block begins. If there is no CRC attached, set this to - * `ARMRAL_LDPC_NO_CRC`. - * @param[in] num_its The maximum number of iterations of the LDPC decoder to + * @param[in] max_its The maximum number of iterations of the LDPC decoder to * run. The algorithm may terminate after fewer iterations * if the current candidate codeword passes all the parity * checks, or if it satisfies the CRC check. @@ -3656,8 +3731,7 @@ armral_status armral_ldpc_decode_block_noalloc(const int8_t *llrs, */ uint32_t armral_ldpc_decode_block_noalloc_buffer_size(armral_ldpc_graph_t bg, uint32_t z, - uint32_t crc_idx, - uint32_t num_its); + uint32_t max_its); /** * Matches the rate of the code block encoded with LDPC code to the rate of the diff --git a/simulation/ldpc_awgn/ldpc_awgn.cpp b/simulation/ldpc_awgn/ldpc_awgn.cpp index 06331567aad3451bbf0597c3fb7d3b07e74c9384..29a7729103bbe6f4928f094a3a6f08d27e84a020 100644 --- a/simulation/ldpc_awgn/ldpc_awgn.cpp +++ b/simulation/ldpc_awgn/ldpc_awgn.cpp @@ -223,8 +223,9 @@ int run_check(armral::utils::random_state *state, uint32_t z, data->data_recovered); // Run LDPC decoding for a single block - armral_ldpc_decode_block(data->data_recovered, bg, z, ARMRAL_LDPC_NO_CRC, 10, - data->data_decoded); + armral_ldpc_decode_block(data->len_encoded, data->data_recovered, bg, z, + data->len_filler_bits, data->data_decoded, 10, + ARMRAL_LDPC_CRC_NO); // To make it easier to compare the values, convert the bit array to a byte // array diff --git a/src/UpperPHY/LDPC/arm_ldpc_decoder.cpp b/src/UpperPHY/LDPC/arm_ldpc_decoder.cpp index c00af85d01272c9cc9cc2e705885d2b596d949c4..a7e1d7ea1a58569bc41e541cafb6770037bde826 100644 --- a/src/UpperPHY/LDPC/arm_ldpc_decoder.cpp +++ b/src/UpperPHY/LDPC/arm_ldpc_decoder.cpp @@ -1296,7 +1296,7 @@ bool hard_decision(int16_t *ptr_l, uint8_t *crc_buff, uint8_t *ptr_data, uint32_t k, uint32_t crc_flag) { uint32_t num_lanes = get_num_lanes(); - uint32_t k_prime = k + 24; + uint32_t k_prime = k; uint32_t full_vec = (k_prime) / num_lanes; uint32_t tail_cnt = (k_prime) & (num_lanes - 1); uint8_t *data = (uint8_t *)crc_buff; @@ -1415,6 +1415,8 @@ bool hard_decision(int16_t *ptr_l, uint8_t *crc_buff, uint8_t *ptr_data, for (uint32_t i = 0; i < (k_prime >> 3); i++) { ptr_data[i] = crc_buff[i + pad_bytes]; } + } else { + memcpy(ptr_data, crc_buff, (k + 7) >> 3); } } else { memcpy(ptr_data, crc_buff, (k + 7) >> 3); @@ -1423,68 +1425,222 @@ bool hard_decision(int16_t *ptr_l, uint8_t *crc_buff, uint8_t *ptr_data, return (crc == 0U); } -inline void load_ptr_l(int16_t *ptr_l, const int8_t *llrs_ptr, - uint32_t len_in) { +inline void load_ptr_l(int16_t *ptr_l, const int8_t *llrs_ptr, uint32_t len_in, + uint32_t filler_bits_len, uint32_t k) { + + if (filler_bits_len == 0) { #if ARMRAL_ARCH_SVE >= 2 - svint8_t vec8; - svbool_t pg = svptrue_b8(); + svint8_t vec8; + svbool_t pg = svptrue_b8(); - uint32_t num_lanes = get_num_lanes(); - uint32_t full_blk = len_in / (2 * num_lanes); - uint32_t tail_cnt = len_in % (2 * num_lanes); + uint32_t num_lanes = get_num_lanes(); + uint32_t full_blk = len_in / (2 * num_lanes); + uint32_t tail_cnt = len_in % (2 * num_lanes); - for (uint32_t num_block = 0; num_block < full_blk; num_block++) { - vec8 = svld1_s8(pg, llrs_ptr); + for (uint32_t num_block = 0; num_block < full_blk; num_block++) { + vec8 = svld1_s8(pg, llrs_ptr); - svint16_t t1 = svmovlb_s16(vec8); - svint16_t t2 = svmovlt_s16(vec8); + svint16_t t1 = svmovlb_s16(vec8); + svint16_t t2 = svmovlt_s16(vec8); - svint16_t result1 = svzip1_s16(t1, t2); - svint16_t result2 = svzip2_s16(t1, t2); + svint16_t result1 = svzip1_s16(t1, t2); + svint16_t result2 = svzip2_s16(t1, t2); - svst1_s16(pg, ptr_l, result1); - ptr_l += num_lanes; - svst1_s16(pg, ptr_l, result2); - ptr_l += num_lanes; - llrs_ptr += 2 * num_lanes; - } + svst1_s16(pg, ptr_l, result1); + ptr_l += num_lanes; + svst1_s16(pg, ptr_l, result2); + ptr_l += num_lanes; + llrs_ptr += 2 * num_lanes; + } - if (tail_cnt != 0U) { - for (uint32_t i = 0; i < tail_cnt; i++) { - ptr_l[i] = (int16_t)llrs_ptr[i]; + if (tail_cnt != 0U) { + for (uint32_t i = 0; i < tail_cnt; i++) { + ptr_l[i] = (int16_t)llrs_ptr[i]; + } } - } #else - uint32_t full_blk = len_in / 16; - uint32_t tail_cnt = len_in % 16; - - for (uint32_t num_block = 0; num_block < full_blk; num_block++) { - int8x16_t vec = vld1q_s8(llrs_ptr); - int8x8_t vec_h = vget_high_s8(vec); - int16x8_t vec_h_16 = vmovl_s8(vec_h); - int8x8_t vec_l = vget_low_s8(vec); - int16x8_t vec_l_16 = vmovl_s8(vec_l); - vst1q_s16(ptr_l, vec_l_16); - ptr_l += 8; - vst1q_s16(ptr_l, vec_h_16); - llrs_ptr += 16; - ptr_l += 8; - } + uint32_t full_blk = len_in / 16; + uint32_t tail_cnt = len_in % 16; + + for (uint32_t num_block = 0; num_block < full_blk; num_block++) { + int8x16_t vec = vld1q_s8(llrs_ptr); + int8x8_t vec_h = vget_high_s8(vec); + int16x8_t vec_h_16 = vmovl_s8(vec_h); + int8x8_t vec_l = vget_low_s8(vec); + int16x8_t vec_l_16 = vmovl_s8(vec_l); + vst1q_s16(ptr_l, vec_l_16); + ptr_l += 8; + vst1q_s16(ptr_l, vec_h_16); + llrs_ptr += 16; + ptr_l += 8; + } - if (tail_cnt != 0U) { - for (uint32_t i = 0; i < tail_cnt; i++) { - ptr_l[i] = (int16_t)llrs_ptr[i]; + if (tail_cnt != 0U) { + for (uint32_t i = 0; i < tail_cnt; i++) { + ptr_l[i] = (int16_t)llrs_ptr[i]; + } + } + +#endif + } else { +#if ARMRAL_ARCH_SVE >= 2 + svint8_t vec8; + svbool_t pg = svptrue_b8(); + uint32_t num_lanes = get_num_lanes(); + + // copy k - filler_bits + uint32_t full_blk = (k - filler_bits_len) / (2 * num_lanes); + uint32_t tail_cnt = (k - filler_bits_len) % (2 * num_lanes); + + for (uint32_t num_block = 0; num_block < full_blk; num_block++) { + vec8 = svld1_s8(pg, llrs_ptr); + + svint16_t t1 = svmovlb_s16(vec8); + svint16_t t2 = svmovlt_s16(vec8); + + svint16_t result1 = svzip1_s16(t1, t2); + svint16_t result2 = svzip2_s16(t1, t2); + + svst1_s16(pg, ptr_l, result1); + ptr_l += num_lanes; + svst1_s16(pg, ptr_l, result2); + ptr_l += num_lanes; + llrs_ptr += 2 * num_lanes; + } + + if (tail_cnt != 0U) { + for (uint32_t i = 0; i < tail_cnt; i++) { + ptr_l[i] = (int16_t)llrs_ptr[i]; + } + llrs_ptr += tail_cnt; + ptr_l += tail_cnt; + } + + // set filler bits + full_blk = filler_bits_len / (2 * num_lanes); + tail_cnt = filler_bits_len % (2 * num_lanes); + svint16_t vec_filler_data_16 = svdup_n_s16(127); + + for (uint32_t num_block = 0; num_block < full_blk; num_block++) { + svst1_s16(pg, ptr_l, vec_filler_data_16); + ptr_l += num_lanes; + svst1_s16(pg, ptr_l, vec_filler_data_16); + ptr_l += num_lanes; + } + + if (tail_cnt != 0U) { + for (uint32_t i = 0; i < tail_cnt; i++) { + ptr_l[i] = 127; + } + ptr_l += tail_cnt; + } + + // copy parity bits + full_blk = (len_in - (k - filler_bits_len)) / (2 * num_lanes); + tail_cnt = (len_in - (k - filler_bits_len)) % (2 * num_lanes); + + for (uint32_t num_block = 0; num_block < full_blk; num_block++) { + vec8 = svld1_s8(pg, llrs_ptr); + + svint16_t t1 = svmovlb_s16(vec8); + svint16_t t2 = svmovlt_s16(vec8); + + svint16_t result1 = svzip1_s16(t1, t2); + svint16_t result2 = svzip2_s16(t1, t2); + + svst1_s16(pg, ptr_l, result1); + ptr_l += num_lanes; + svst1_s16(pg, ptr_l, result2); + ptr_l += num_lanes; + llrs_ptr += 2 * num_lanes; + } + + if (tail_cnt != 0U) { + for (uint32_t i = 0; i < tail_cnt; i++) { + ptr_l[i] = (int16_t)llrs_ptr[i]; + } + } + +#else + + // copy k - filler_bits + uint32_t full_blk = (k - filler_bits_len) / 16; + uint32_t tail_cnt = (k - filler_bits_len) % 16; + // int16_t *ptr_l1 = ptr_l; + + for (uint32_t num_block = 0; num_block < full_blk; num_block++) { + int8x16_t vec = vld1q_s8(llrs_ptr); + int8x8_t vec_h = vget_high_s8(vec); + int16x8_t vec_h_16 = vmovl_s8(vec_h); + int8x8_t vec_l = vget_low_s8(vec); + int16x8_t vec_l_16 = vmovl_s8(vec_l); + vst1q_s16(ptr_l, vec_l_16); + ptr_l += 8; + vst1q_s16(ptr_l, vec_h_16); + llrs_ptr += 16; + ptr_l += 8; + } + + if (tail_cnt != 0U) { + for (uint32_t i = 0; i < tail_cnt; i++) { + ptr_l[i] = (int16_t)llrs_ptr[i]; + } + llrs_ptr += tail_cnt; + ptr_l += tail_cnt; + } + + // set filler bits + full_blk = (filler_bits_len) / 16; + tail_cnt = (filler_bits_len) % 16; + int16x8_t vec_filler_data_16 = {127, 127, 127, 127, 127, 127, 127, 127}; + + for (uint32_t num_block = 0; num_block < full_blk; num_block++) { + vst1q_s16(ptr_l, vec_filler_data_16); + ptr_l += 8; + vst1q_s16(ptr_l, vec_filler_data_16); + ptr_l += 8; + } + + if (tail_cnt != 0U) { + for (uint32_t i = 0; i < tail_cnt; i++) { + ptr_l[i] = 127; + } + ptr_l += tail_cnt; + } + + // copy parity bits + full_blk = (len_in - (k - filler_bits_len)) / 16; + tail_cnt = (len_in - (k - filler_bits_len)) % 16; + + for (uint32_t num_block = 0; num_block < full_blk; num_block++) { + int8x16_t vec = vld1q_s8(llrs_ptr); + int8x8_t vec_h = vget_high_s8(vec); + int16x8_t vec_h_16 = vmovl_s8(vec_h); + int8x8_t vec_l = vget_low_s8(vec); + int16x8_t vec_l_16 = vmovl_s8(vec_l); + vst1q_s16(ptr_l, vec_l_16); + ptr_l += 8; + vst1q_s16(ptr_l, vec_h_16); + llrs_ptr += 16; + ptr_l += 8; + } + + if (tail_cnt != 0U) { + for (uint32_t i = 0; i < tail_cnt; i++) { + ptr_l[i] = (int16_t)llrs_ptr[i]; + } } - } #endif + } } template -bool decode_block(const int8_t *llrs, armral_ldpc_graph_t bg, uint32_t z, - uint32_t crc_idx, uint32_t num_its, uint8_t *data_out, +bool decode_block(uint32_t n, const int8_t *llrs, armral_ldpc_graph_t bg, + uint32_t z, uint32_t len_filler_bits, uint8_t *data_out, + uint32_t max_its, armral_ldpc_decode_options_t options, Allocator &allocator) { bool crc_passed = false; @@ -1525,6 +1681,11 @@ bool decode_block(const int8_t *llrs, armral_ldpc_graph_t bg, uint32_t z, } uint32_t r_index = 0; + uint32_t no_of_rows = graph->nrows; + + if ((n + len_filler_bits) < z * graph->ncodeword_bits) { + no_of_rows = (n / z) - graph->nmessage_bits; + } // initialization with channel LLRs. 16-bit buffer "l" will be used for // in-place calculations @@ -1535,13 +1696,16 @@ bool decode_block(const int8_t *llrs, armral_ldpc_graph_t bg, uint32_t z, memset(ptr_l, 0, sizeof(int16_t) * 2 * z); ptr_l = ptr_l + 2 * z; - load_ptr_l(ptr_l, llrs_ptr, graph->ncodeword_bits * z); + load_ptr_l(ptr_l, llrs_ptr, n, + ((options & (1 << 4)) == ARMRAL_LDPC_FILLER_BITS_IMPLICIT) * + len_filler_bits, + (bg == 0) ? (z * 20) : (z * 8)); uint32_t full_blk = z_len / num_lanes; - for (uint32_t it = 0; it < num_its; ++it) { + for (uint32_t it = 0; it < max_its; ++it) { r_index = 0; - for (uint32_t layer = 0; layer < graph->nrows; layer++) { + for (uint32_t layer = 0; layer < no_of_rows; layer++) { // reset the sign buffer memset(row_sign_array.get(), 0, sizeof(int16_t) * z); @@ -1582,24 +1746,31 @@ bool decode_block(const int8_t *llrs, armral_ldpc_graph_t bg, uint32_t z, sign_scratch.get(), &r_index); } - // early exit if crc Passes - if (crc_idx) { - if (it < (num_its - 1)) { - crc_passed = - hard_decision(l.get(), crc_buff.get(), &data_out[0], crc_idx, true); - if (crc_passed) { - return crc_passed; - } + // decision and CRC at the iteration + if ((options & (1 << 2)) == ARMRAL_LDPC_CRC_EVERY_ITER) { + crc_passed = + hard_decision(l.get(), crc_buff.get(), &data_out[0], + graph->nmessage_bits * z - len_filler_bits, true); + if (crc_passed) { + break; } } } - if (crc_idx == ARMRAL_LDPC_NO_CRC) { // do only decisions - crc_passed = hard_decision(l.get(), crc_buff.get(), &data_out[0], - graph->nmessage_bits * z, false); - } else { + // do decision and CRC + if ((options & (2 << 2)) == ARMRAL_LDPC_CRC_END_ITER) { + // ignore filler bits + crc_passed = + hard_decision(l.get(), crc_buff.get(), &data_out[0], + graph->nmessage_bits * z - len_filler_bits, true); + } + + // do only decisions + if ((options & 3) == ARMRAL_LDPC_CRC_NO) { + // ignore filler bits crc_passed = - hard_decision(l.get(), crc_buff.get(), &data_out[0], crc_idx, true); + hard_decision(l.get(), crc_buff.get(), &data_out[0], + graph->nmessage_bits * z - len_filler_bits, false); } return crc_passed; } @@ -1607,41 +1778,43 @@ bool decode_block(const int8_t *llrs, armral_ldpc_graph_t bg, uint32_t z, } // namespace armral::ldpc template bool armral::ldpc::decode_block( - const int8_t *llrs, armral_ldpc_graph_t bg, uint32_t z, uint32_t crc_idx, - uint32_t num_its, uint8_t *data_out, heap_allocator &); + uint32_t n, const int8_t *llrs, armral_ldpc_graph_t bg, uint32_t z, + uint32_t len_filler_bits, uint8_t *data_out, uint32_t max_its, + armral_ldpc_decode_options_t options, heap_allocator &); template bool armral::ldpc::decode_block( - const int8_t *llrs, armral_ldpc_graph_t bg, uint32_t z, uint32_t crc_idx, - uint32_t num_its, uint8_t *data_out, buffer_bump_allocator &); + uint32_t n, const int8_t *llrs, armral_ldpc_graph_t bg, uint32_t z, + uint32_t len_filler_bits, uint8_t *data_out, uint32_t max_its, + armral_ldpc_decode_options_t options, buffer_bump_allocator &); -armral_status armral_ldpc_decode_block(const int8_t *llrs, +armral_status armral_ldpc_decode_block(uint32_t n, const int8_t *llrs, armral_ldpc_graph_t bg, uint32_t z, - uint32_t crc_idx, uint32_t num_its, - uint8_t *data_out) { + uint32_t len_filler_bits, + uint8_t *data_out, uint32_t max_its, + armral_ldpc_decode_options_t options) { heap_allocator allocator{}; - bool result = armral::ldpc::decode_block(llrs, bg, z, crc_idx, num_its, - data_out, allocator); + bool result = armral::ldpc::decode_block( + n, llrs, bg, z, len_filler_bits, data_out, max_its, options, allocator); return (result) ? ARMRAL_SUCCESS : ARMRAL_RESULT_FAIL; } -armral_status -armral_ldpc_decode_block_noalloc(const int8_t *llrs, armral_ldpc_graph_t bg, - uint32_t z, uint32_t crc_idx, uint32_t num_its, - uint8_t *data_out, void *buffer) { +armral_status armral_ldpc_decode_block_noalloc( + uint32_t n, const int8_t *llrs, armral_ldpc_graph_t bg, uint32_t z, + uint32_t len_filler_bits, uint8_t *data_out, uint32_t max_its, + armral_ldpc_decode_options_t options, void *buffer) { buffer_bump_allocator allocator{buffer}; - bool result = armral::ldpc::decode_block(llrs, bg, z, crc_idx, num_its, - data_out, allocator); + bool result = armral::ldpc::decode_block( + n, llrs, bg, z, len_filler_bits, data_out, max_its, options, allocator); return (result) ? ARMRAL_SUCCESS : ARMRAL_RESULT_FAIL; } uint32_t armral_ldpc_decode_block_noalloc_buffer_size(armral_ldpc_graph_t bg, uint32_t z, - uint32_t crc_idx, - uint32_t num_its) { + uint32_t max_its) { counting_allocator allocator{}; - armral::ldpc::decode_block(nullptr, bg, z, crc_idx, num_its, nullptr, - allocator); + armral::ldpc::decode_block(0, nullptr, bg, z, 0, nullptr, max_its, + ARMRAL_LDPC_CRC_NO, allocator); return allocator.required_bytes(); } diff --git a/src/UpperPHY/LDPC/ldpc_coding.hpp b/src/UpperPHY/LDPC/ldpc_coding.hpp index b13f508b0ce5f2692a462c3d3a9972d3be00812e..aba68bae92251fb62c60541356e874a787364a58 100644 --- a/src/UpperPHY/LDPC/ldpc_coding.hpp +++ b/src/UpperPHY/LDPC/ldpc_coding.hpp @@ -15,8 +15,9 @@ constexpr uint32_t num_lifting_sets = 8; uint32_t get_lifting_index(uint32_t lifting_size); template -bool decode_block(const int8_t *llrs, armral_ldpc_graph_t bg, uint32_t z, - uint32_t crc_idx, uint32_t num_its, uint8_t *data_out, +bool decode_block(uint32_t n, const int8_t *llrs, armral_ldpc_graph_t bg, + uint32_t z, uint32_t len_filler_bits, uint8_t *data_out, + uint32_t max_its, armral_ldpc_decode_options_t options, Allocator &allocator); } // namespace armral::ldpc diff --git a/test/UpperPHY/LDPC/Decoding/main.cpp b/test/UpperPHY/LDPC/Decoding/main.cpp index a722b4a2df2dcff725ede3de044aac73a75e2a2c..0c494a09811ec13f3c31b740b718b4018c968f2d 100644 --- a/test/UpperPHY/LDPC/Decoding/main.cpp +++ b/test/UpperPHY/LDPC/Decoding/main.cpp @@ -100,32 +100,57 @@ bool check_decoded_message(uint32_t len, const uint8_t *orig, template bool run_ldpc_decoding_test(uint32_t its, uint32_t z, armral_ldpc_graph_t bg, - uint32_t crc_idx, + armral_ldpc_decode_options_t options, + uint32_t len_filler_bits, LDPCDecodingFunction ldpc_decoding_under_test) { bool passed = true; const auto *graph = armral_ldpc_get_base_graph(bg); + uint32_t n; // Allocate a random input to be encoded - uint32_t len_in = z * graph->nmessage_bits; + uint32_t len_in = z * graph->nmessage_bits; // z * 22 or z * 10 + uint32_t buf_len_in = z * graph->nmessage_bits; + + if (len_filler_bits) { + len_in -= len_filler_bits; + } + armral::utils::int_random random; - auto to_encode = random.vector((len_in + 7) / 8); + auto to_encode = random.vector((buf_len_in + 7) / 8); // If we are doing CRC checking, then we need to attach CRC bits to the input - if (crc_idx != ARMRAL_LDPC_NO_CRC) { - auto info_to_encode = random.vector((len_in + 7) / 8); - len_in = crc_idx + 24; - ldpc_crc_attachment(info_to_encode.data(), len_in, z * graph->nmessage_bits, + if ((options & 3) != ARMRAL_LDPC_CRC_NO) { + auto info_to_encode = random.vector((buf_len_in + 7) / 8); + ldpc_crc_attachment(info_to_encode.data(), len_in, len_in - 24, to_encode.data()); + if (len_filler_bits) { + memset(&to_encode.data()[(len_in + 7) >> 3], 0, + sizeof(uint8_t) * ((len_filler_bits + 7) >> 3)); + } } uint32_t encoded_len = z * graph->ncodeword_bits; auto encoded = random.vector((encoded_len + 7) / 8); - uint32_t len_filler_bits = 0; + // Encode the data armral_ldpc_encode_block(to_encode.data(), bg, z, len_filler_bits, encoded.data()); + // Simulate filler bits removal to create test data for + // 'ARMRAL_LDPC_FILLER_BITS_IMPLICIT' + if ((options & (1 << 4)) == ARMRAL_LDPC_FILLER_BITS_IMPLICIT) { + uint32_t len_s_f_bytes = (len_in - 2 * z + len_filler_bits) >> 3; + uint32_t len_s_bytes = (len_in - 2 * z) >> 3; + uint32_t len_p_bytes = + (encoded_len - (len_in - 2 * z + len_filler_bits)) >> 3; + + uint8_t *buf_1 = encoded.data(); + memcpy((void *)&buf_1[len_s_bytes], &buf_1[len_s_f_bytes], len_p_bytes); + + encoded_len -= len_filler_bits; + } + // run modulation armral_modulation_type mod_type = ARMRAL_MOD_16QAM; int mod_num_symbols = (encoded_len + 3) / 4; @@ -142,10 +167,20 @@ bool run_ldpc_decoding_test(uint32_t its, uint32_t z, armral_ldpc_graph_t bg, data_demod_soft.data()); auto decoded = random.vector((len_in + 7) / 8); - if (ldpc_decoding_under_test(data_demod_soft.data(), bg, z, crc_idx, its, - decoded.data()) != ARMRAL_SUCCESS) { + + if ((z == 56) || (z == 144)) { + // e < n + n = (encoded_len / 2); + } else { + n = encoded_len; + } + + if (ldpc_decoding_under_test(n, data_demod_soft.data(), bg, z, + len_filler_bits, decoded.data(), its, + options) != ARMRAL_SUCCESS) { return false; } + auto decoded_bytes = armral::bits_to_bytes(len_in, decoded.data()); // Also check that the decoded message is equal to the original message @@ -164,19 +199,17 @@ bool run_all_tests(char const *name, std::array bgs{LDPC_BASE_GRAPH_1, LDPC_BASE_GRAPH_2}; std::array num_its{1, 2, 5, 10}; std::array zs{2, 6, 13, 20, 30, 56, 144, 208, 224, 256, 320, 384}; - // Crc-index is zero based indexing - std::array crc_idx_1{4225, 4553, 5257, 6313, 7721}; - std::array crc_idx_2{1921, 2057, 2377, 2857, 3497}; for (auto bg : bgs) { - const auto &crc_ids = (bg == 0) ? crc_idx_1 : crc_idx_2; for (uint32_t i = 0; i < zs.size(); i++) { auto z = zs[i]; - assert(z < 208 || i >= 7); - auto crc_idx = (z >= 208) ? (crc_ids[i - 7] - 1) : ARMRAL_LDPC_NO_CRC; + armral_ldpc_decode_options_t options = armral_ldpc_decode_options_t( + (z >= 208) ? ARMRAL_LDPC_DEFAULT_OPTIONS : 0); + uint32_t len_filler_bits = (z >= 208) ? 32 : 0; for (auto its : num_its) { - printf("[%s] z = %d, crc_idx = %u, its = %d\n", name, z, crc_idx, its); - auto check = run_ldpc_decoding_test(its, z, bg, crc_idx, - ldpc_decoding_under_test); + printf("[%s] z = %d, its = %d len_filler_bits = %d\n", name, z, its, + len_filler_bits); + auto check = run_ldpc_decoding_test( + its, z, bg, options, len_filler_bits, ldpc_decoding_under_test); if (!check) { // GCOVR_EXCL_START printf("Decoding failed\n"); @@ -196,13 +229,15 @@ int main(int argc, char **argv) { passed &= run_all_tests( "LDPCDecodingNoAlloc", - [](const int8_t *llrs, armral_ldpc_graph_t bg, uint32_t z, - uint32_t crc_idx, uint32_t num_its, uint8_t *data_out) { - auto buffer_size = armral_ldpc_decode_block_noalloc_buffer_size( - bg, z, crc_idx, num_its); + [](uint32_t n, const int8_t *llrs, armral_ldpc_graph_t bg, uint32_t z, + uint32_t len_filler_bits, uint8_t *data_out, uint32_t max_its, + armral_ldpc_decode_options_t options) { + auto buffer_size = + armral_ldpc_decode_block_noalloc_buffer_size(bg, z, max_its); std::vector buffer(buffer_size); - return armral_ldpc_decode_block_noalloc(llrs, bg, z, crc_idx, num_its, - data_out, buffer.data()); + return armral_ldpc_decode_block_noalloc(n, llrs, bg, z, len_filler_bits, + data_out, max_its, options, + buffer.data()); }); exit(passed ? EXIT_SUCCESS : EXIT_FAILURE);