diff --git a/lib/Makefile b/lib/Makefile index f4c61c3d6a49f560e8272e09866b95d8ba82bffd..f0d6a08fdfb8c303a381a27312893f7c0b6b6845 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -294,7 +294,9 @@ c_lib_objs := \ zuc_simd_no_aesni.o \ zuc_aarch64_top.o \ mb_mgr_zuc_submit_flush_aarch64.o \ - mb_mgr_zuc_submit_flush_aarch64_no_aesni.o + mb_mgr_zuc_submit_flush_aarch64_no_aesni.o \ + mb_mgr_snow3g_submit_flush_aarch64.o \ + mb_mgr_snow3g_submit_flush_aarch64_no_aesni.o asm_generic_lib_objs := \ lookup_16x8bit_neon.o else diff --git a/lib/aarch64/aesni_emu_aarch64.h b/lib/aarch64/aesni_emu_aarch64.h new file mode 100644 index 0000000000000000000000000000000000000000..c222ea15e56e3d3b120fc881dd33abc0611116b6 --- /dev/null +++ b/lib/aarch64/aesni_emu_aarch64.h @@ -0,0 +1,170 @@ +/********************************************************************** + Copyright(c) 2022 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ +#ifndef AESNI_EMU_AARCH64_H +#define AESNI_EMU_AARCH64_H + +#include "aesni_emu.h" + +#include "aarch64/constant_lookup_aarch64.h" +#include + +static const DECLARE_ALIGNED(uint8_t aes_sbox[16][16], 16) = { + { 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, + 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 }, + { 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, + 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 }, + { 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, + 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 }, + { 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, + 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 }, + { 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, + 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 }, + { 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, + 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf }, + { 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, + 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 }, + { 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, + 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 }, + { 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, + 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 }, + { 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, + 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb }, + { 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, + 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 }, + { 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, + 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 }, + { 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, + 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a }, + { 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, + 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e }, + { 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, + 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf }, + { 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, + 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 } +}; + +static const DECLARE_ALIGNED(uint8_t aes_isbox[16][16], 16) = { + { 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, + 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb }, + { 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, + 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb }, + { 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, + 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e }, + { 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, + 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 }, + { 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, + 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 }, + { 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, + 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 }, + { 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, + 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 }, + { 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, + 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b }, + { 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, + 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 }, + { 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, + 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e }, + { 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, + 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b }, + { 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, + 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 }, + { 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, + 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f }, + { 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, + 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef }, + { 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, + 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 }, + { 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, + 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d } +}; + +/* ========================================================================== */ +/* Emulation API helper functions */ +/* ========================================================================== */ + +static void xor_xmm(union xmm_reg *d, + const union xmm_reg *s1, + const union xmm_reg *s2) +{ + uint32_t i; + + for (i = 0; i < MAX_QWORDS_PER_XMM; i++) + d->qword[i] = s1->qword[i] ^ s2->qword[i]; +} + +static void substitute_bytes(union xmm_reg *dst, const union xmm_reg *src) +{ + uint8x16_t vx = vld1q_u8((uint8_t const *) &src->byte[0]); + + IMB_ASSERT(MAX_BYTES_PER_XMM == 16); + + vx = lookup_16x8bit_neon(vx, aes_sbox); + vst1q_u8((uint8_t *) &dst->byte[0], vx); +} + +static uint8_t gfmul(const uint8_t x, const uint8_t y) +{ + uint32_t i; + uint8_t multiplier = y; + uint8_t out = 0; + + for (i = 0; i < 7; i++) { + if (i >= 1) { + /* GFMUL by 2. "xtimes" operation from FIPS document */ + uint8_t t = multiplier << 1; /* lop of the high bit */ + + if (multiplier >> 7) /* look at the old high bit */ + multiplier = t ^ 0x1B; /* polynomial division */ + else + multiplier = t; + } + if ((x >> i) & 1) + out = out ^ multiplier; + } + + return out; +} + +static void mix_columns(union xmm_reg *dst, const union xmm_reg *src) +{ + uint32_t c; + + for (c = 0; c < MAX_DWORDS_PER_XMM; c++) { + uint8_t s0c = src->byte[c*4+0]; + uint8_t s1c = src->byte[c*4+1]; + uint8_t s2c = src->byte[c*4+2]; + uint8_t s3c = src->byte[c*4+3]; + + dst->byte[c*4+0] = gfmul(2, s0c) ^ gfmul(3, s1c) ^ s2c ^ s3c; + dst->byte[c*4+1] = s0c ^ gfmul(2, s1c) ^ gfmul(3, s2c) ^ s3c; + dst->byte[c*4+2] = s0c ^ s1c ^ gfmul(2, s2c) ^ gfmul(3, s3c); + dst->byte[c*4+3] = gfmul(3, s0c) ^ s1c ^ s2c ^ gfmul(2, s3c); + } +} +#endif /* AESNI_EMU_AARCH64_H */ diff --git a/lib/aarch64/alloc_aarch64.c b/lib/aarch64/alloc_aarch64.c index 6677bce8e415cfcec05fe93adf8138654817be29..eb303b6c46921e1008de5d62e7dce31be7f903ab 100644 --- a/lib/aarch64/alloc_aarch64.c +++ b/lib/aarch64/alloc_aarch64.c @@ -1,5 +1,5 @@ /********************************************************************** - Copyright(c) 2021 Arm Corporation All rights reserved. + Copyright(c) 2021-2022 Arm Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -56,6 +56,8 @@ const struct { OOO_INFO(zuc_eia3_ooo, MB_MGR_ZUC_OOO), OOO_INFO(zuc256_eea3_ooo, MB_MGR_ZUC_OOO), OOO_INFO(zuc256_eia3_ooo, MB_MGR_ZUC_OOO), + OOO_INFO(snow3g_uea2_ooo, MB_MGR_SNOW3G_OOO), + OOO_INFO(snow3g_uia2_ooo, MB_MGR_SNOW3G_OOO), }; /** diff --git a/lib/aarch64/mb_mgr_aarch64.c b/lib/aarch64/mb_mgr_aarch64.c index cd7da998a31be7d7ea046a7a96f783a684edcd2f..e1c19d74cedb31c642f82cf995e5df91d1a357dc 100644 --- a/lib/aarch64/mb_mgr_aarch64.c +++ b/lib/aarch64/mb_mgr_aarch64.c @@ -1,5 +1,5 @@ /********************************************************************** - Copyright(c) 2021 Arm Corporation All rights reserved. + Copyright(c) 2021-2022 Arm Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -72,6 +72,13 @@ IMB_JOB *submit_job_zuc256_eia3_aarch64_common(MB_MGR_ZUC_OOO *state, IMB_JOB *job); IMB_JOB *flush_job_zuc256_eia3_aarch64_common(MB_MGR_ZUC_OOO *state); +IMB_JOB *submit_job_snow3g_uea2_aarch64_common(IMB_MGR *state, + IMB_JOB *job); +IMB_JOB *flush_job_snow3g_uea2_aarch64_common(IMB_MGR *state); + +IMB_JOB *submit_job_snow3g_uia2_aarch64_common(IMB_MGR *state, + IMB_JOB *job); +IMB_JOB *flush_job_snow3g_uia2_aarch64_common(IMB_MGR *state); /* ====================================================================== */ #define SUBMIT_JOB submit_job_aarch64 @@ -96,6 +103,11 @@ IMB_JOB *flush_job_zuc256_eia3_aarch64_common(MB_MGR_ZUC_OOO *state); #define FLUSH_JOB_ZUC256_EEA3 flush_job_zuc256_eea3_aarch64 #define SUBMIT_JOB_ZUC256_EIA3 submit_job_zuc256_eia3_aarch64 #define FLUSH_JOB_ZUC256_EIA3 flush_job_zuc256_eia3_aarch64 +#define SUBMIT_JOB_SNOW3G_UEA2 submit_job_snow3g_uea2_aarch64 +#define FLUSH_JOB_SNOW3G_UEA2 flush_job_snow3g_uea2_aarch64 +#define SUBMIT_JOB_SNOW3G_UIA2 submit_job_snow3g_uia2_aarch64 +#define FLUSH_JOB_SNOW3G_UIA2 flush_job_snow3g_uia2_aarch64 + static IMB_JOB * (*submit_job_zuc_eea3_aarch64)(MB_MGR_ZUC_OOO *state, IMB_JOB *job) = @@ -129,6 +141,21 @@ static IMB_JOB * (*flush_job_zuc256_eia3_aarch64)(MB_MGR_ZUC_OOO *state) = flush_job_zuc256_eia3_aarch64_common; +static IMB_JOB * +(*submit_job_snow3g_uea2_aarch64)(IMB_MGR *state, IMB_JOB *job) = + submit_job_snow3g_uea2_aarch64_common; + +static IMB_JOB * +(*flush_job_snow3g_uea2_aarch64)(IMB_MGR *state) = + flush_job_snow3g_uea2_aarch64_common; + +static IMB_JOB * +(*submit_job_snow3g_uia2_aarch64)(IMB_MGR *state, IMB_JOB *job) = + submit_job_snow3g_uia2_aarch64_common; + +static IMB_JOB * +(*flush_job_snow3g_uia2_aarch64)(IMB_MGR *state) = + flush_job_snow3g_uia2_aarch64_common; static void reset_ooo_mgrs(IMB_MGR *state) { @@ -136,6 +163,8 @@ reset_ooo_mgrs(IMB_MGR *state) MB_MGR_ZUC_OOO *zuc_eia3_ooo = state->zuc_eia3_ooo; MB_MGR_ZUC_OOO *zuc256_eea3_ooo = state->zuc256_eea3_ooo; MB_MGR_ZUC_OOO *zuc256_eia3_ooo = state->zuc256_eia3_ooo; + MB_MGR_SNOW3G_OOO *snow3g_uea2_ooo = state->snow3g_uea2_ooo; + MB_MGR_SNOW3G_OOO *snow3g_uia2_ooo = state->snow3g_uia2_ooo; /* Init ZUC out-of-order fields */ memset(zuc_eea3_ooo->lens, 0, @@ -182,6 +211,34 @@ reset_ooo_mgrs(IMB_MGR *state) zuc256_eia3_ooo->init_not_done = 0; zuc256_eia3_ooo->unused_lane_bitmask = 0x0f; + /* Init SNOW3G out-of-order fields */ + memset(snow3g_uea2_ooo->lens, 0, + sizeof(snow3g_uea2_ooo->lens)); + memset(snow3g_uea2_ooo->job_in_lane, 0, + sizeof(snow3g_uea2_ooo->job_in_lane)); + memset(snow3g_uea2_ooo->bits_fixup, 0, + sizeof(snow3g_uea2_ooo->bits_fixup)); + snow3g_uea2_ooo->init_mask = 0; + snow3g_uea2_ooo->unused_lanes = 0xFF03020100; + snow3g_uea2_ooo->num_lanes_inuse = 0; + snow3g_uea2_ooo->init_done = 0; + memset(snow3g_uea2_ooo->ks, 0, + sizeof(snow3g_uea2_ooo->ks)); + snow3g_uea2_ooo->road_block = 0; + + memset(snow3g_uia2_ooo->lens, 0, + sizeof(snow3g_uia2_ooo->lens)); + memset(snow3g_uia2_ooo->job_in_lane, 0, + sizeof(snow3g_uia2_ooo->job_in_lane)); + memset(snow3g_uia2_ooo->bits_fixup, 0, + sizeof(snow3g_uia2_ooo->bits_fixup)); + snow3g_uia2_ooo->init_mask = 0; + snow3g_uia2_ooo->unused_lanes = 0xFF03020100; + snow3g_uia2_ooo->num_lanes_inuse = 0; + snow3g_uia2_ooo->init_done = 0; + memset(snow3g_uia2_ooo->ks, 0, + sizeof(snow3g_uia2_ooo->ks)); + snow3g_uia2_ooo->road_block = 0; return; } @@ -247,6 +304,7 @@ init_mb_mgr_aarch64_internal(IMB_MGR *state, const int reset_mgrs) state->snow3g_f8_4_buffer = snow3g_f8_4_buffer_aarch64; state->snow3g_f8_8_buffer = snow3g_f8_8_buffer_aarch64; state->snow3g_f8_n_buffer = snow3g_f8_n_buffer_aarch64; + state->snow3g_f8_4_buffer_multikey = snow3g_f8_4_buffer_multikey_aarch64; state->snow3g_f8_8_buffer_multikey = snow3g_f8_8_buffer_multikey_aarch64; state->snow3g_f8_n_buffer_multikey = snow3g_f8_n_buffer_multikey_aarch64; state->snow3g_f9_1_buffer = snow3g_f9_1_buffer_aarch64; diff --git a/lib/aarch64/mb_mgr_aarch64_no_aesni.c b/lib/aarch64/mb_mgr_aarch64_no_aesni.c index bcf6f45705f00be9c59fb05d7af5e8824f24f852..fedb481738feb8eb498295612a5433eba22675b6 100644 --- a/lib/aarch64/mb_mgr_aarch64_no_aesni.c +++ b/lib/aarch64/mb_mgr_aarch64_no_aesni.c @@ -1,5 +1,5 @@ /********************************************************************* - Copyright(c) 2021 Arm Corporation All rights reserved. + Copyright(c) 2021-2022 Arm Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -53,6 +53,14 @@ IMB_JOB *flush_job_zuc_eia3_aarch64_no_aesni(MB_MGR_ZUC_OOO *state); IMB_JOB *submit_job_zuc256_eia3_aarch64_no_aesni(MB_MGR_ZUC_OOO *state, IMB_JOB *job); IMB_JOB *flush_job_zuc256_eia3_aarch64_no_aesni(MB_MGR_ZUC_OOO *state); + +IMB_JOB *submit_job_snow3g_uea2_aarch64_no_aesni(IMB_MGR *state, + IMB_JOB *job); +IMB_JOB *flush_job_snow3g_uea2_aarch64_no_aesni(IMB_MGR *state); + +IMB_JOB *submit_job_snow3g_uia2_aarch64_no_aesni(IMB_MGR *state, + IMB_JOB *job); +IMB_JOB *flush_job_snow3g_uia2_aarch64_no_aesni(IMB_MGR *state); /* ====================================================================== */ #define SUBMIT_JOB submit_job_aarch64_no_aesni @@ -77,6 +85,11 @@ IMB_JOB *flush_job_zuc256_eia3_aarch64_no_aesni(MB_MGR_ZUC_OOO *state); #define FLUSH_JOB_ZUC256_EEA3 flush_job_zuc256_eea3_aarch64_no_aesni #define SUBMIT_JOB_ZUC256_EIA3 submit_job_zuc256_eia3_aarch64_no_aesni #define FLUSH_JOB_ZUC256_EIA3 flush_job_zuc256_eia3_aarch64_no_aesni +#define SUBMIT_JOB_SNOW3G_UEA2 submit_job_snow3g_uea2_aarch64_no_aesni +#define FLUSH_JOB_SNOW3G_UEA2 flush_job_snow3g_uea2_aarch64_no_aesni +#define SUBMIT_JOB_SNOW3G_UIA2 submit_job_snow3g_uia2_aarch64_no_aesni +#define FLUSH_JOB_SNOW3G_UIA2 flush_job_snow3g_uia2_aarch64_no_aesni + static void reset_ooo_mgrs(IMB_MGR *state) @@ -85,6 +98,8 @@ reset_ooo_mgrs(IMB_MGR *state) MB_MGR_ZUC_OOO *zuc256_eea3_ooo = state->zuc256_eea3_ooo; MB_MGR_ZUC_OOO *zuc_eia3_ooo = state->zuc_eia3_ooo; MB_MGR_ZUC_OOO *zuc256_eia3_ooo = state->zuc256_eia3_ooo; + MB_MGR_SNOW3G_OOO *snow3g_uea2_ooo = state->snow3g_uea2_ooo; + MB_MGR_SNOW3G_OOO *snow3g_uia2_ooo = state->snow3g_uia2_ooo; /* Init ZUC out-of-order fields */ memset(zuc_eea3_ooo->lens, 0, @@ -131,6 +146,34 @@ reset_ooo_mgrs(IMB_MGR *state) zuc256_eia3_ooo->init_not_done = 0; zuc256_eia3_ooo->unused_lane_bitmask = 0x0f; + /* Init SNOW3G out-of-order fields */ + memset(snow3g_uea2_ooo->lens, 0, + sizeof(snow3g_uea2_ooo->lens)); + memset(snow3g_uea2_ooo->job_in_lane, 0, + sizeof(snow3g_uea2_ooo->job_in_lane)); + memset(snow3g_uea2_ooo->bits_fixup, 0, + sizeof(snow3g_uea2_ooo->bits_fixup)); + snow3g_uea2_ooo->init_mask = 0; + snow3g_uea2_ooo->unused_lanes = 0xFF03020100; + snow3g_uea2_ooo->num_lanes_inuse = 0; + snow3g_uea2_ooo->init_done = 0; + memset(snow3g_uea2_ooo->ks, 0, + sizeof(snow3g_uea2_ooo->ks)); + snow3g_uea2_ooo->road_block = 0; + + memset(snow3g_uia2_ooo->lens, 0, + sizeof(snow3g_uia2_ooo->lens)); + memset(snow3g_uia2_ooo->job_in_lane, 0, + sizeof(snow3g_uia2_ooo->job_in_lane)); + memset(snow3g_uia2_ooo->bits_fixup, 0, + sizeof(snow3g_uia2_ooo->bits_fixup)); + snow3g_uia2_ooo->init_mask = 0; + snow3g_uia2_ooo->unused_lanes = 0xFF03020100; + snow3g_uia2_ooo->num_lanes_inuse = 0; + snow3g_uia2_ooo->init_done = 0; + memset(snow3g_uia2_ooo->ks, 0, + sizeof(snow3g_uia2_ooo->ks)); + snow3g_uia2_ooo->road_block = 0; return; } @@ -179,6 +222,8 @@ init_mb_mgr_aarch64_no_aesni_internal(IMB_MGR *state, const int reset_mgrs) state->snow3g_f8_4_buffer = snow3g_f8_4_buffer_aarch64_no_aesni; state->snow3g_f8_8_buffer = snow3g_f8_8_buffer_aarch64_no_aesni; state->snow3g_f8_n_buffer = snow3g_f8_n_buffer_aarch64_no_aesni; + state->snow3g_f8_4_buffer_multikey = + snow3g_f8_4_buffer_multikey_aarch64_no_aesni; state->snow3g_f8_8_buffer_multikey = snow3g_f8_8_buffer_multikey_aarch64_no_aesni; state->snow3g_f8_n_buffer_multikey = diff --git a/lib/aarch64/mb_mgr_code_aarch64.h b/lib/aarch64/mb_mgr_code_aarch64.h index 45b4545e2d9702f8c60b6a746cadd16fa8365255..560adc58ebcec429719ccffc5a525bea38bf9144 100644 --- a/lib/aarch64/mb_mgr_code_aarch64.h +++ b/lib/aarch64/mb_mgr_code_aarch64.h @@ -1,5 +1,5 @@ /********************************************************************** - Copyright(c) 2021 Arm Corporation All rights reserved. + Copyright(c) 2021-2022 Arm Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -128,7 +128,7 @@ SUBMIT_JOB_AES_ENC(IMB_MGR *state, IMB_JOB *job) MB_MGR_ZUC_OOO *zuc256_eea3_ooo = state->zuc256_eea3_ooo; if (IMB_CIPHER_SNOW3G_UEA2_BITLEN == job->cipher_mode) { - return submit_snow3g_uea2_job(state, job); + return SUBMIT_JOB_SNOW3G_UEA2(state, job); } else if (IMB_CIPHER_ZUC_EEA3 == job->cipher_mode) { if (16 == job->key_len_in_bytes) { return SUBMIT_JOB_ZUC_EEA3(zuc_eea3_ooo, job); @@ -154,6 +154,8 @@ FLUSH_JOB_AES_ENC(IMB_MGR *state, IMB_JOB *job) } else { /* assume 32 */ return FLUSH_JOB_ZUC256_EEA3(zuc256_eea3_ooo); } + } else if (IMB_CIPHER_SNOW3G_UEA2_BITLEN == job->cipher_mode){ + return FLUSH_JOB_SNOW3G_UEA2(state); } else { /* assume IMB_CIPHER_NULL */ return NULL; } @@ -167,7 +169,7 @@ SUBMIT_JOB_AES_DEC(IMB_MGR *state, IMB_JOB *job) MB_MGR_ZUC_OOO *zuc256_eea3_ooo = state->zuc256_eea3_ooo; if (IMB_CIPHER_SNOW3G_UEA2_BITLEN == job->cipher_mode) { - return submit_snow3g_uea2_job(state, job); + return SUBMIT_JOB_SNOW3G_UEA2(state, job); } else if(IMB_CIPHER_ZUC_EEA3 == job->cipher_mode) { if (16 == job->key_len_in_bytes) { return SUBMIT_JOB_ZUC_EEA3(zuc_eea3_ooo, job); @@ -194,6 +196,8 @@ FLUSH_JOB_AES_DEC(IMB_MGR *state, IMB_JOB *job) } else { /* assume 32 */ return FLUSH_JOB_ZUC256_EEA3(zuc256_eea3_ooo); } + } else if (IMB_CIPHER_SNOW3G_UEA2_BITLEN == job->cipher_mode){ + return FLUSH_JOB_SNOW3G_UEA2(state); } (void) state; return NULL; @@ -231,14 +235,7 @@ SUBMIT_JOB_HASH(IMB_MGR *state, IMB_JOB *job) switch (job->hash_alg) { case IMB_AUTH_SNOW3G_UIA2_BITLEN: - IMB_SNOW3G_F9_1_BUFFER(state, (const snow3g_key_schedule_t *) - job->u.SNOW3G_UIA2._key, - job->u.SNOW3G_UIA2._iv, - job->src + job->hash_start_src_offset_in_bytes, - job->msg_len_to_hash_in_bits, - job->auth_tag_output); - job->status |= IMB_STATUS_COMPLETED_AUTH; - return job; + return SUBMIT_JOB_SNOW3G_UIA2(state, job); case IMB_AUTH_ZUC_EIA3_BITLEN: return SUBMIT_JOB_ZUC_EIA3(zuc_eia3_ooo, job); case IMB_AUTH_ZUC256_EIA3_BITLEN: @@ -257,6 +254,8 @@ FLUSH_JOB_HASH(IMB_MGR *state, IMB_JOB *job) MB_MGR_ZUC_OOO *zuc256_eia3_ooo = state->zuc256_eia3_ooo; switch (job->hash_alg) { + case IMB_AUTH_SNOW3G_UIA2_BITLEN: + return FLUSH_JOB_SNOW3G_UIA2(state); case IMB_AUTH_ZUC_EIA3_BITLEN: return FLUSH_JOB_ZUC_EIA3(zuc_eia3_ooo); case IMB_AUTH_ZUC256_EIA3_BITLEN: diff --git a/lib/aarch64/mb_mgr_snow3g_submit_flush_aarch64.c b/lib/aarch64/mb_mgr_snow3g_submit_flush_aarch64.c new file mode 100644 index 0000000000000000000000000000000000000000..ce55bbfbfe691cd93be64abc2484345e67d58874 --- /dev/null +++ b/lib/aarch64/mb_mgr_snow3g_submit_flush_aarch64.c @@ -0,0 +1,42 @@ +/********************************************************************** + Copyright(c) 2022 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ +#ifndef SUBMIT_JOB_SNOW3G_UEA2 +#define SUBMIT_JOB_SNOW3G_UEA2 submit_job_snow3g_uea2_aarch64_common +#define FLUSH_JOB_SNOW3G_UEA2 flush_job_snow3g_uea2_aarch64_common +#define SUBMIT_JOB_SNOW3G_UIA2 submit_job_snow3g_uia2_aarch64_common +#define FLUSH_JOB_SNOW3G_UIA2 flush_job_snow3g_uia2_aarch64_common +#define SNOW3G_F8_4_BUFFER_INITIALIZE snow3g_f8_4_buffer_initialize_aarch64 +#define SNOW3G_F8_4_BUFFER_STREAM snow3g_f8_4_buffer_stream_aarch64 +#define SNOW3G_F8_1_BUFFER_STREAM snow3g_f8_1_buffer_stream_aarch64 +#define SNOW3G_F9_1_BUFFER_DIGEST snow3g_f9_1_buffer_digest_aarch64 +#define SNOW3G_F9_4_BUFFER_KEYSTREAM snow3g_f9_4_buffer_keystream_aarch64 + +#endif + +#include "mb_mgr_snow3g_submit_flush_common_aarch64.h" diff --git a/lib/aarch64/mb_mgr_snow3g_submit_flush_aarch64_no_aesni.c b/lib/aarch64/mb_mgr_snow3g_submit_flush_aarch64_no_aesni.c new file mode 100644 index 0000000000000000000000000000000000000000..56eafb845a3cccd1c52f82b854b5eaf615e3fe8b --- /dev/null +++ b/lib/aarch64/mb_mgr_snow3g_submit_flush_aarch64_no_aesni.c @@ -0,0 +1,41 @@ +/********************************************************************** + Copyright(c) 2022 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ +#ifndef SUBMIT_JOB_SNOW3G_UEA2 +#define SUBMIT_JOB_SNOW3G_UEA2 submit_job_snow3g_uea2_aarch64_no_aesni +#define FLUSH_JOB_SNOW3G_UEA2 flush_job_snow3g_uea2_aarch64_no_aesni +#define SUBMIT_JOB_SNOW3G_UIA2 submit_job_snow3g_uia2_aarch64_no_aesni +#define FLUSH_JOB_SNOW3G_UIA2 flush_job_snow3g_uia2_aarch64_no_aesni +#define SNOW3G_F8_4_BUFFER_INITIALIZE snow3g_f8_4_buffer_initialize_aarch64_no_aesni +#define SNOW3G_F8_4_BUFFER_STREAM snow3g_f8_4_buffer_stream_aarch64_no_aesni +#define SNOW3G_F8_1_BUFFER_STREAM snow3g_f8_1_buffer_stream_aarch64_no_aesni +#define SNOW3G_F9_1_BUFFER_DIGEST snow3g_f9_1_buffer_digest_aarch64_no_aesni +#define SNOW3G_F9_4_BUFFER_KEYSTREAM snow3g_f9_4_buffer_keystream_aarch64_no_aesni +#endif + +#include "mb_mgr_snow3g_submit_flush_common_aarch64.h" diff --git a/lib/aarch64/mb_mgr_snow3g_submit_flush_common_aarch64.h b/lib/aarch64/mb_mgr_snow3g_submit_flush_common_aarch64.h new file mode 100644 index 0000000000000000000000000000000000000000..0773c2c338af20743764871062bed0ea63a7d659 --- /dev/null +++ b/lib/aarch64/mb_mgr_snow3g_submit_flush_common_aarch64.h @@ -0,0 +1,531 @@ +/********************************************************************** + Copyright(c) 2022 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ +#ifndef MB_MGR_SNOW3G_SUBMIT_FLUSH_AARCH64_H +#define MB_MGR_SNOW3G_SUBMIT_FLUSH_AARCH64_H + +#include "include/ipsec_ooo_mgr.h" +#include "snow3g_internal.h" +#include "snow3g.h" +#include +#include +#include + +#define SNOW3G_MB_MAX_LANES_SIMD 4 + +#define INIT_DONE_MASK 0x0F +#define INIT_ALL_DONE INIT_DONE_MASK +#define JOB_IS_COMPLETED(state, i) \ + (((state->job_in_lane[i]) != NULL) && (state->args.byte_length[i] == 0)) +#define JOB_NOT_INITIALIZED(state, i) \ + ((state->args.INITIALIZED[i] == 0)) +#define JOB_INITIALIZED(state, i) \ + ((state->args.INITIALIZED[i] == 1)) +#define JOB_IS_NULL(state, i) \ + (state->job_in_lane[i] == NULL) + + +IMB_JOB *SUBMIT_JOB_SNOW3G_UEA2(IMB_MGR *state, IMB_JOB *job); +IMB_JOB *FLUSH_JOB_SNOW3G_UEA2(IMB_MGR *state); +IMB_JOB *SUBMIT_JOB_SNOW3G_UIA2(IMB_MGR *state, IMB_JOB *job); +IMB_JOB *FLUSH_JOB_SNOW3G_UIA2(IMB_MGR *state); + +static void snow3g_mb_mgr_insert_uea2_job(MB_MGR_SNOW3G_OOO *state, IMB_JOB *job) +{ + uint64_t used_lane_idx = state->unused_lanes & 0xff; + assert(used_lane_idx < SNOW3G_MB_MAX_LANES_SIMD); + state->unused_lanes = state->unused_lanes >> 8; + state->args.iv[used_lane_idx] = job->iv; + state->args.keys[used_lane_idx] = job->enc_keys; + state->args.in[used_lane_idx] = job->src + job->cipher_start_src_offset_in_bytes; + state->args.out[used_lane_idx] = job->dst; + state->args.byte_length[used_lane_idx] = job->msg_len_to_cipher_in_bits / 8; + state->args.INITIALIZED[used_lane_idx] = 0; + state->lens[used_lane_idx] = job->msg_len_to_cipher_in_bits / 8; + + state->job_in_lane[used_lane_idx] = job; +} + +static void snow3g_mb_mgr_insert_uia2_job(MB_MGR_SNOW3G_OOO *state, IMB_JOB *job) +{ + uint64_t used_lane_idx = state->unused_lanes & 0xff; + assert(used_lane_idx < SNOW3G_MB_MAX_LANES_SIMD); + state->unused_lanes = state->unused_lanes >> 8; + state->num_lanes_inuse++; + state->args.iv[used_lane_idx] = job->u.SNOW3G_UIA2._iv; + state->args.keys[used_lane_idx] = job->u.SNOW3G_UIA2._key; + state->args.in[used_lane_idx] = job->src + job->hash_start_src_offset_in_bytes; + state->args.out[used_lane_idx] = job->auth_tag_output; + state->args.INITIALIZED[used_lane_idx] = 0; + state->lens[used_lane_idx] = job->msg_len_to_hash_in_bits; + state->init_done = state->init_done & (~(1 << used_lane_idx) & 0xff); + + state->job_in_lane[used_lane_idx] = job; +} + +static IMB_JOB *snow3g_mb_mgr_free_uea2_job(MB_MGR_SNOW3G_OOO *state) +{ + IMB_JOB *ret = NULL; + + for (int i = 0; i <= SNOW3G_MB_MAX_LANES_SIMD; i++) { + if (JOB_IS_COMPLETED(state, i)) { + ret = state->job_in_lane[i]; + ret->status |= IMB_STATUS_COMPLETED_CIPHER; + state->job_in_lane[i] = NULL; + state->unused_lanes = state->unused_lanes << 8; + state->unused_lanes |= i; + state->lens[i] = 0; + state->args.INITIALIZED[i] = 0; +#ifdef SAFE_DATA + state->args.LFSR_0[i] = 0; + state->args.LFSR_1[i] = 0; + state->args.LFSR_2[i] = 0; + state->args.LFSR_3[i] = 0; + state->args.LFSR_4[i] = 0; + state->args.LFSR_5[i] = 0; + state->args.LFSR_6[i] = 0; + state->args.LFSR_7[i] = 0; + state->args.LFSR_8[i] = 0; + state->args.LFSR_9[i] = 0; + state->args.LFSR_10[i] = 0; + state->args.LFSR_11[i] = 0; + state->args.LFSR_12[i] = 0; + state->args.LFSR_13[i] = 0; + state->args.LFSR_14[i] = 0; + state->args.LFSR_15[i] = 0; + state->args.FSM_1[i] = 0; + state->args.FSM_2[i] = 0; + state->args.FSM_3[i] = 0; +#endif + break; + } + } + + return ret; +} + +static IMB_JOB *snow3g_mb_mgr_free_uia2_job(MB_MGR_SNOW3G_OOO *state, int i) +{ + IMB_JOB *ret = NULL; + assert(!JOB_IS_NULL(state, i)); + ret = state->job_in_lane[i]; + ret->status |= IMB_STATUS_COMPLETED_AUTH; + state->job_in_lane[i] = NULL; + state->unused_lanes = state->unused_lanes << 8; + state->unused_lanes |= i; + state->num_lanes_inuse--; + state->lens[i] = 0; + state->args.INITIALIZED[i] = 0; + state->init_done = state->init_done & (~(1 << i) & 0xff); + +#ifdef SAFE_DATA + state->args.LFSR_0[i] = 0; + state->args.LFSR_1[i] = 0; + state->args.LFSR_2[i] = 0; + state->args.LFSR_3[i] = 0; + state->args.LFSR_4[i] = 0; + state->args.LFSR_5[i] = 0; + state->args.LFSR_6[i] = 0; + state->args.LFSR_7[i] = 0; + state->args.LFSR_8[i] = 0; + state->args.LFSR_9[i] = 0; + state->args.LFSR_10[i] = 0; + state->args.LFSR_11[i] = 0; + state->args.LFSR_12[i] = 0; + state->args.LFSR_13[i] = 0; + state->args.LFSR_14[i] = 0; + state->args.LFSR_15[i] = 0; + state->args.FSM_1[i] = 0; + state->args.FSM_2[i] = 0; + state->args.FSM_3[i] = 0; + for (int k = 0; k < 5; k++) { + state->ks[i * 5 + k] = 0; + } +#endif + + return ret; +} + +__forceinline +void cpy_snow3g_state_to_ctx_1(snow3gKeyState1_t* ctx, MB_MGR_SNOW3G_OOO* state, const int num_lane) { + SNOW3G_ARGS args = state->args; + ctx->LFSR_S[0] = args.LFSR_0[num_lane]; + ctx->LFSR_S[1] = args.LFSR_1[num_lane]; + ctx->LFSR_S[2] = args.LFSR_2[num_lane]; + ctx->LFSR_S[3] = args.LFSR_3[num_lane]; + ctx->LFSR_S[4] = args.LFSR_4[num_lane]; + ctx->LFSR_S[5] = args.LFSR_5[num_lane]; + ctx->LFSR_S[6] = args.LFSR_6[num_lane]; + ctx->LFSR_S[7] = args.LFSR_7[num_lane]; + ctx->LFSR_S[8] = args.LFSR_8[num_lane]; + ctx->LFSR_S[9] = args.LFSR_9[num_lane]; + ctx->LFSR_S[10] = args.LFSR_10[num_lane]; + ctx->LFSR_S[11] = args.LFSR_11[num_lane]; + ctx->LFSR_S[12] = args.LFSR_12[num_lane]; + ctx->LFSR_S[13] = args.LFSR_13[num_lane]; + ctx->LFSR_S[14] = args.LFSR_14[num_lane]; + ctx->LFSR_S[15] = args.LFSR_15[num_lane]; + ctx->FSM_R1 = args.FSM_1[num_lane]; + ctx->FSM_R2 = args.FSM_2[num_lane]; + ctx->FSM_R3 = args.FSM_3[num_lane]; +} + +__forceinline +void cpy_snow3g_ctx_to_state_after_stream(MB_MGR_SNOW3G_OOO* state, snow3gKeyState4_t* ctx) { + SNOW3G_ARGS *args = &(state->args); + const uint32_t *pLFSR_0 = (const uint32_t *) &ctx->LFSR_X[ctx->iLFSR_X]; + const uint32_t *pLFSR_1 = (const uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 1) & 15]; + const uint32_t *pLFSR_2 = (const uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 2) & 15]; + const uint32_t *pLFSR_3 = (const uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 3) & 15]; + const uint32_t *pLFSR_4 = (const uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 4) & 15]; + const uint32_t *pLFSR_5 = (const uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 5) & 15]; + const uint32_t *pLFSR_6 = (const uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 6) & 15]; + const uint32_t *pLFSR_7 = (const uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 7) & 15]; + const uint32_t *pLFSR_8 = (const uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 8) & 15]; + const uint32_t *pLFSR_9 = (const uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 9) & 15]; + const uint32_t *pLFSR_10 = (const uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 10) & 15]; + const uint32_t *pLFSR_11 = (const uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 11) & 15]; + const uint32_t *pLFSR_12 = (const uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 12) & 15]; + const uint32_t *pLFSR_13 = (const uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 13) & 15]; + const uint32_t *pLFSR_14 = (const uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 14) & 15]; + const uint32_t *pLFSR_15 = (const uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 15) & 15]; + const uint32_t *pFSM_1 = (const uint32_t *) &ctx->FSM_X[0]; + const uint32_t *pFSM_2 = (const uint32_t *) &ctx->FSM_X[1]; + const uint32_t *pFSM_3 = (const uint32_t *) &ctx->FSM_X[2]; + for (int i = 0; i < SNOW3G_MB_MAX_LANES_SIMD; i++) { + if (!JOB_IS_COMPLETED(state, i)) { + args->LFSR_0[i] = pLFSR_0[i]; + args->LFSR_1[i] = pLFSR_1[i]; + args->LFSR_2[i] = pLFSR_2[i]; + args->LFSR_3[i] = pLFSR_3[i]; + args->LFSR_4[i] = pLFSR_4[i]; + args->LFSR_5[i] = pLFSR_5[i]; + args->LFSR_6[i] = pLFSR_6[i]; + args->LFSR_7[i] = pLFSR_7[i]; + args->LFSR_8[i] = pLFSR_8[i]; + args->LFSR_9[i] = pLFSR_9[i]; + args->LFSR_10[i] = pLFSR_10[i]; + args->LFSR_11[i] = pLFSR_11[i]; + args->LFSR_12[i] = pLFSR_12[i]; + args->LFSR_13[i] = pLFSR_13[i]; + args->LFSR_14[i] = pLFSR_14[i]; + args->LFSR_15[i] = pLFSR_15[i]; + args->FSM_1[i] = pFSM_1[i]; + args->FSM_2[i] = pFSM_2[i]; + args->FSM_3[i] = pFSM_3[i]; + } + } +} + +__forceinline +void cpy_snow3g_state_to_ctx_after_initialize(snow3gKeyState4_t* ctx, MB_MGR_SNOW3G_OOO* state) { + SNOW3G_ARGS *args = &(state->args); + uint32_t *pLFSR_0 = (uint32_t *) &ctx->LFSR_X[ctx->iLFSR_X]; + uint32_t *pLFSR_1 = (uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 1) & 15]; + uint32_t *pLFSR_2 = (uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 2) & 15]; + uint32_t *pLFSR_3 = (uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 3) & 15]; + uint32_t *pLFSR_4 = (uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 4) & 15]; + uint32_t *pLFSR_5 = (uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 5) & 15]; + uint32_t *pLFSR_6 = (uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 6) & 15]; + uint32_t *pLFSR_7 = (uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 7) & 15]; + uint32_t *pLFSR_8 = (uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 8) & 15]; + uint32_t *pLFSR_9 = (uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 9) & 15]; + uint32_t *pLFSR_10 = (uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 10) & 15]; + uint32_t *pLFSR_11 = (uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 11) & 15]; + uint32_t *pLFSR_12 = (uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 12) & 15]; + uint32_t *pLFSR_13 = (uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 13) & 15]; + uint32_t *pLFSR_14 = (uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 14) & 15]; + uint32_t *pLFSR_15 = (uint32_t *) &ctx->LFSR_X[(ctx->iLFSR_X + 15) & 15]; + uint32_t *pFSM_1 = (uint32_t *) &ctx->FSM_X[0]; + uint32_t *pFSM_2 = (uint32_t *) &ctx->FSM_X[1]; + uint32_t *pFSM_3 = (uint32_t *) &ctx->FSM_X[2]; + for (int i = 0; i < SNOW3G_MB_MAX_LANES_SIMD; i++) { + if (JOB_INITIALIZED(state, i)) { + pLFSR_0[i] = args->LFSR_0[i]; + pLFSR_1[i] = args->LFSR_1[i]; + pLFSR_2[i] = args->LFSR_2[i]; + pLFSR_3[i] = args->LFSR_3[i]; + pLFSR_4[i] = args->LFSR_4[i]; + pLFSR_5[i] = args->LFSR_5[i]; + pLFSR_6[i] = args->LFSR_6[i]; + pLFSR_7[i] = args->LFSR_7[i]; + pLFSR_8[i] = args->LFSR_8[i]; + pLFSR_9[i] = args->LFSR_9[i]; + pLFSR_10[i] = args->LFSR_10[i]; + pLFSR_11[i] = args->LFSR_11[i]; + pLFSR_12[i] = args->LFSR_12[i]; + pLFSR_13[i] = args->LFSR_13[i]; + pLFSR_14[i] = args->LFSR_14[i]; + pLFSR_15[i] = args->LFSR_15[i]; + pFSM_1[i] = args->FSM_1[i]; + pFSM_2[i] = args->FSM_2[i]; + pFSM_3[i] = args->FSM_3[i]; + } + } +} + +IMB_JOB *SUBMIT_JOB_SNOW3G_UEA2(IMB_MGR *state, + IMB_JOB *job) +{ + MB_MGR_SNOW3G_OOO *snow3g_state = state->snow3g_uea2_ooo; + uint32_t msg_bitlen = job->msg_len_to_cipher_in_bits; + uint32_t msg_bitoff = job->cipher_start_src_offset_in_bits; + + /* Use bit length API if + * - msg length is not a multiple of bytes + * - bit offset is not a multiple of bytes + */ + if ((msg_bitlen & 0x07) || (msg_bitoff & 0x07)) { + IMB_SNOW3G_F8_1_BUFFER_BIT(state, job->enc_keys, job->iv, job->src, + job->dst, msg_bitlen, msg_bitoff); + job->status |= IMB_STATUS_COMPLETED_CIPHER; + return job; + } + + IMB_JOB *ret = NULL; + + snow3g_mb_mgr_insert_uea2_job(snow3g_state, job); + + ret = snow3g_mb_mgr_free_uea2_job(snow3g_state); + if (ret != NULL) + return ret; + + if (snow3g_state->unused_lanes != 0xff) + return NULL; + + uint32_t min_word_len = UINT32_MAX; + snow3gKeyState4_t ctx; + SNOW3G_F8_4_BUFFER_INITIALIZE(&ctx, snow3g_state->args.keys[0], snow3g_state->args.keys[1], + snow3g_state->args.keys[2], snow3g_state->args.keys[3], + snow3g_state->args.iv[0],snow3g_state->args.iv[1], + snow3g_state->args.iv[2],snow3g_state->args.iv[3]); + + cpy_snow3g_state_to_ctx_after_initialize(&ctx, snow3g_state); + + for (int i = 0; i < SNOW3G_MB_MAX_LANES_SIMD; i++) { + if (JOB_NOT_INITIALIZED(snow3g_state, i)) { + snow3g_state->args.INITIALIZED[i] = 1; + } + min_word_len = (min_word_len < snow3g_state->args.byte_length[i] / SNOW3G_4_BYTES) ? + min_word_len : snow3g_state->args.byte_length[i] / SNOW3G_4_BYTES; + } + + SNOW3G_F8_4_BUFFER_STREAM(&ctx, + snow3g_state->args.in[0],snow3g_state->args.out[0], + snow3g_state->args.in[1],snow3g_state->args.out[1], + snow3g_state->args.in[2],snow3g_state->args.out[2], + snow3g_state->args.in[3],snow3g_state->args.out[3], + min_word_len * SNOW3G_4_BYTES); + + for (int i = 0; i < SNOW3G_MB_MAX_LANES_SIMD; i++) { + snow3g_state->args.in[i] = (uint8_t *)snow3g_state->args.in[i] + + min_word_len * SNOW3G_4_BYTES; + snow3g_state->args.out[i] = (uint8_t *)snow3g_state->args.out[i] + + min_word_len * SNOW3G_4_BYTES; + snow3g_state->args.byte_length[i] -= min_word_len * SNOW3G_4_BYTES; + } + + cpy_snow3g_ctx_to_state_after_stream(snow3g_state, &ctx); + + for (int i = 0; i < SNOW3G_MB_MAX_LANES_SIMD; i++) { + //if less than one word left, finish job here. + if (snow3g_state->args.byte_length[i] < SNOW3G_4_BYTES && + snow3g_state->args.byte_length[i] != 0) { + snow3gKeyState1_t ctx_1; + cpy_snow3g_state_to_ctx_1(&ctx_1, snow3g_state, i); + SNOW3G_F8_1_BUFFER_STREAM(&ctx_1, snow3g_state->args.in[i], + snow3g_state->args.out[i], + snow3g_state->args.byte_length[i]); + snow3g_state->args.byte_length[i] = 0; + } + } + + ret = snow3g_mb_mgr_free_uea2_job(snow3g_state); + +#ifdef SAFE_DATA + //data has been cleard in snow3g_mb_mgr_free_uea2_job. +#endif + + return ret; +} + +IMB_JOB *FLUSH_JOB_SNOW3G_UEA2(IMB_MGR *state) +{ + IMB_JOB *ret = NULL; + MB_MGR_SNOW3G_OOO *snow3g_state = state->snow3g_uea2_ooo; + ret = snow3g_mb_mgr_free_uea2_job(snow3g_state); + + if (ret != NULL) { + return ret; + } + + for (int i = 0; i <= SNOW3G_MB_MAX_LANES_SIMD; i++) { + if (snow3g_state->job_in_lane[i] != NULL && snow3g_state->lens[i] != 0) { + ret = snow3g_state->job_in_lane[i]; + + if (JOB_NOT_INITIALIZED(snow3g_state, i)) { + //if not initialized + IMB_SNOW3G_F8_1_BUFFER(state, snow3g_state->args.keys[i], + snow3g_state->args.iv[i], + snow3g_state->args.in[i], + snow3g_state->args.out[i], + snow3g_state->args.byte_length[i]); + } else { + snow3gKeyState1_t ctx; + cpy_snow3g_state_to_ctx_1(&ctx, snow3g_state, i); + SNOW3G_F8_1_BUFFER_STREAM(&ctx, snow3g_state->args.in[i], + snow3g_state->args.out[i], + snow3g_state->args.byte_length[i]); + } + + ret->status |= IMB_STATUS_COMPLETED_CIPHER; + snow3g_state->lens[i] = 0; + snow3g_state->job_in_lane[i] = NULL; + snow3g_state->unused_lanes = snow3g_state->unused_lanes << 8; + snow3g_state->unused_lanes |= i; + snow3g_state->args.byte_length[i] = 0; + snow3g_state->args.INITIALIZED[i] = 0; +#ifdef SAFE_DATA + snow3g_state->args.LFSR_0[i] = 0; + snow3g_state->args.LFSR_1[i] = 0; + snow3g_state->args.LFSR_2[i] = 0; + snow3g_state->args.LFSR_3[i] = 0; + snow3g_state->args.LFSR_4[i] = 0; + snow3g_state->args.LFSR_5[i] = 0; + snow3g_state->args.LFSR_6[i] = 0; + snow3g_state->args.LFSR_7[i] = 0; + snow3g_state->args.LFSR_8[i] = 0; + snow3g_state->args.LFSR_9[i] = 0; + snow3g_state->args.LFSR_10[i] = 0; + snow3g_state->args.LFSR_11[i] = 0; + snow3g_state->args.LFSR_12[i] = 0; + snow3g_state->args.LFSR_13[i] = 0; + snow3g_state->args.LFSR_14[i] = 0; + snow3g_state->args.LFSR_15[i] = 0; + snow3g_state->args.FSM_1[i] = 0; + snow3g_state->args.FSM_2[i] = 0; + snow3g_state->args.FSM_3[i] = 0; +#endif + return ret; + } + } + return NULL; +} + +IMB_JOB *SUBMIT_JOB_SNOW3G_UIA2(IMB_MGR *state, + IMB_JOB *job) +{ + MB_MGR_SNOW3G_OOO *snow3g_state = state->snow3g_uia2_ooo; + + IMB_JOB *ret = NULL; + + snow3g_mb_mgr_insert_uia2_job(snow3g_state, job); + + if (snow3g_state->unused_lanes != 0xff) + return NULL; + + if (snow3g_state->init_done == 0) { + //all lanes are not initialized. + snow3gKeyState4_t ctx; + SNOW3G_F8_4_BUFFER_INITIALIZE(&ctx, + snow3g_state->args.keys[0], snow3g_state->args.keys[1], + snow3g_state->args.keys[2], snow3g_state->args.keys[3], + snow3g_state->args.iv[0],snow3g_state->args.iv[1], + snow3g_state->args.iv[2],snow3g_state->args.iv[3]); + SNOW3G_F9_4_BUFFER_KEYSTREAM(&ctx, + &snow3g_state->ks[0*5], + &snow3g_state->ks[1*5], + &snow3g_state->ks[2*5], + &snow3g_state->ks[3*5]); + snow3g_state->init_done = INIT_ALL_DONE; + } + + for (int i = 0; i < SNOW3G_MB_MAX_LANES_SIMD; i++) { + if (snow3g_state->init_done & (1 << i)) { + //pick a initialized lane + SNOW3G_F9_1_BUFFER_DIGEST(&snow3g_state->ks[i*5], snow3g_state->args.in[i], + snow3g_state->lens[i], snow3g_state->args.out[i]); + ret = snow3g_mb_mgr_free_uia2_job(snow3g_state, i); + break; + } + } + return ret; +} + +IMB_JOB *FLUSH_JOB_SNOW3G_UIA2(IMB_MGR *state) +{ + IMB_JOB *ret = NULL; + MB_MGR_SNOW3G_OOO *snow3g_state = state->snow3g_uia2_ooo; + + if (snow3g_state->num_lanes_inuse == 0) { + //empty + return NULL; + } + for (int i = 0; i < SNOW3G_MB_MAX_LANES_SIMD; i++) { + if (snow3g_state->init_done & (1<ks[i*5], snow3g_state->args.in[i], + snow3g_state->lens[i], snow3g_state->args.out[i]); + ret = snow3g_mb_mgr_free_uia2_job(snow3g_state, i); + return ret; + } + } + int lane_idx; + for (int i = 0; i < SNOW3G_MB_MAX_LANES_SIMD; i++) { + if (!JOB_IS_NULL(snow3g_state, i)) { + snow3g_state->init_done |= (1<args.keys[i] = snow3g_state->args.keys[lane_idx]; + snow3g_state->args.iv[i] = snow3g_state->args.iv[lane_idx]; + } + } + + snow3gKeyState4_t ctx; + SNOW3G_F8_4_BUFFER_INITIALIZE(&ctx, + snow3g_state->args.keys[0], snow3g_state->args.keys[1], + snow3g_state->args.keys[2], snow3g_state->args.keys[3], + snow3g_state->args.iv[0],snow3g_state->args.iv[1], + snow3g_state->args.iv[2],snow3g_state->args.iv[3]); + SNOW3G_F9_4_BUFFER_KEYSTREAM(&ctx, + &snow3g_state->ks[0*5], + &snow3g_state->ks[1*5], + &snow3g_state->ks[2*5], + &snow3g_state->ks[3*5]); + //pick a initialized lane + SNOW3G_F9_1_BUFFER_DIGEST(&snow3g_state->ks[lane_idx*5], snow3g_state->args.in[lane_idx], + snow3g_state->lens[lane_idx], snow3g_state->args.out[lane_idx]); + ret = snow3g_mb_mgr_free_uia2_job(snow3g_state, lane_idx); + return ret; +} + +#endif //MB_MGR_SNOW3G_SUBMIT_FLUSH_AARCH64_H diff --git a/lib/aarch64/snow3g_aarch64.c b/lib/aarch64/snow3g_aarch64.c index 6ff912bb4460d5fa0f6925dff70549cf4cb9a385..4b4172fd6f2380a05fd592295c4ac8d8ef2fa986 100644 --- a/lib/aarch64/snow3g_aarch64.c +++ b/lib/aarch64/snow3g_aarch64.c @@ -1,5 +1,5 @@ /********************************************************************** - Copyright(c) 2021 Arm Corporation All rights reserved. + Copyright(c) 2021-2022 Arm Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -32,10 +32,16 @@ #define SNOW3G_F8_4_BUFFER snow3g_f8_4_buffer_aarch64 #define SNOW3G_F8_8_BUFFER snow3g_f8_8_buffer_aarch64 #define SNOW3G_F8_N_BUFFER snow3g_f8_n_buffer_aarch64 +#define SNOW3G_F8_4_BUFFER_MULTIKEY snow3g_f8_4_buffer_multikey_aarch64 #define SNOW3G_F8_8_BUFFER_MULTIKEY snow3g_f8_8_buffer_multikey_aarch64 #define SNOW3G_F8_N_BUFFER_MULTIKEY snow3g_f8_n_buffer_multikey_aarch64 #define SNOW3G_F9_1_BUFFER snow3g_f9_1_buffer_aarch64 #define SNOW3G_INIT_KEY_SCHED snow3g_init_key_sched_aarch64 #define SNOW3G_KEY_SCHED_SIZE snow3g_key_sched_size_aarch64 +#define SNOW3G_F8_4_BUFFER_INITIALIZE snow3g_f8_4_buffer_initialize_aarch64 +#define SNOW3G_F8_4_BUFFER_STREAM snow3g_f8_4_buffer_stream_aarch64 +#define SNOW3G_F8_1_BUFFER_STREAM snow3g_f8_1_buffer_stream_aarch64 +#define SNOW3G_F9_1_BUFFER_DIGEST snow3g_f9_1_buffer_digest_aarch64 +#define SNOW3G_F9_4_BUFFER_KEYSTREAM snow3g_f9_4_buffer_keystream_aarch64 #include "snow3g_common_aarch64.h" diff --git a/lib/aarch64/snow3g_aarch64_no_aesni.c b/lib/aarch64/snow3g_aarch64_no_aesni.c index fbc861b23eddfc3373e838a67b5c8c885f52c9b7..f5a9e589bd07fc3f240ac2894efcfa48d0a9768f 100644 --- a/lib/aarch64/snow3g_aarch64_no_aesni.c +++ b/lib/aarch64/snow3g_aarch64_no_aesni.c @@ -1,5 +1,5 @@ /********************************************************************** - Copyright(c) 2021 Arm Corporation All rights reserved. + Copyright(c) 2021-2022 Arm Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -34,10 +34,16 @@ #define SNOW3G_F8_4_BUFFER snow3g_f8_4_buffer_aarch64_no_aesni #define SNOW3G_F8_8_BUFFER snow3g_f8_8_buffer_aarch64_no_aesni #define SNOW3G_F8_N_BUFFER snow3g_f8_n_buffer_aarch64_no_aesni +#define SNOW3G_F8_4_BUFFER_MULTIKEY snow3g_f8_4_buffer_multikey_aarch64_no_aesni #define SNOW3G_F8_8_BUFFER_MULTIKEY snow3g_f8_8_buffer_multikey_aarch64_no_aesni #define SNOW3G_F8_N_BUFFER_MULTIKEY snow3g_f8_n_buffer_multikey_aarch64_no_aesni #define SNOW3G_F9_1_BUFFER snow3g_f9_1_buffer_aarch64_no_aesni #define SNOW3G_INIT_KEY_SCHED snow3g_init_key_sched_aarch64_no_aesni #define SNOW3G_KEY_SCHED_SIZE snow3g_key_sched_size_aarch64_no_aesni +#define SNOW3G_F8_4_BUFFER_INITIALIZE snow3g_f8_4_buffer_initialize_aarch64_no_aesni +#define SNOW3G_F8_4_BUFFER_STREAM snow3g_f8_4_buffer_stream_aarch64_no_aesni +#define SNOW3G_F8_1_BUFFER_STREAM snow3g_f8_1_buffer_stream_aarch64_no_aesni +#define SNOW3G_F9_1_BUFFER_DIGEST snow3g_f9_1_buffer_digest_aarch64_no_aesni +#define SNOW3G_F9_4_BUFFER_KEYSTREAM snow3g_f9_4_buffer_keystream_aarch64_no_aesni #include "snow3g_common_aarch64.h" diff --git a/lib/aarch64/snow3g_common_aarch64.h b/lib/aarch64/snow3g_common_aarch64.h index d8da71edd1fbaf9bb3da8a11e9a9051e89a73eb1..f404924250285037b962e2d606de3cee31eb9939 100644 --- a/lib/aarch64/snow3g_common_aarch64.h +++ b/lib/aarch64/snow3g_common_aarch64.h @@ -34,13 +34,14 @@ #include #include "ipsec-mb.h" +#include "snow3g_internal.h" #include "include/wireless_common.h" #include "snow3g.h" #include "snow3g_tables.h" #include "constant_lookup_aarch64.h" #include "clear_regs_mem_aarch64.h" #ifdef NO_AESNI -#include "include/aesni_emu.h" +#include "aesni_emu_aarch64.h" #endif #ifdef SAFE_PARAM #include "include/error.h" @@ -49,258 +50,6 @@ #define CLEAR_MEM clear_mem #define CLEAR_VAR clear_var -#define MAX_KEY_LEN (16) -#define SNOW3G_4_BYTES (4) -#define SNOW3G_8_BYTES (8) -#define SNOW3G_8_BITS (8) -#define SNOW3G_16_BYTES (16) -#define SNOW3G_16_BITS (16) - -#define SNOW3G_BLOCK_SIZE (8) - -#define SNOW3G_KEY_LEN_IN_BYTES (16) /* 128b */ -#define SNOW3G_IV_LEN_IN_BYTES (16) /* 128b */ - -#define SNOW3GCONSTANT (0x1b) - -/* Range of input data for SNOW3G is from 1 to 2^32 bits */ -#define SNOW3G_MIN_LEN 1 -#define SNOW3G_MAX_BITLEN (UINT32_MAX) -#define SNOW3G_MAX_BYTELEN (UINT32_MAX / 8) - -typedef union SafeBuffer { - uint64_t b64; - uint32_t b32[2]; - uint8_t b8[SNOW3G_8_BYTES]; -} SafeBuf; - -typedef struct snow3gKeyState1_s { - /* 16 LFSR stages */ - uint32_t LFSR_S[16]; - /* 3 FSM states */ - uint32_t FSM_R3; - uint32_t FSM_R2; - uint32_t FSM_R1; -} DECLARE_ALIGNED(snow3gKeyState1_t, 16); - -typedef struct snow3gKeyState4_s { - /* 16 LFSR stages */ - uint32x4_t LFSR_X[16]; - /* 3 FSM states */ - uint32x4_t FSM_X[3]; - uint32_t iLFSR_X; -} snow3gKeyState4_t; - -/** - * @brief Finds minimum 32-bit value in an array - * @return Min 32-bit value - */ -static inline uint32_t -length_find_min(const uint32_t *out_array, const size_t dim_array) -{ - size_t i; - uint32_t min = 0; - - if (dim_array > 0) - min = out_array[0]; - - for (i = 1; i < dim_array; i++) - if (out_array[i] < min) - min = out_array[i]; - - return min; -} - -/** - * @brief Subtracts \a subv from a vector of 32-bit words - */ -static inline void -length_sub(uint32_t *out_array, const size_t dim_array, const uint32_t subv) -{ - size_t i; - - for (i = 0; i < dim_array; i++) - out_array[i] -= subv; -} - -#ifdef SAFE_PARAM -/** - * @brief Checks vector of length values against 0 and SNOW3G_MAX_BYTELEN values - * @retval 0 incorrect length value found - * @retval 1 all OK - */ -static inline uint32_t -length_check(const uint32_t *out_array, const size_t dim_array) -{ - size_t i; - - if (out_array == NULL) { - imb_set_errno(NULL, IMB_ERR_CIPH_LEN); - return 0; - } - - for (i = 0; i < dim_array; i++) { - if ((out_array[i] == 0) || - (out_array[i] > SNOW3G_MAX_BYTELEN)) { - imb_set_errno(NULL, IMB_ERR_CIPH_LEN); - return 0; - } - } - - return 1; -} -#endif - -/** - * @brief Copies 4 32-bit length values into an array - */ -static inline void -length_copy_4(uint32_t *out_array, - const uint32_t length1, const uint32_t length2, - const uint32_t length3, const uint32_t length4) -{ - out_array[0] = length1; - out_array[1] = length2; - out_array[2] = length3; - out_array[3] = length4; -} - -/** - * @brief Copies 8 32-bit length values into an array - */ -static inline void -length_copy_8(uint32_t *out_array, - const uint32_t length1, const uint32_t length2, - const uint32_t length3, const uint32_t length4, - const uint32_t length5, const uint32_t length6, - const uint32_t length7, const uint32_t length8) -{ - out_array[0] = length1; - out_array[1] = length2; - out_array[2] = length3; - out_array[3] = length4; - out_array[4] = length5; - out_array[5] = length6; - out_array[6] = length7; - out_array[7] = length8; -} - -#ifdef SAFE_PARAM -/** - * @brief Checks vector of pointers against NULL - * @retval 0 incorrect pointer found - * @retval 1 all OK - */ -static inline int -ptr_check(void *out_array[], const size_t dim_array, const int errnum) -{ - size_t i; - - if (out_array == NULL) { - imb_set_errno(NULL, errnum); - return 0; - } - for (i = 0; i < dim_array; i++) - if (out_array[i] == NULL) { - imb_set_errno(NULL, errnum); - return 0; - } - return 1; -} -#endif - -#ifdef SAFE_PARAM -/** - * @brief Checks vector of const pointers against NULL - * @retval 0 incorrect pointer found - * @retval 1 all OK - */ -static inline int -cptr_check(const void * const out_array[], - const size_t dim_array, - const int errnum) -{ - size_t i; - - if (out_array == NULL) { - imb_set_errno(NULL, errnum); - return 0; - } - for (i = 0; i < dim_array; i++) - if (out_array[i] == NULL) { - imb_set_errno(NULL, errnum); - return 0; - } - - return 1; -} -#endif - -/** - * @brief Copies 4 pointers into an array - */ -static inline void -ptr_copy_4(void *out_array[], - void *ptr1, void *ptr2, void *ptr3, void *ptr4) -{ - out_array[0] = ptr1; - out_array[1] = ptr2; - out_array[2] = ptr3; - out_array[3] = ptr4; -} - -/** - * @brief Copies 4 const pointers into an array - */ -static inline void -cptr_copy_4(const void *out_array[], - const void *ptr1, const void *ptr2, - const void *ptr3, const void *ptr4) -{ - out_array[0] = ptr1; - out_array[1] = ptr2; - out_array[2] = ptr3; - out_array[3] = ptr4; -} - -/** - * @brief Copies 8 pointers into an array - */ -static inline void -ptr_copy_8(void *out_array[], - void *ptr1, void *ptr2, void *ptr3, void *ptr4, - void *ptr5, void *ptr6, void *ptr7, void *ptr8) -{ - out_array[0] = ptr1; - out_array[1] = ptr2; - out_array[2] = ptr3; - out_array[3] = ptr4; - out_array[4] = ptr5; - out_array[5] = ptr6; - out_array[6] = ptr7; - out_array[7] = ptr8; -} - -/** - * @brief Copies 8 const pointers into an array - */ -static inline void -cptr_copy_8(const void *out_array[], - const void *ptr1, const void *ptr2, - const void *ptr3, const void *ptr4, - const void *ptr5, const void *ptr6, - const void *ptr7, const void *ptr8) -{ - out_array[0] = ptr1; - out_array[1] = ptr2; - out_array[2] = ptr3; - out_array[3] = ptr4; - out_array[4] = ptr5; - out_array[5] = ptr6; - out_array[6] = ptr7; - out_array[7] = ptr8; -} - /** * @brief Wrapper for safe lookup of 16 indexes in 256x8-bit table * @param[in] indexes vector of 16x8-bit indexes to be looked up @@ -324,6 +73,27 @@ static inline void ShiftTwiceLFSR_1(snow3gKeyState1_t *pCtx) pCtx->LFSR_S[i] = pCtx->LFSR_S[i + 2]; } +#ifdef NO_AESNI +static inline void emulate_AESENC_WITHOUT_SHIFTROW(union xmm_reg *dst, + const union xmm_reg *key) +{ + union xmm_reg tmp = *dst; + + substitute_bytes(&tmp, &tmp); + mix_columns(&tmp, &tmp); + xor_xmm(dst, &tmp, key); +} + +static inline void emulate_AESENCLAST_WITHOUT_SHIFTROW(union xmm_reg *dst, + const union xmm_reg *src) +{ + union xmm_reg tmp = *dst; + + substitute_bytes(&tmp, &tmp); + xor_xmm(dst, &tmp, src); +} +#endif + /** * @brief SNOW3G S2 mix column correction function * @@ -408,7 +178,7 @@ static inline uint32_t S1_box(const uint32_t x) v.dword[0] = v.dword[1] = v.dword[2] = v.dword[3] = x; - emulate_AESENC(&v, &key); + emulate_AESENC_WITHOUT_SHIFTROW(&v, &key); return v.dword[0]; #else uint32x4_t dup_x; @@ -426,8 +196,8 @@ static inline uint32_t S1_box(const uint32_t x) /** * @brief Sbox S1 maps a 2x32bit input to a 2x32bit output * - * @param[in] x1 32-bit word to be passed through S1 box - * @param[in] x2 32-bit word to be passed through S1 box + * @param[in/out] x1 32-bit word to be passed through S1 box + * @param[in/out] x2 32-bit word to be passed through S1 box */ static inline void S1_box_2(uint32_t *x1, uint32_t *x2) { @@ -462,69 +232,27 @@ static inline void S1_box_2(uint32_t *x1, uint32_t *x2) static inline uint32x4_t S1_box_4(const uint32x4_t x) { #ifdef NO_AESNI - union xmm_reg key, v, vt; + union xmm_reg key, v; + v.dword[0] = vgetq_lane_u32(x, 0); + v.dword[1] = vgetq_lane_u32(x, 1); + v.dword[2] = vgetq_lane_u32(x, 2); + v.dword[3] = vgetq_lane_u32(x, 3); key.qword[0] = key.qword[1] = 0; - /* - * - Broadcast 32-bit word across XMM - * - Perform AES operations - */ - vt.dword[0] = vt.dword[1] = vt.dword[2] = vt.dword[3] = vgetq_lane_u32(x, 0); - emulate_AESENC(&vt, &key); - v.dword[0] = vt.dword[0]; - - vt.dword[0] = vt.dword[1] = vt.dword[2] = vt.dword[3] = vgetq_lane_u32(x, 1); - emulate_AESENC(&vt, &key); - v.dword[1] = vt.dword[0]; - - vt.dword[0] = vt.dword[1] = vt.dword[2] = vt.dword[3] = vgetq_lane_u32(x, 2); - emulate_AESENC(&vt, &key); - v.dword[2] = vt.dword[0]; - - vt.dword[0] = vt.dword[1] = vt.dword[2] = vt.dword[3] = vgetq_lane_u32(x, 3); - emulate_AESENC(&vt, &key); - v.dword[3] = vt.dword[0]; - + emulate_AESENC_WITHOUT_SHIFTROW(&v, &key); return vld1q_u32(&v.dword[0]); #else - const uint8x16_t m_zero = vdupq_n_u8(0); - uint8x16_t m1, m2, m3, m4; - uint32x4_t r1, r2; - - m1 = vreinterpretq_u8_u32(vdupq_n_u32(vgetq_lane_u32(x, 0))); - m2 = vreinterpretq_u8_u32(vdupq_n_u32(vgetq_lane_u32(x, 1))); - m3 = vreinterpretq_u8_u32(vdupq_n_u32(vgetq_lane_u32(x, 2))); - m4 = vreinterpretq_u8_u32(vdupq_n_u32(vgetq_lane_u32(x, 3))); + const uint8x16_t inv_aes_shift_row = {0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b, + 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03}; + uint8x16_t new_x = vreinterpretq_u8_u32(x); + new_x = vqtbl1q_u8(new_x, inv_aes_shift_row); - m1 = vaeseq_u8(m1, m_zero); - m1 = vaesmcq_u8(m1); - m2 = vaeseq_u8(m2, m_zero); - m2 = vaesmcq_u8(m2); - m3 = vaeseq_u8(m3, m_zero); - m3 = vaesmcq_u8(m3); - m4 = vaeseq_u8(m4, m_zero); - m4 = vaesmcq_u8(m4); - - /* - * Put results of AES operations back into - * two vectors of 32-bit words - * - * First step: - * r1 = [ 0-31 m1 | 0-31 m2 | 32-63 m1 | 32-63 m2 ] - * r2 = [ 0-31 m3 | 0-31 m4 | 32-63 m3 | 32-63 m4 ] - */ - r1 = vzip1q_u32(vreinterpretq_u32_u8(m1), vreinterpretq_u32_u8(m2)); - r2 = vzip1q_u32(vreinterpretq_u32_u8(m3), vreinterpretq_u32_u8(m4)); - r1 = vreinterpretq_u32_u64(vzip1q_u64(vreinterpretq_u64_u32(r1), - vreinterpretq_u64_u32(r2))); - /* - * The last step: - * r1 = [ 0-63 m1 | 0-63 m3 ] => - * [ 0-31 m1 | 0-31 m2 | 0-31 m3 | 0-31 m4 ] - */ - - return r1; + const uint8x16_t m_zero = vdupq_n_u8(0); + new_x = vaeseq_u8(new_x, m_zero); + new_x = vaesmcq_u8(new_x); + uint32x4_t ret = vreinterpretq_u32_u8(new_x); + return ret; #endif } @@ -552,8 +280,8 @@ static inline uint32_t S2_box(const uint32_t x) v_fixup = v; - emulate_AESENC(&v, &key); - emulate_AESENCLAST(&v_fixup, &key); + emulate_AESENC_WITHOUT_SHIFTROW(&v, &key); + emulate_AESENCLAST_WITHOUT_SHIFTROW(&v_fixup, &key); const uint8x16_t ret_mixc = vreinterpretq_u8_u32( vld1q_u32(&v.dword[0])); @@ -656,96 +384,33 @@ static inline uint32x4_t S2_box_4(const uint32x4_t x) /* use AESNI operations for the rest of the S2 box */ #ifdef NO_AESNI - union xmm_reg key, v, f; - union xmm_reg vt, ft; - + union xmm_reg key, vt, ft; key.qword[0] = key.qword[1] = 0; - /* - * - Broadcast 32-bit word across XMM and - * perform AES operations - * - Save result 32-bit words in v and f vectors. - * 'f' is used for fix-up of mixed columns only - */ - vt.dword[0] = vt.dword[1] = vt.dword[2] = vt.dword[3] = - vgetq_lane_u32(new_x, 0); - ft = vt; - emulate_AESENC(&vt, &key); - emulate_AESENCLAST(&ft, &key); - v.dword[0] = vt.dword[0]; - f.dword[0] = ft.dword[0]; - - vt.dword[0] = vt.dword[1] = vt.dword[2] = vt.dword[3] = - vgetq_lane_u32(new_x, 1); - ft = vt; - emulate_AESENC(&vt, &key); - emulate_AESENCLAST(&ft, &key); - v.dword[1] = vt.dword[0]; - f.dword[1] = ft.dword[0]; - - vt.dword[0] = vt.dword[1] = vt.dword[2] = vt.dword[3] = - vgetq_lane_u32(new_x, 2); + vt.dword[0] = vgetq_lane_u32(new_x, 0); + vt.dword[1] = vgetq_lane_u32(new_x, 1); + vt.dword[2] = vgetq_lane_u32(new_x, 2); + vt.dword[3] = vgetq_lane_u32(new_x, 3); ft = vt; - emulate_AESENC(&vt, &key); - emulate_AESENCLAST(&ft, &key); - v.dword[2] = vt.dword[0]; - f.dword[2] = ft.dword[0]; - vt.dword[0] = vt.dword[1] = vt.dword[2] = vt.dword[3] = - vgetq_lane_u32(new_x, 3); - ft = vt; - emulate_AESENC(&vt, &key); - emulate_AESENCLAST(&ft, &key); - v.dword[3] = vt.dword[0]; - f.dword[3] = ft.dword[0]; + emulate_AESENC_WITHOUT_SHIFTROW(&vt, &key); + emulate_AESENCLAST_WITHOUT_SHIFTROW(&ft, &key); - return s2_mixc_fixup_4(vreinterpretq_u8_u32(vld1q_u32(&f.dword[0])), - vreinterpretq_u8_u32(vld1q_u32(&v.dword[0]))); + return s2_mixc_fixup_4(vreinterpretq_u8_u32(vld1q_u32(&ft.dword[0])), + vreinterpretq_u8_u32(vld1q_u32(&vt.dword[0]))); #else - const uint8x16_t zero = vdupq_n_u8(0); - uint8x16_t m1, m2, m3, m4, f1, f2, f3, f4, mixc, no_mixc; - uint32x4_t r1, r2; + const uint8x16_t inv_aes_shift_row = {0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b, + 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03}; + uint8x16_t new_new_x = vreinterpretq_u8_u32(new_x); + new_new_x = vqtbl1q_u8(new_new_x, inv_aes_shift_row); - m1 = vreinterpretq_u8_u32(vdupq_n_u32(vgetq_lane_u32(new_x, 0))); - m2 = vreinterpretq_u8_u32(vdupq_n_u32(vgetq_lane_u32(new_x, 1))); - m3 = vreinterpretq_u8_u32(vdupq_n_u32(vgetq_lane_u32(new_x, 2))); - m4 = vreinterpretq_u8_u32(vdupq_n_u32(vgetq_lane_u32(new_x, 3))); - - f1 = vaeseq_u8(m1, zero); // no_mixc - m1 = vaesmcq_u8(f1); - f2 = vaeseq_u8(m2, zero); - m2 = vaesmcq_u8(f2); - f3 = vaeseq_u8(m3, zero); - m3 = vaesmcq_u8(f3); - f4 = vaeseq_u8(m4, zero); - m4 = vaesmcq_u8(f4); - - /* - * Put results of AES operations back into - * two vectors of 32-bit words - * - * First step: - * m1 = [ 0-31 m1 | 0-31 m2 | 32-63 m1 | 32-63 m2 ] - * m3 = [ 0-31 m3 | 0-31 m4 | 32-63 m3 | 32-63 m4 ] - */ - /* - * The last step: - * m1 = [ 0-63 m1 | 0-63 m3 ] => - * [ 0-31 m1 | 0-31 m2 | 0-31 m3 | 0-31 m4 ] - * f1 = [ 0-63 f1 | 0-63 f3 ] => - * [ 0-31 f1 | 0-31 f2 | 0-31 f3 | 0-31 f4 ] - */ - r1 = vzip1q_u32(vreinterpretq_u32_u8(m1), vreinterpretq_u32_u8(m2)); - r2 = vzip1q_u32(vreinterpretq_u32_u8(m3), vreinterpretq_u32_u8(m4)); - mixc = vreinterpretq_u8_u64(vzip1q_u64(vreinterpretq_u64_u32(r1), - vreinterpretq_u64_u32(r2))); + const uint8x16_t m_zero = vdupq_n_u8(0); + uint8x16_t ret_nomixc, ret_mixc; - r1 = vzip1q_u32(vreinterpretq_u32_u8(f1), vreinterpretq_u32_u8(f2)); - r2 = vzip1q_u32(vreinterpretq_u32_u8(f3), vreinterpretq_u32_u8(f4)); - no_mixc = vreinterpretq_u8_u64(vzip1q_u64(vreinterpretq_u64_u32(r1), - vreinterpretq_u64_u32(r2))); + ret_nomixc = vaeseq_u8(new_new_x, m_zero); + ret_mixc = vaesmcq_u8(ret_nomixc); - return s2_mixc_fixup_4(no_mixc, mixc); + return s2_mixc_fixup_4(ret_nomixc, ret_mixc); #endif } @@ -1234,8 +899,8 @@ snow3gStateInitialize_1(snow3gKeyState1_t *pCtx, /** * @brief Generates 5 words of key stream used in the initial stages of F9. * - * @param[in] pCtx Context where the scheduled keys are stored - * @param[in/out] pKeyStream Pointer to the generated keystream + * @param[in/out] pCtx Context where the scheduled keys are stored + * @param[out] pKeyStream Pointer to the generated keystream */ static inline void snow3g_f9_keystream_words(snow3gKeyState1_t *pCtx, uint32_t *pKeyStream) @@ -1253,7 +918,7 @@ static inline void snow3g_f9_keystream_words(snow3gKeyState1_t *pCtx, /** * @brief LFSR array shift by one (4 lanes) - * @param[in] pCtx Context where the scheduled keys are stored + * @param[in/out] pCtx Context where the scheduled keys are stored */ static inline void ShiftLFSR_4(snow3gKeyState4_t *pCtx) { @@ -1331,7 +996,7 @@ static inline uint32x4_t C0_C11_4(const uint32x4_t L0, const uint32x4_t L11) * ^ table_Alpha_mul[LFSR[0] >> 24] * ^ LFSR[2] ^ LFSR[0] << 8 ^ LFSR[11] >> 8 * - * @param[in] pCtx Context where the scheduled keys are stored + * @param[in/out] pCtx Context where the scheduled keys are stored */ static inline void ClockLFSR_4(snow3gKeyState4_t *pCtx) { @@ -1357,7 +1022,7 @@ static inline void ClockLFSR_4(snow3gKeyState4_t *pCtx) * * It operates on 4 packets/lanes at a time * - * @param[in] pCtx Context where the scheduled keys are stored + * @param[in/out] pCtx Context where the scheduled keys are stored * @return 4 x 4bytes of key stream */ static inline uint32x4_t ClockFSM_4(snow3gKeyState4_t *pCtx) @@ -1381,7 +1046,7 @@ static inline uint32x4_t ClockFSM_4(snow3gKeyState4_t *pCtx) /** * @brief Generates 4 bytes of key stream 1 buffer at a time * - * @param[in] pCtx Context where the scheduled keys are stored + * @param[in/out] pCtx Context where the scheduled keys are stored * @return 4 bytes of key stream */ static inline uint32_t snow3g_keystream_1_4(snow3gKeyState1_t *pCtx) @@ -1396,7 +1061,7 @@ static inline uint32_t snow3g_keystream_1_4(snow3gKeyState1_t *pCtx) /** * @brief Generates 8 bytes of key stream for 1 buffer at a time * - * @param[in] pCtx Context where the scheduled keys are stored + * @param[in/out] pCtx Context where the scheduled keys are stored * @return 8 bytes of a key stream */ static inline uint64_t snow3g_keystream_1_8(snow3gKeyState1_t *pCtx) @@ -1472,8 +1137,7 @@ static inline uint64_t snow3g_keystream_1_8(snow3gKeyState1_t *pCtx) /** * @brief Generates 4 bytes of key stream 4 buffers at a time * - * @param[in] pCtx Context where the scheduled keys are stored - * @param[in/out] pKeyStream Pointer to generated key stream + * @param[in/out] pCtx Context where the scheduled keys are stored */ static inline uint32x4_t snow3g_keystream_4_4(snow3gKeyState4_t *pCtx) { @@ -1487,9 +1151,9 @@ static inline uint32x4_t snow3g_keystream_4_4(snow3gKeyState4_t *pCtx) /** * @brief Generates 8 bytes of key stream 4 buffers at a time * - * @param[in] pCtx Context where the scheduled keys are stored - * @param[in/out] pKeyStreamLo Pointer to lower end of generated key stream - * @param[in/out] pKeyStreamHi Pointer to higher end of generated key stream + * @param[in/out] pCtx Context where the scheduled keys are stored + * @param[out] pKeyStreamLo Pointer to lower end of generated key stream + * @param[out] pKeyStreamHi Pointer to higher end of generated key stream */ static inline void snow3g_keystream_4_8(snow3gKeyState4_t *pCtx, uint32x4_t *pKeyStreamLo, @@ -1570,8 +1234,8 @@ static inline void snow3g_keystream_4_8(snow3gKeyState4_t *pCtx, /** * @brief Generates 16 bytes of key stream 4 buffers at a time * - * @param[in] pCtx Context where the scheduled keys are stored - * @param[in/out] pKeyStream Pointer to store generated key stream + * @param[in/out] pCtx Context where the scheduled keys are stored + * @param[out] pKeyStream Pointer to store generated key stream */ static inline void snow3g_keystream_4_16(snow3gKeyState4_t *pCtx, uint32x4_t pKeyStream[4]) @@ -1612,7 +1276,7 @@ static inline void snow3g_keystream_4_16(snow3gKeyState4_t *pCtx, /** * @brief Initializes the key schedule for 4 buffers for SNOW3G f8/f9. * - * @param [in] pCtx Context where the scheduled keys are stored + * @param [in/out] pCtx Context where the scheduled keys are stored * @param [in] pKeySched Key schedule * @param [in] pIV1 IV for buffer 1 * @param [in] pIV2 IV for buffer 2 @@ -1633,7 +1297,7 @@ snow3gStateInitialize_4(snow3gKeyState4_t *pCtx, /* Load complete 128b IV into register */ static const uint64_t sm[2] = { - 0x0c0d0e0f08090a0bULL, 0x0405060700010203ULL + 0x0405060700010203ULL, 0x0c0d0e0f08090a0bULL }; R = vld1q_u32(pIV1); @@ -1706,6 +1370,112 @@ snow3gStateInitialize_4(snow3gKeyState4_t *pCtx, } } +/** + * @brief Initializes the four key schedule for 4 buffers for SNOW3G f8/f9. + * + * @param [in/out] pCtx Context where the scheduled keys are stored + * @param [in] pKeySched1 Key1 schedule + * @param [in] pKeySched2 Key2 schedule + * @param [in] pKeySched3 Key3 schedule + * @param [in] pKeySched4 Key4 schedule + * @param [in] pIV1 IV for buffer 1 + * @param [in] pIV2 IV for buffer 2 + * @param [in] pIV3 IV for buffer 3 + * @param [in] pIV4 IV for buffer 4 + */ +static inline void +snow3gStateInitialize_4_multikey(snow3gKeyState4_t *pCtx, + const snow3g_key_schedule_t *pKeySched1, + const snow3g_key_schedule_t *pKeySched2, + const snow3g_key_schedule_t *pKeySched3, + const snow3g_key_schedule_t *pKeySched4, + const void *pIV1, const void *pIV2, + const void *pIV3, const void *pIV4) +{ + uint32x4_t R, S, T, U; + uint32x4_t T0, T1; + int i; + + /* Initialize the LFSR table from constants, Keys, and IV */ + + /* Load complete 128b IV into register */ + static const uint64_t sm[2] = { + 0x0405060700010203ULL, 0x0c0d0e0f08090a0bULL + }; + + R = vld1q_u32(pIV1); + S = vld1q_u32(pIV2); + T = vld1q_u32(pIV3); + U = vld1q_u32(pIV4); + + uint32x4_t VK[4]; + uint32x4_t VL[4]; + /* initialize the array block */ + for (i = 0; i < 4; i++) { + VK[i] = vsetq_lane_u32(pKeySched1->k[i], VK[i], 0); + VK[i] = vsetq_lane_u32(pKeySched2->k[i], VK[i], 1); + VK[i] = vsetq_lane_u32(pKeySched3->k[i], VK[i], 2); + VK[i] = vsetq_lane_u32(pKeySched4->k[i], VK[i], 3); + VL[i] = ~VK[i]; + + pCtx->LFSR_X[i + 4] = + pCtx->LFSR_X[i + 12] = VK[i]; + pCtx->LFSR_X[i + 0] = + pCtx->LFSR_X[i + 8] = VL[i]; + } + + /* Update the schedule structure with IVs */ + /* Store the 4 IVs in LFSR by a column/row matrix swap + * after endianness correction */ + + /* endianness swap */ + const uint64x2_t swapMask = vld1q_u64(sm); + + R = vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(R), + vreinterpretq_u8_u64(swapMask))); + S = vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(S), + vreinterpretq_u8_u64(swapMask))); + T = vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(T), + vreinterpretq_u8_u64(swapMask))); + U = vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(U), + vreinterpretq_u8_u64(swapMask))); + + /* row/column dword inversion */ + T0 = vzip1q_u32(R, S); + R = vzip2q_u32(R, S); + T1 = vzip1q_u32(T, U); + T = vzip2q_u32(T, U); + + /* row/column qword inversion */ + U = vreinterpretq_u32_u64(vzip2q_u64(vreinterpretq_u64_u32(R), + vreinterpretq_u64_u32(T))); + T = vreinterpretq_u32_u64(vzip1q_u64(vreinterpretq_u64_u32(R), + vreinterpretq_u64_u32(T))); + S = vreinterpretq_u32_u64(vzip2q_u64(vreinterpretq_u64_u32(T0), + vreinterpretq_u64_u32(T1))); + R = vreinterpretq_u32_u64(vzip1q_u64(vreinterpretq_u64_u32(T0), + vreinterpretq_u64_u32(T1))); + + /* IV ^ LFSR */ + pCtx->LFSR_X[15] = pCtx->LFSR_X[15] ^ U; + pCtx->LFSR_X[12] = pCtx->LFSR_X[12] ^ T; + pCtx->LFSR_X[10] = pCtx->LFSR_X[10] ^ S; + pCtx->LFSR_X[9] = pCtx->LFSR_X[9] ^ R; + pCtx->iLFSR_X = 0; + + /* FSM initialization */ + pCtx->FSM_X[0] = pCtx->FSM_X[1] = + pCtx->FSM_X[2] = vdupq_n_u32(0); + + /* Initialisation rounds */ + for (i = 0; i < 32; i++) { + T1 = ClockFSM_4(pCtx); + + ClockLFSR_4(pCtx); + pCtx->LFSR_X[(pCtx->iLFSR_X + 15) & 15] = + pCtx->LFSR_X[(pCtx->iLFSR_X + 15) & 15] ^ T1; + } +} static inline void preserve_bits(uint64_t *KS, @@ -1736,7 +1506,7 @@ preserve_bits(uint64_t *KS, /** * @brief Core SNOW3G F8 bit algorithm for the 3GPP confidentiality algorithm * - * @param[in] pCtx Context where the scheduled keys are stored + * @param[in/out]pCtx Context where the scheduled keys are stored * @param[in] pIn Input buffer * @param[out] pOut Output buffer * @param[in] lengthInBits length in bits of the data to be encrypted @@ -1938,7 +1708,7 @@ static inline void f8_snow3g(snow3gKeyState1_t *pCtx, * @brief Extracts one state from a 4 buffer state structure. * * @param[in] pSrcState Pointer to the source state - * @param[in] pDstState Pointer to the destination state + * @param[out] pDstState Pointer to the destination state * @param[in] NumBuffer Buffer number */ static inline void snow3gStateConvert_4(const snow3gKeyState4_t *pSrcState, @@ -2148,10 +1918,10 @@ void SNOW3G_F8_1_BUFFER_BIT(const snow3g_key_schedule_t *pHandle, * @param[in] pIV1 pointer to IV * @param[in] pIV2 pointer to IV * @param[in] pBufIn1 pointer to an input buffer - * @param[in] pBufOut1 pointer to an output buffer + * @param[out]pBufOut1 pointer to an output buffer * @param[in] lenInBytes1 message size in bytes * @param[in] pBufIn2 pointer to an input buffer - * @param[in] pBufOut2 pointer to an output buffer + * @param[out]pBufOut2 pointer to an output buffer * @param[in] lenInBytes2 message size in bytes */ void SNOW3G_F8_2_BUFFER(const snow3g_key_schedule_t *pHandle, @@ -2224,6 +1994,166 @@ void SNOW3G_F8_2_BUFFER(const snow3g_key_schedule_t *pHandle, } + +/** + * @brief Four buffer F8 encrypt/decrypt with different key schedule + * + * Four packets enc/dec with different key schedule. + * The 4 keys and IVs are independent and are passed as an array of pointers. + * Each buffer and data length are separate. + * + * @param[in] pHandle1 pointer to precomputed key1 schedule + * @param[in] pHandle2 pointer to precomputed key2 schedule + * @param[in] pHandle3 pointer to precomputed key3 schedule + * @param[in] pHandle4 pointer to precomputed key4 schedule + * @param[in] pIV1 pointer to IV1 + * @param[in] pIV2 pointer to IV2 + * @param[in] pIV3 pointer to IV3 + * @param[in] pIV4 pointer to IV4 + * @param[in] pBufferIn1 pointer to an input buffer + * @param[out] pBufferOut1 pointer to an output buffer + * @param[in] lengthInBytes1 message size in bytes + * @param[in] pBufferIn2 pointer to an input buffer + * @param[out] pBufferOut2 pointer to an output buffer + * @param[in] lengthInBytes2 message size in bytes + * @param[in] pBufferIn3 pointer to an input buffer + * @param[out] pBufferOut3 pointer to an output buffer + * @param[in] lengthInBytes3 message size in bytes + * @param[in] pBufferIn4 pointer to an input buffer + * @param[out] pBufferOut4 pointer to an output buffer + * @param[in] lengthInBytes4 message size in bytes + */ +void SNOW3G_F8_4_BUFFER_MULTIKEY(const snow3g_key_schedule_t *pHandle1, + const snow3g_key_schedule_t *pHandle2, + const snow3g_key_schedule_t *pHandle3, + const snow3g_key_schedule_t *pHandle4, + const void *pIV1, + const void *pIV2, + const void *pIV3, + const void *pIV4, + const void *pBufferIn1, + void *pBufferOut1, + const uint32_t lengthInBytes1, + const void *pBufferIn2, + void *pBufferOut2, + const uint32_t lengthInBytes2, + const void *pBufferIn3, + void *pBufferOut3, + const uint32_t lengthInBytes3, + const void *pBufferIn4, + void *pBufferOut4, + const uint32_t lengthInBytes4) +{ + const size_t num_lanes = 4; + snow3gKeyState4_t ctx; + uint32_t lenInBytes[4]; + uint8_t *pBufferOut[4]; + const uint8_t *pBufferIn[4]; + uint32_t bytes, qwords, i; + + length_copy_4(lenInBytes, lengthInBytes1, lengthInBytes2, + lengthInBytes3, lengthInBytes4); + + cptr_copy_4((const void **)pBufferIn, + pBufferIn1, pBufferIn2, pBufferIn3, pBufferIn4); + + ptr_copy_4((void **)pBufferOut, pBufferOut1, pBufferOut2, + pBufferOut3, pBufferOut4); + +#ifdef SAFE_PARAM + /* reset error status */ + imb_set_errno(NULL, 0); + if ((pHandle1 == NULL) || (pHandle2 == NULL) || + (pHandle3 == NULL) || (pHandle4 == NULL)) { + imb_set_errno(NULL, IMB_ERR_NULL_EXP_KEY); + return; + } + if ((pIV1 == NULL) || pIV2 == NULL || + (pIV3 == NULL) || (pIV4 == NULL)) { + imb_set_errno(NULL, IMB_ERR_NULL_IV); + return; + } + if (!cptr_check((const void * const *)pBufferIn, num_lanes, + IMB_ERR_NULL_SRC)) + return; + if (!ptr_check((void **)pBufferOut, num_lanes, IMB_ERR_NULL_DST)) + return; + if (!length_check(lenInBytes, num_lanes)) + return; +#endif + +#ifdef SAFE_DATA + CLEAR_SCRATCH_SIMD_REGS(); +#endif /* SAFE_DATA */ + + /* find min common length */ + bytes = length_find_min(lenInBytes, num_lanes); + qwords = bytes / SNOW3G_8_BYTES; + + /* subtract min common length from all buffers */ + length_sub(lenInBytes, num_lanes, qwords * SNOW3G_8_BYTES); + + /* Initialize the schedule from the IV */ + snow3gStateInitialize_4_multikey(&ctx, pHandle1, pHandle2, pHandle3, pHandle4, pIV1, pIV2, pIV3, pIV4); + + /* Clock FSM and LFSR once, ignore the key stream */ + (void) snow3g_keystream_4_4(&ctx); + + /* generates 8 bytes at a time on all streams */ + while (qwords >= 2) { + uint32x4_t ks[4]; + + snow3g_keystream_4_16(&ctx, ks); + + for (i = 0; i < num_lanes; i++) { + const uint32x4_t in = vld1q_u32((const uint32_t *)pBufferIn[i]); + + vst1q_u32((uint32_t *)pBufferOut[i], in ^ ks[i]); + + pBufferOut[i] += (2 * SNOW3G_8_BYTES); + pBufferIn[i] += (2 * SNOW3G_8_BYTES); + } + + qwords = qwords - 2; + } + + while (qwords--) { + uint32x4_t H, L; /* 4 bytes of key stream */ + + snow3g_keystream_4_8(&ctx, &L, &H); + + pBufferIn[0] = xor_keystrm_rev(pBufferOut[0], pBufferIn[0], + vgetq_lane_u64(vreinterpretq_u64_u32(L), 0)); + pBufferIn[1] = xor_keystrm_rev(pBufferOut[1], pBufferIn[1], + vgetq_lane_u64(vreinterpretq_u64_u32(L), 1)); + pBufferIn[2] = xor_keystrm_rev(pBufferOut[2], pBufferIn[2], + vgetq_lane_u64(vreinterpretq_u64_u32(H), 0)); + pBufferIn[3] = xor_keystrm_rev(pBufferOut[3], pBufferIn[3], + vgetq_lane_u64(vreinterpretq_u64_u32(H), 1)); + + for (i = 0; i < num_lanes; i++) + pBufferOut[i] += SNOW3G_8_BYTES; + } + + /* process the remaining of each buffer + * - extract the LFSR and FSM structures + * - Continue process 1 buffer + */ + for (i = 0; i < num_lanes; i++) { + snow3gKeyState1_t ctx_t; + + if (lenInBytes[i] == 0) + continue; + snow3gStateConvert_4(&ctx, &ctx_t, i); + f8_snow3g(&ctx_t, pBufferIn[i], pBufferOut[i], lenInBytes[i]); + } +#ifdef SAFE_DATA + CLEAR_MEM(&ctx, sizeof(ctx)); + CLEAR_SCRATCH_GPS(); + CLEAR_SCRATCH_SIMD_REGS(); +#endif /* SAFE_DATA */ +} + /** * @brief Four buffer F8 encrypt/decrypt with the same key schedule * @@ -2237,16 +2167,16 @@ void SNOW3G_F8_2_BUFFER(const snow3g_key_schedule_t *pHandle, * @param[in] pIV3 pointer to IV * @param[in] pIV4 pointer to IV * @param[in] pBufferIn1 pointer to an input buffer - * @param[in] pBufferOut1 pointer to an output buffer + * @param[out]pBufferOut1 pointer to an output buffer * @param[in] lengthInBytes1 message size in bytes * @param[in] pBufferIn2 pointer to an input buffer - * @param[in] pBufferOut2 pointer to an output buffer + * @param[out]pBufferOut2 pointer to an output buffer * @param[in] lengthInBytes2 message size in bytes * @param[in] pBufferIn3 pointer to an input buffer - * @param[in] pBufferOut3 pointer to an output buffer + * @param[out]pBufferOut3 pointer to an output buffer * @param[in] lengthInBytes3 message size in bytes * @param[in] pBufferIn4 pointer to an input buffer - * @param[in] pBufferOut4 pointer to an output buffer + * @param[out]pBufferOut4 pointer to an output buffer * @param[in] lengthInBytes4 message size in bytes */ void SNOW3G_F8_4_BUFFER(const snow3g_key_schedule_t *pHandle, @@ -2322,7 +2252,7 @@ void SNOW3G_F8_4_BUFFER(const snow3g_key_schedule_t *pHandle, /* Clock FSM and LFSR once, ignore the key stream */ (void) snow3g_keystream_4_4(&ctx); - /* generates 8 bytes at a time on all streams */ + /* generates 16 bytes at a time on all streams */ while (qwords >= 2) { uint32x4_t ks[4]; @@ -2419,9 +2349,19 @@ void SNOW3G_F8_8_BUFFER_MULTIKEY(const snow3g_key_schedule_t * const pKey[], if (!length_check(lengthInBytes, num_lanes)) return; #endif - for (uint32_t i = 0; i < num_lanes; i++) - SNOW3G_F8_1_BUFFER(pKey[i], IV[i], BufferIn[i], BufferOut[i], - lengthInBytes[i]); + SNOW3G_F8_4_BUFFER_MULTIKEY(pKey[0], pKey[1], pKey[2], pKey[3], + IV[0], IV[1], IV[2], IV[3], + BufferIn[0], BufferOut[0], lengthInBytes[0], + BufferIn[1], BufferOut[1], lengthInBytes[1], + BufferIn[2], BufferOut[2], lengthInBytes[2], + BufferIn[3], BufferOut[3], lengthInBytes[3]); + + SNOW3G_F8_4_BUFFER_MULTIKEY(pKey[4], pKey[5], pKey[6], pKey[7], + IV[4], IV[5], IV[6], IV[7], + BufferIn[4], BufferOut[4], lengthInBytes[4], + BufferIn[5], BufferOut[5], lengthInBytes[5], + BufferIn[6], BufferOut[6], lengthInBytes[6], + BufferIn[7], BufferOut[7], lengthInBytes[7]); #ifdef SAFE_DATA CLEAR_SCRATCH_GPS(); CLEAR_SCRATCH_SIMD_REGS(); @@ -2446,28 +2386,28 @@ void SNOW3G_F8_8_BUFFER_MULTIKEY(const snow3g_key_schedule_t * const pKey[], * @param[in] pIV7 pointer to IV * @param[in] pIV8 pointer to IV * @param[in] pBufIn1 pointer to an input buffer - * @param[in] pBufOut1 pointer to an output buffer + * @param[out]pBufOut1 pointer to an output buffer * @param[in] lenInBytes1 message size in bytes * @param[in] pBufIn2 pointer to an input buffer - * @param[in] pBufOut2 pointer to an output buffer + * @param[out]pBufOut2 pointer to an output buffer * @param[in] lenInBytes2 message size in bytes * @param[in] pBufIn3 pointer to an input buffer - * @param[in] pBufOut3 pointer to an output buffer + * @param[out]pBufOut3 pointer to an output buffer * @param[in] lenInBytes3 message size in bytes * @param[in] pBufIn4 pointer to an input buffer - * @param[in] pBufOut4 pointer to an output buffer + * @param[out]pBufOut4 pointer to an output buffer * @param[in] lenInBytes4 message size in bytes * @param[in] pBufIn5 pointer to an input buffer - * @param[in] pBufOut5 pointer to an output buffer + * @param[out]pBufOut5 pointer to an output buffer * @param[in] lenInBytes5 message size in bytes * @param[in] pBufIn6 pointer to an input buffer - * @param[in] pBufOut6 pointer to an output buffer + * @param[out]pBufOut6 pointer to an output buffer * @param[in] lenInBytes6 message size in bytes * @param[in] pBufIn7 pointer to an input buffer - * @param[in] pBufOut7 pointer to an output buffer + * @param[out]pBufOut7 pointer to an output buffer * @param[in] lenInBytes7 message size in bytes * @param[in] pBufIn8 pointer to an input buffer - * @param[in] pBufOut8 pointer to an output buffer + * @param[out]pBufOut8 pointer to an output buffer * @param[in] lenInBytes8 message size in bytes */ void SNOW3G_F8_8_BUFFER(const snow3g_key_schedule_t *pHandle, @@ -2865,6 +2805,147 @@ void SNOW3G_F8_N_BUFFER_MULTIKEY(const snow3g_key_schedule_t * const pCtx[], } } +/** + * @brief Initializes the four keys for SNOW3G f8/f9. + * + * @param [in/out] pCtx Pointer to snow3g state + * @param [in] pKeySched1 Key1 schedule + * @param [in] pKeySched2 Key2 schedule + * @param [in] pKeySched3 Key3 schedule + * @param [in] pKeySched4 Key4 schedule + * @param [in] pIV1 IV for buffer 1 + * @param [in] pIV2 IV for buffer 2 + * @param [in] pIV3 IV for buffer 3 + * @param [in] pIV4 IV for buffer 4 + */ +void +SNOW3G_F8_4_BUFFER_INITIALIZE(void *pCtx, + const snow3g_key_schedule_t *pKeySched1, + const snow3g_key_schedule_t *pKeySched2, + const snow3g_key_schedule_t *pKeySched3, + const snow3g_key_schedule_t *pKeySched4, + const void *pIV1, const void *pIV2, + const void *pIV3, const void *pIV4) +{ + /* Initialize the schedule from the IV */ + snow3gStateInitialize_4_multikey((snow3gKeyState4_t *)pCtx, + pKeySched1, pKeySched2, + pKeySched3, pKeySched4, + pIV1, pIV2, pIV3, pIV4); + + /* Clock FSM and LFSR once, ignore the key stream */ + (void) snow3g_keystream_4_4((snow3gKeyState4_t *)pCtx); + + return; +} + +/** + * @brief Four buffer F8 encrypt/decrypt after initialize. + * + * @param[in/out] pCtx pointer to snow3g state + * @param[in] pBufferIn1 pointer to an input buffer + * @param[out] pBufferOut1 pointer to an output buffer + * @param[in] pBufferIn2 pointer to an input buffer + * @param[out] pBufferOut2 pointer to an output buffer + * @param[in] pBufferIn3 pointer to an input buffer + * @param[out] pBufferOut3 pointer to an output buffer + * @param[in] pBufferIn4 pointer to an input buffer + * @param[out] pBufferOut4 pointer to an output buffer + */ +void SNOW3G_F8_4_BUFFER_STREAM(void *pCtx, + const void *pBufferIn1, + void *pBufferOut1, + const void *pBufferIn2, + void *pBufferOut2, + const void *pBufferIn3, + void *pBufferOut3, + const void *pBufferIn4, + void *pBufferOut4, + const uint32_t lengthInBytes) +{ + const uint32_t num_lanes = 4; + snow3gKeyState4_t *ctx = (snow3gKeyState4_t *)pCtx; + uint32_t words; + uint8_t *pBufferOut[4]; + const uint8_t *pBufferIn[4]; + + cptr_copy_4((const void **)pBufferIn, + pBufferIn1, pBufferIn2, pBufferIn3, pBufferIn4); + + ptr_copy_4((void **)pBufferOut, pBufferOut1, pBufferOut2, + pBufferOut3, pBufferOut4); +#ifdef SAFE_PARAM + /* reset error status */ + imb_set_errno(NULL, 0); + if (!cptr_check((const void * const *)pCtx, num_lanes, + IMB_ERR_NULL_EXP_KEY)) + return; + if (!cptr_check((const void * const *)pBufferIn, + num_lanes, + IMB_ERR_NULL_SRC)) + return; + if (!ptr_check((void **)pBufferOut, num_lanes, IMB_ERR_NULL_DST)) + return; + if ((lengthInBytes == 0) || (lengthInBytes > SNOW3G_MAX_BYTELEN)) { + imb_set_errno(NULL, IMB_ERR_CIPH_LEN); + return; + } + if (pCtx == NULL) { + imb_set_errno(NULL, IMB_ERR_NULL_CTX); + return; + } +#endif + +#ifdef SAFE_DATA + CLEAR_SCRATCH_SIMD_REGS(); +#endif /* SAFE_DATA */ + + words = lengthInBytes / SNOW3G_4_BYTES; + + while (words--) { + uint32x4_t ks4 = snow3g_keystream_4_4(ctx); + xor_keystream_reverse_32(pBufferOut[0], pBufferIn[0], + vgetq_lane_u32(ks4, 0)); + xor_keystream_reverse_32(pBufferOut[1], pBufferIn[1], + vgetq_lane_u32(ks4, 1)); + xor_keystream_reverse_32(pBufferOut[2], pBufferIn[2], + vgetq_lane_u32(ks4, 2)); + xor_keystream_reverse_32(pBufferOut[3], pBufferIn[3], + vgetq_lane_u32(ks4, 3)); + for (uint32_t i = 0; i < num_lanes; i++) { + pBufferIn[i] += SNOW3G_4_BYTES; + pBufferOut[i] += SNOW3G_4_BYTES; + } + } + +#ifdef SAFE_DATA + CLEAR_SCRATCH_GPS(); + CLEAR_SCRATCH_SIMD_REGS(); +#endif /* SAFE_DATA */ +} + +/** + * @brief One buffer F8 encrypt/decrypt after initialize. + * + * One packet enc/dec after initialize. + * + * @param[in/out] pCtx pointer to snow3g state + * @param[in] pBufferIn pointer to an input buffer + * @param[out] pBufferOut pointer to an output buffer + * @param[in] lengthInBytes length in bytes + */ +void SNOW3G_F8_1_BUFFER_STREAM(void *pCtx, + const void *pBufferIn, + void *pBufferOut, + const uint32_t lengthInBytes) +{ + f8_snow3g((snow3gKeyState1_t *)pCtx, pBufferIn, pBufferOut, lengthInBytes); +#ifdef SAFE_DATA + CLEAR_SCRATCH_GPS(); + CLEAR_SCRATCH_SIMD_REGS(); +#endif /* SAFE_DATA */ +} + /** * @brief Single buffer bit-length F9 function * @@ -2883,6 +2964,8 @@ void SNOW3G_F9_1_BUFFER(const snow3g_key_schedule_t *pHandle, void *pDigest) { #ifdef SAFE_PARAM + /* reset error status */ + imb_set_errno(NULL, 0); if (pHandle == NULL) { imb_set_errno(NULL, IMB_ERR_NULL_EXP_KEY); return; @@ -2910,11 +2993,6 @@ void SNOW3G_F9_1_BUFFER(const snow3g_key_schedule_t *pHandle, snow3gKeyState1_t ctx; uint32_t z[5]; - uint64_t lengthInQwords, E, P; - uint64_t i; - const uint64_t *inputBuffer; - - inputBuffer = (const uint64_t *)pBufferIn; /* Initialize the SNOW3G key schedule */ snow3gStateInitialize_1(&ctx, pHandle, pIV); @@ -2922,7 +3000,48 @@ void SNOW3G_F9_1_BUFFER(const snow3g_key_schedule_t *pHandle, /*Generate 5 key stream words*/ snow3g_f9_keystream_words(&ctx, &z[0]); + SNOW3G_F9_1_BUFFER_DIGEST(z, pBufferIn, lengthInBits, pDigest); + +#ifdef SAFE_DATA + CLEAR_MEM(&z, sizeof(z)); + CLEAR_MEM(&ctx, sizeof(ctx)); + CLEAR_SCRATCH_GPS(); + CLEAR_SCRATCH_SIMD_REGS(); +#endif /* SAFE_DATA */ +} + +/** + * @brief Single buffer bit-length F9 function + * + * Single buffer digest with generated keystream. + * + * @param[in] z pointer to pre-generated keystream + * @param[in] pBufferIn pointer to an input buffer + * @param[in] lengthInBits message length in bits + * @param[out] pDigest pointer to store the F9 digest + */ +void SNOW3G_F9_1_BUFFER_DIGEST(const uint32_t z[5], + const void *pBufferIn, + const uint64_t lengthInBits, + void *pDigest) +{ +#ifdef SAFE_PARAM + if ((z == NULL) || (pBufferIn == NULL) || (pDigest == NULL) || + (lengthInBits == 0) || (lengthInBits > SNOW3G_MAX_BITLEN)) + return; +#endif +#ifdef SAFE_DATA + CLEAR_SCRATCH_SIMD_REGS(); +#endif /* SAFE_DATA */ + + uint64_t lengthInQwords, E, P; + uint64_t i; + const uint64_t *inputBuffer; + + inputBuffer = (const uint64_t *)pBufferIn; + P = ((uint64_t)z[0] << 32) | ((uint64_t)z[1]); + const uint64x2_t nP = vdupq_n_u64(P); lengthInQwords = lengthInBits / 64; @@ -2934,6 +3053,7 @@ void SNOW3G_F9_1_BUFFER(const snow3g_key_schedule_t *pHandle, const uint64_t P2 = multiply_and_reduce64(P, P); const uint64_t P3 = multiply_and_reduce64(P2, P); const uint64_t P4 = multiply_and_reduce64(P3, P); + const uint64x2_t nP3 = vdupq_n_u64(P3); const uint64_t bs[2] = {0x0001020304050607ULL, 0x08090a0b0c0d0e0fULL}; const uint8x16_t bswap2x64 = vreinterpretq_u8_u64(vld1q_u64(bs)); @@ -2943,7 +3063,6 @@ void SNOW3G_F9_1_BUFFER(const snow3g_key_schedule_t *pHandle, poly64x2_t EV = vdupq_n_p64(0); for (; (i + 3) < lengthInQwords; i+= 4) { - uint64_t m0, m1, m2, m3; uint64x2_t M1_t, M2_t; poly64x2_t t1, t2, t3; /* load 2 x 128-bits and byte swap 64-bit words */ @@ -2955,23 +3074,20 @@ void SNOW3G_F9_1_BUFFER(const snow3g_key_schedule_t *pHandle, vreinterpretq_u8_u64(M1_t), bswap2x64)); M2_t = vreinterpretq_u64_u8(vqtbl1q_u8( vreinterpretq_u8_u64(M2_t), bswap2x64)); - m0 = vgetq_lane_u64(M1_t, 0); - m1 = vgetq_lane_u64(M1_t, 1); - m2 = vgetq_lane_u64(M2_t, 0); - m3 = vgetq_lane_u64(M2_t, 1); /* add current EV to the first word of the message */ - m0 = m0 ^ vgetq_lane_u64(vreinterpretq_u64_p64(EV), 0); - m1 = m1 ^ vgetq_lane_u64(vreinterpretq_u64_p64(EV), 1); + M1_t = M1_t ^ EV; - /* t1 = (M0 x P4) + (M1 x P3) + (M2 x P2) + (M3 x P1) */ - t1 = (poly64x2_t)vmull_p64(m2, P2); - t2 = (poly64x2_t)vmull_p64(m3, P); + /* t1 = (M1.0 x P4) + (M1.1 x P3) + (M2.0 x P2) + (M2.1 x P1) */ + t1 = (poly64x2_t)vmull_p64(vgetq_lane_u64(M2_t, 0), P2); + t2 = (poly64x2_t)vmull_high_p64(vreinterpretq_p64_u64(M2_t), + vreinterpretq_p64_u64(nP)); t1 = t1 ^ t2; - t2 = (poly64x2_t)vmull_p64(m0, P4); - t3 = (poly64x2_t)vmull_p64(m1, P3); + t2 = (poly64x2_t)vmull_p64(vgetq_lane_u64(M1_t, 0), P4); + t3 = (poly64x2_t)vmull_high_p64(vreinterpretq_p64_u64(M1_t), + vreinterpretq_p64_u64(nP3)); t2 = t2 ^ t3; t1 = t2 ^ t1; @@ -2996,12 +3112,10 @@ void SNOW3G_F9_1_BUFFER(const snow3g_key_schedule_t *pHandle, /* add current EV to the first word of the message */ M_t = M_t ^ EV; - poly64_t M0 = vgetq_lane_u64(M_t, 0); - poly64_t M1 = vgetq_lane_u64(M_t, 1); - - /* t1 = (M0 x P2) + (M1 x P1) */ - t1 = (poly64x2_t)vmull_p64(M0, P2); - t2 = (poly64x2_t)vmull_p64(M1, P); + /* t1 = (M.0 x P2) + (M.1 x P1) */ + t1 = (poly64x2_t)vmull_p64(vgetq_lane_u64(M_t, 0), P2); + t2 = (poly64x2_t)vmull_high_p64(vreinterpretq_p64_u64(M_t), + vreinterpretq_p64_u64(nP)); t1 = t1 ^ t2; /* reduce 128-bit product */ @@ -3051,10 +3165,51 @@ void SNOW3G_F9_1_BUFFER(const snow3g_key_schedule_t *pHandle, #ifdef SAFE_DATA CLEAR_VAR(&E, sizeof(E)); CLEAR_VAR(&P, sizeof(P)); - CLEAR_MEM(&z, sizeof(z)); - CLEAR_MEM(&ctx, sizeof(ctx)); CLEAR_SCRATCH_GPS(); CLEAR_SCRATCH_SIMD_REGS(); #endif /* SAFE_DATA */ } + +/** + * @brief Four buffer F9 keystream generation. + * + * @param[in/out] pCtx pointer to snow3g state + * @param[out] ks1 pointer to output keystream1 + * @param[out] ks2 pointer to output keystream2 + * @param[out] ks3 pointer to output keystream3 + * @param[out] ks4 pointer to output keystream4 + */ +void SNOW3G_F9_4_BUFFER_KEYSTREAM(void *pCtx, + uint32_t ks1[5], + uint32_t ks2[5], + uint32_t ks3[5], + uint32_t ks4[5]) +{ + snow3gKeyState4_t *ctx = (snow3gKeyState4_t *)pCtx; + +#ifdef SAFE_PARAM + if (pCtx == NULL) + return; + if (ks1 == NULL || ks2 == NULL || ks3 == NULL || ks4 == NULL) + return; +#endif + +#ifdef SAFE_DATA + CLEAR_SCRATCH_SIMD_REGS(); +#endif /* SAFE_DATA */ + + for (int i = 0; i < 5; i++) { + uint32x4_t ks = snow3g_keystream_4_4(ctx); + ks1[i] = vgetq_lane_u32(ks, 0); + ks2[i] = vgetq_lane_u32(ks, 1); + ks3[i] = vgetq_lane_u32(ks, 2); + ks4[i] = vgetq_lane_u32(ks, 3); + } + +#ifdef SAFE_DATA + CLEAR_SCRATCH_GPS(); + CLEAR_SCRATCH_SIMD_REGS(); +#endif /* SAFE_DATA */ +} + #endif /* SNOW3G_COMMON_H */ diff --git a/lib/aarch64/snow3g_internal.h b/lib/aarch64/snow3g_internal.h new file mode 100644 index 0000000000000000000000000000000000000000..8b7e79224a5ceee3e24418ba862b92bd28d6cb82 --- /dev/null +++ b/lib/aarch64/snow3g_internal.h @@ -0,0 +1,289 @@ +/********************************************************************** + Copyright(c) 2022 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ +#ifndef SNOW3G_INTERNAL_H +#define SNOW3G_INTERNAL_H + +#include +#ifdef SAFE_PARAM +#include "include/error.h" +#endif + +#define MAX_KEY_LEN (16) +#define SNOW3G_4_BYTES (4) +#define SNOW3G_8_BYTES (8) +#define SNOW3G_8_BITS (8) +#define SNOW3G_16_BYTES (16) +#define SNOW3G_16_BITS (16) + +#define SNOW3G_BLOCK_SIZE (8) + +#define SNOW3G_KEY_LEN_IN_BYTES (16) /* 128b */ +#define SNOW3G_IV_LEN_IN_BYTES (16) /* 128b */ + +#define SNOW3GCONSTANT (0x1b) + +/* Range of input data for SNOW3G is from 1 to 2^32 bits */ +#define SNOW3G_MIN_LEN 1 +#define SNOW3G_MAX_BITLEN (UINT32_MAX) +#define SNOW3G_MAX_BYTELEN (UINT32_MAX / 8) + +typedef union SafeBuffer { + uint64_t b64; + uint32_t b32[2]; + uint8_t b8[SNOW3G_8_BYTES]; +} SafeBuf; + +typedef struct snow3gKeyState1_s { + /* 16 LFSR stages */ + uint32_t LFSR_S[16]; + /* 3 FSM states */ + uint32_t FSM_R3; + uint32_t FSM_R2; + uint32_t FSM_R1; +} DECLARE_ALIGNED(snow3gKeyState1_t, 16); + +typedef struct snow3gKeyState4_s { + /* 16 LFSR stages */ + uint32x4_t LFSR_X[16]; + /* 3 FSM states */ + uint32x4_t FSM_X[3]; + uint32_t iLFSR_X; +} snow3gKeyState4_t; + +/** + * @brief Finds minimum 32-bit value in an array + * @return Min 32-bit value + */ +static inline uint32_t +length_find_min(const uint32_t *out_array, const size_t dim_array) +{ + size_t i; + uint32_t min = 0; + + if (dim_array > 0) + min = out_array[0]; + + for (i = 1; i < dim_array; i++) + if (out_array[i] < min) + min = out_array[i]; + + return min; +} + +/** + * @brief Subtracts \a subv from a vector of 32-bit words + */ +static inline void +length_sub(uint32_t *out_array, const size_t dim_array, const uint32_t subv) +{ + size_t i; + + for (i = 0; i < dim_array; i++) + out_array[i] -= subv; +} + +#ifdef SAFE_PARAM +/** + * @brief Checks vector of length values against 0 and SNOW3G_MAX_BYTELEN values + * @retval 0 incorrect length value found + * @retval 1 all OK + */ +static inline uint32_t +length_check(const uint32_t *out_array, const size_t dim_array) +{ + size_t i; + + if (out_array == NULL) { + imb_set_errno(NULL, IMB_ERR_CIPH_LEN); + return 0; + } + + for (i = 0; i < dim_array; i++) { + if ((out_array[i] == 0) || + (out_array[i] > SNOW3G_MAX_BYTELEN)) { + imb_set_errno(NULL, IMB_ERR_CIPH_LEN); + return 0; + } + } + + return 1; +} +#endif + +/** + * @brief Copies 4 32-bit length values into an array + */ +static inline void +length_copy_4(uint32_t *out_array, + const uint32_t length1, const uint32_t length2, + const uint32_t length3, const uint32_t length4) +{ + out_array[0] = length1; + out_array[1] = length2; + out_array[2] = length3; + out_array[3] = length4; +} + +/** + * @brief Copies 8 32-bit length values into an array + */ +static inline void +length_copy_8(uint32_t *out_array, + const uint32_t length1, const uint32_t length2, + const uint32_t length3, const uint32_t length4, + const uint32_t length5, const uint32_t length6, + const uint32_t length7, const uint32_t length8) +{ + out_array[0] = length1; + out_array[1] = length2; + out_array[2] = length3; + out_array[3] = length4; + out_array[4] = length5; + out_array[5] = length6; + out_array[6] = length7; + out_array[7] = length8; +} + +#ifdef SAFE_PARAM +/** + * @brief Checks vector of pointers against NULL + * @retval 0 incorrect pointer found + * @retval 1 all OK + */ +static inline int +ptr_check(void *out_array[], const size_t dim_array, const int errnum) +{ + size_t i; + + if (out_array == NULL) { + imb_set_errno(NULL, errnum); + return 0; + } + for (i = 0; i < dim_array; i++) + if (out_array[i] == NULL) { + imb_set_errno(NULL, errnum); + return 0; + } + return 1; +} +#endif + +#ifdef SAFE_PARAM +/** + * @brief Checks vector of const pointers against NULL + * @retval 0 incorrect pointer found + * @retval 1 all OK + */ +static inline int +cptr_check(const void * const out_array[], + const size_t dim_array, + const int errnum) +{ + size_t i; + + if (out_array == NULL) { + imb_set_errno(NULL, errnum); + return 0; + } + for (i = 0; i < dim_array; i++) + if (out_array[i] == NULL) { + imb_set_errno(NULL, errnum); + return 0; + } + + return 1; +} +#endif + +/** + * @brief Copies 4 pointers into an array + */ +static inline void +ptr_copy_4(void *out_array[], + void *ptr1, void *ptr2, void *ptr3, void *ptr4) +{ + out_array[0] = ptr1; + out_array[1] = ptr2; + out_array[2] = ptr3; + out_array[3] = ptr4; +} + +/** + * @brief Copies 4 const pointers into an array + */ +static inline void +cptr_copy_4(const void *out_array[], + const void *ptr1, const void *ptr2, + const void *ptr3, const void *ptr4) +{ + out_array[0] = ptr1; + out_array[1] = ptr2; + out_array[2] = ptr3; + out_array[3] = ptr4; +} + +/** + * @brief Copies 8 pointers into an array + */ +static inline void +ptr_copy_8(void *out_array[], + void *ptr1, void *ptr2, void *ptr3, void *ptr4, + void *ptr5, void *ptr6, void *ptr7, void *ptr8) +{ + out_array[0] = ptr1; + out_array[1] = ptr2; + out_array[2] = ptr3; + out_array[3] = ptr4; + out_array[4] = ptr5; + out_array[5] = ptr6; + out_array[6] = ptr7; + out_array[7] = ptr8; +} + +/** + * @brief Copies 8 const pointers into an array + */ +static inline void +cptr_copy_8(const void *out_array[], + const void *ptr1, const void *ptr2, + const void *ptr3, const void *ptr4, + const void *ptr5, const void *ptr6, + const void *ptr7, const void *ptr8) +{ + out_array[0] = ptr1; + out_array[1] = ptr2; + out_array[2] = ptr3; + out_array[3] = ptr4; + out_array[4] = ptr5; + out_array[5] = ptr6; + out_array[6] = ptr7; + out_array[7] = ptr8; +} + +#endif /* SNOW3G_INTERNAL_H */ diff --git a/lib/include/snow3g.h b/lib/include/snow3g.h index 13bdbb156d9414eab0c7a5bfcea5be16b0967b7d..9bf40ae85f1cec766b1037793dfbd9cde5eee188 100644 --- a/lib/include/snow3g.h +++ b/lib/include/snow3g.h @@ -702,6 +702,29 @@ snow3g_f8_4_buffer_aarch64(const snow3g_key_schedule_t *pCtx, const void *pBufferIn4, void *pBufferOut4, const uint32_t lengthInBytes4); + +void +snow3g_f8_4_buffer_multikey_aarch64(const snow3g_key_schedule_t *pCtx1, + const snow3g_key_schedule_t *pCtx2, + const snow3g_key_schedule_t *pCtx3, + const snow3g_key_schedule_t *pCtx4, + const void *pIV1, + const void *pIV2, + const void *pIV3, + const void *pIV4, + const void *pBufferIn1, + void *pBufferOut1, + const uint32_t lengthInBytes1, + const void *pBufferIn2, + void *pBufferOut2, + const uint32_t lengthInBytes2, + const void *pBufferIn3, + void *pBufferOut3, + const uint32_t lengthInBytes3, + const void *pBufferIn4, + void *pBufferOut4, + const uint32_t lengthInBytes4); + void snow3g_f8_8_buffer_aarch64(const snow3g_key_schedule_t *pCtx, const void *pIV1, @@ -759,6 +782,32 @@ snow3g_f8_n_buffer_multikey_aarch64(const snow3g_key_schedule_t * const pCtx[], void *pBufferOut[], const uint32_t bufferLenInBytes[], const uint32_t bufferCount); +void +snow3g_f8_4_buffer_initialize_aarch64(void *pCtx, + const snow3g_key_schedule_t *pKeySched1, + const snow3g_key_schedule_t *pKeySched2, + const snow3g_key_schedule_t *pKeySched3, + const snow3g_key_schedule_t *pKeySched4, + const void *pIV1, const void *pIV2, + const void *pIV3, const void *pIV4); + +void +snow3g_f8_1_buffer_stream_aarch64(void *pCtx, + const void *pBufferIn, + void *pBufferOut, + const uint32_t lengthInBytes); + +void +snow3g_f8_4_buffer_stream_aarch64(void *pCtx, + const void *pBufferIn1, + void *pBufferOut1, + const void *pBufferIn2, + void *pBufferOut2, + const void *pBufferIn3, + void *pBufferOut3, + const void *pBufferIn4, + void *pBufferOut4, + const uint32_t lengthInBytes); void snow3g_f9_1_buffer_aarch64(const snow3g_key_schedule_t *pCtx, @@ -767,6 +816,19 @@ snow3g_f9_1_buffer_aarch64(const snow3g_key_schedule_t *pCtx, const uint64_t lengthInBits, void *pDigest); +void +snow3g_f9_1_buffer_digest_aarch64(const uint32_t z[5], + const void *pBufferIn, + const uint64_t lengthInBits, + void *pDigest); + +void +snow3g_f9_4_buffer_keystream_aarch64(void *pCtx, + uint32_t ks1[5], + uint32_t ks2[5], + uint32_t ks3[5], + uint32_t ks4[5]); + size_t snow3g_key_sched_size_aarch64(void); @@ -821,6 +883,28 @@ snow3g_f8_4_buffer_aarch64_no_aesni(const snow3g_key_schedule_t *pCtx, void *pBufferOut4, const uint32_t lengthInBytes4); +void +snow3g_f8_4_buffer_multikey_aarch64_no_aesni(const snow3g_key_schedule_t *pCtx1, + const snow3g_key_schedule_t *pCtx2, + const snow3g_key_schedule_t *pCtx3, + const snow3g_key_schedule_t *pCtx4, + const void *pIV1, + const void *pIV2, + const void *pIV3, + const void *pIV4, + const void *pBufferIn1, + void *pBufferOut1, + const uint32_t lengthInBytes1, + const void *pBufferIn2, + void *pBufferOut2, + const uint32_t lengthInBytes2, + const void *pBufferIn3, + void *pBufferOut3, + const uint32_t lengthInBytes3, + const void *pBufferIn4, + void *pBufferOut4, + const uint32_t lengthInBytes4); + void snow3g_f8_8_buffer_aarch64_no_aesni(const snow3g_key_schedule_t *pCtx, const void *pIV1, @@ -881,6 +965,33 @@ snow3g_f8_n_buffer_multikey_aarch64_no_aesni(const snow3g_key_schedule_t * const const uint32_t bufferLenInBytes[], const uint32_t bufferCount); +void +snow3g_f8_4_buffer_initialize_aarch64_no_aesni(void *pCtx, + const snow3g_key_schedule_t *pKeySched1, + const snow3g_key_schedule_t *pKeySched2, + const snow3g_key_schedule_t *pKeySched3, + const snow3g_key_schedule_t *pKeySched4, + const void *pIV1, const void *pIV2, + const void *pIV3, const void *pIV4); + +void +snow3g_f8_1_buffer_stream_aarch64_no_aesni(void *pCtx, + const void *pBufferIn, + void *pBufferOut, + const uint32_t lengthInBytes); + +void +snow3g_f8_4_buffer_stream_aarch64_no_aesni(void *pCtx, + const void *pBufferIn1, + void *pBufferOut1, + const void *pBufferIn2, + void *pBufferOut2, + const void *pBufferIn3, + void *pBufferOut3, + const void *pBufferIn4, + void *pBufferOut4, + const uint32_t lengthInBytes); + void snow3g_f9_1_buffer_aarch64_no_aesni(const snow3g_key_schedule_t *pCtx, const void *pIV, @@ -888,10 +999,24 @@ snow3g_f9_1_buffer_aarch64_no_aesni(const snow3g_key_schedule_t *pCtx, const uint64_t lengthInBits, void *pDigest); +void +snow3g_f9_1_buffer_digest_aarch64_no_aesni(const uint32_t z[5], + const void *pBufferIn, + const uint64_t lengthInBits, + void *pDigest); + +void +snow3g_f9_4_buffer_keystream_aarch64_no_aesni(void *pCtx, + uint32_t ks1[5], + uint32_t ks2[5], + uint32_t ks3[5], + uint32_t ks4[5]); + size_t snow3g_key_sched_size_aarch64_no_aesni(void); int snow3g_init_key_sched_aarch64_no_aesni(const void *pKey, snow3g_key_schedule_t *pCtx); + #endif /* _SNOW3G_H_ */ diff --git a/lib/ipsec-mb.h b/lib/ipsec-mb.h index e1b8130d47c981e7b71332a926a2703463efdf90..4de506b435f128f12b886f0abb394e193df64891 100644 --- a/lib/ipsec-mb.h +++ b/lib/ipsec-mb.h @@ -957,6 +957,19 @@ typedef void (*snow3g_f8_4_buffer_t)(const snow3g_key_schedule_t *, const uint32_t, const void *, void *, const uint32_t); +#ifdef __aarch64__ +typedef void (*snow3g_f8_4_buffer_multikey_t)(const snow3g_key_schedule_t *, + const snow3g_key_schedule_t *, + const snow3g_key_schedule_t *, + const snow3g_key_schedule_t *, + const void *, const void *, const void *, + const void *, const void *, void *, + const uint32_t, const void *, void *, + const uint32_t, const void *, void *, + const uint32_t, const void *, void *, + const uint32_t); +#endif //__aarch64__ + typedef void (*snow3g_f8_8_buffer_t)(const snow3g_key_schedule_t *, const void *, const void *, const void *, const void *, const void *, const void *, @@ -1164,6 +1177,9 @@ typedef struct IMB_MGR { snow3g_f8_4_buffer_t snow3g_f8_4_buffer; snow3g_f8_8_buffer_t snow3g_f8_8_buffer; snow3g_f8_n_buffer_t snow3g_f8_n_buffer; +#ifdef __aarch64__ + snow3g_f8_4_buffer_multikey_t snow3g_f8_4_buffer_multikey; +#endif //__aarch64__ snow3g_f8_8_buffer_multikey_t snow3g_f8_8_buffer_multikey; snow3g_f8_n_buffer_multikey_t snow3g_f8_n_buffer_multikey; snow3g_f9_1_buffer_t snow3g_f9_1_buffer; @@ -2425,6 +2441,51 @@ IMB_DLL_EXPORT void init_mb_mgr_auto(IMB_MGR *state, IMB_ARCH *arch); (_src3), (_dst3), (_len3), \ (_src4), (_dst4), (_len4))) +#ifdef __aarch64__ +/** + ******************************************************************************* + * This function performs snow3g f8 operation on four buffers. They will + * be processed with different keys, which has already been scheduled with + * snow3g_init_key_sched(). + * + * @param[in] _mgr Pointer to multi-buffer structure + * @param[in] _exp_key1 Context where the scheduled key1 are stored + * @param[in] _exp_key2 Context where the scheduled key2 are stored + * @param[in] _exp_key3 Context where the scheduled key3 are stored + * @param[in] _exp_key4 Context where the scheduled key4 are stored + * @param[in] _iv1 IV to use for buffer pBufferIn1 + * @param[in] _iv2 IV to use for buffer pBufferIn2 + * @param[in] _iv3 IV to use for buffer pBufferIn3 + * @param[in] _iv4 IV to use for buffer pBufferIn4 + * @param[in] _src1 Input buffer 1 + * @param[out] _dst1 Output buffer 1 + * @param[in] _len1 Length in bytes of input buffer 1 + * @param[in] _src2 Input buffer 2 + * @param[out] _dst2 Output buffer 2 + * @param[in] _len2 Length in bytes of input buffer 2 + * @param[in] _src3 Input buffer 3 + * @param[out] _dst3 Output buffer 3 + * @param[in] _len3 Length in bytes of input buffer 3 + * @param[in] _src4 Input buffer 4 + * @param[out] _dst4 Output buffer 4 + * @param[in] _len4 Length in bytes of input buffer 4 + */ +#define IMB_SNOW3G_F8_4_BUFFER_MULTIKEY(_mgr, \ + _exp_key1, _exp_key2, _exp_key3, _exp_key4, \ + _iv1, _iv2, _iv3, _iv4, \ + _src1, _dst1, _len1, \ + _src2, _dst2, _len2, \ + _src3, _dst3, _len3, \ + _src4, _dst4, _len4) \ + ((_mgr)->snow3g_f8_4_buffer_multikey((_exp_key1), (_exp_key2), \ + (_exp_key3), (_exp_key4), \ + (_iv1), (_iv2), (_iv3), (_iv4), \ + (_src1), (_dst1), (_len1), \ + (_src2), (_dst2), (_len2), \ + (_src3), (_dst3), (_len3), \ + (_src4), (_dst4), (_len4))) +#endif //__aarch64__ + /** ******************************************************************************* * This function performs snow3g f8 operation on eight buffers. They will diff --git a/perf/ipsec_perf.c b/perf/ipsec_perf.c index e0b1a6111e5a99577f04ab1c27d78d0e25a8a981..d3e96a386602c608385d9bc9ab1140697aefb250 100644 --- a/perf/ipsec_perf.c +++ b/perf/ipsec_perf.c @@ -4150,10 +4150,18 @@ int main(int argc, char *argv[]) arch_str_map[arch_id].name); } } - +#ifdef __aarch64__ + /* The scale maybe less than 0.01 on AARCH64, so precision of .3f + is not enough. Use .6f instead of .3f */ + if (tsc_detect) + fprintf(stderr, "TSC scaling to core cycles: %.6f\n", + get_tsc_to_core_scale(turbo_enabled)); +#else if (tsc_detect) fprintf(stderr, "TSC scaling to core cycles: %.3f\n", get_tsc_to_core_scale(turbo_enabled)); +#endif + #ifdef __aarch64__ fprintf(stderr, "CNT frequency: %ld\n", read_cntfreq()); #endif