diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index 42f65ca8bbe2c75cb7dc0c15b5430516a067cb1d..c5cf05f057589db270bfa6afd9b1546adb66d402 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -162,7 +162,8 @@ if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64")
       "${DIR_X86_64}/zuc_iv.c"
       "${DIR_X86_64}/snow3g_iv.c"
       "${DIR_X86_64}/snow3g_tables.c"
-      "${DIR_X86_64}/ooo_mgr_reset.c")
+      "${DIR_X86_64}/ooo_mgr_reset.c"
+      "${DIR_X86_64}/capabilities.c")
   set(SRC_FILES_NO_AESNI "${DIR_NO_AESNI}/aesni_emu.c")
 else()
   file(GLOB SRC_FILES_AVX_T1 "${DIR_AVX_T1}/*.c")
diff --git a/lib/Makefile b/lib/Makefile
index bfe8cb627417f195aab805d4853c3eb34375fbb7..f6fd7e7eeacb483189969d24738202399ca2b244 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -376,7 +376,8 @@ c_lib_objs := \
 	cipher_suite_id.o \
 	ooo_mgr_reset.o \
 	hmac_ipad_opad_aarch64.o \
-	self_test_aarch64.o
+	self_test_aarch64.o \
+	capabilities.o
 asm_generic_lib_objs := \
 	lookup_16x8bit_neon.o \
 	snow3g_impl_aarch64_neon.o \
@@ -439,7 +440,8 @@ c_lib_objs := \
 	quic_hp_chacha20.o \
 	quic_chacha20_poly1305.o \
 	hmac_ipad_opad.o \
-	cipher_suite_id.o
+	cipher_suite_id.o \
+	capabilities.o
 
 ifeq ($(AESNI_EMU), y)
 c_lib_objs := $(c_lib_objs) \
@@ -777,7 +779,11 @@ asm_avx2_t4_lib_objs := \
 	sm3_msg_avx2.o \
 	sm3_hmac_avx2.o \
 	sha512_x1_ni_avx2.o \
-	sha512_hmac_ni_avx2.o
+	sha512_x2_ni_avx2.o \
+	mb_mgr_hmac_sha512_flush_ni_avx2.o \
+	mb_mgr_hmac_sha512_submit_ni_avx2.o \
+	mb_mgr_hmac_sha384_flush_ni_avx2.o \
+	mb_mgr_hmac_sha384_submit_ni_avx2.o
 
 #
 # List of ASM modules (avx512 directory)
diff --git a/lib/avx2_t1/mb_mgr_avx2_t1.c b/lib/avx2_t1/mb_mgr_avx2_t1.c
index 58d6b08fb94f032ed0964b522f672409c17fbeff..5c8b0a3cc58109d59a876811feb7512976f719af 100644
--- a/lib/avx2_t1/mb_mgr_avx2_t1.c
+++ b/lib/avx2_t1/mb_mgr_avx2_t1.c
@@ -71,6 +71,8 @@
 #define SUBMIT_CIPHER_BURST_NOCHECK submit_cipher_burst_nocheck_avx2_t1
 #define SUBMIT_HASH_BURST           submit_hash_burst_avx2_t1
 #define SUBMIT_HASH_BURST_NOCHECK   submit_hash_burst_nocheck_avx2_t1
+#define SUBMIT_AEAD_BURST           submit_aead_burst_avx2_t1
+#define SUBMIT_AEAD_BURST_NOCHECK   submit_aead_burst_nocheck_avx2_t1
 #define SET_SUITE_ID_FN             set_suite_id_avx2_t1
 
 /* Hash */
@@ -346,6 +348,9 @@ init_mb_mgr_avx2_t1_internal(IMB_MGR *state, const int reset_mgrs)
         /* Set architecture for future checks */
         state->used_arch = (uint32_t) IMB_ARCH_AVX2;
 
+        /* Set architecture type for future checks */
+        state->used_arch_type = 1;
+
         if (reset_mgrs) {
                 reset_ooo_mgrs(state);
 
@@ -369,6 +374,8 @@ init_mb_mgr_avx2_t1_internal(IMB_MGR *state, const int reset_mgrs)
         state->submit_cipher_burst_nocheck = SUBMIT_CIPHER_BURST_NOCHECK;
         state->submit_hash_burst = SUBMIT_HASH_BURST;
         state->submit_hash_burst_nocheck = SUBMIT_HASH_BURST_NOCHECK;
+        state->submit_aead_burst = SUBMIT_AEAD_BURST;
+        state->submit_aead_burst_nocheck = SUBMIT_AEAD_BURST_NOCHECK;
 
         state->set_suite_id = SET_SUITE_ID_FN;
 
         state->keyexp_128 = aes_keyexp_128_avx2;
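Review note: every init_mb_mgr_*_internal() variant in this patch now records a
type number next to the architecture (t1 = 1, t2 = 2, ...). A minimal sketch of
how a caller could branch on it, assuming used_arch_type stays a plain uint32_t
member of IMB_MGR alongside used_arch (the helper below is hypothetical, not
part of the library API):

    #include <intel-ipsec-mb.h>

    /* Pick a tuned data path based on which manager variant was initialized. */
    static int uses_sha512_ni_avx2(const IMB_MGR *mgr)
    {
            /* AVX2 type 4 is the SHA512-NI capable variant in this patch. */
            return (mgr->used_arch == (uint32_t) IMB_ARCH_AVX2) &&
                   (mgr->used_arch_type == 4);
    }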
diff --git a/lib/avx2_t1/sha_mb_avx2.c b/lib/avx2_t1/sha_mb_avx2.c
index eee83390405a045e4acf7a70bb4fa2c66adf2d4a..5777ccbf32d55fc352226932d9e3d60f9b23f0c0 100644
--- a/lib/avx2_t1/sha_mb_avx2.c
+++ b/lib/avx2_t1/sha_mb_avx2.c
@@ -111,7 +111,7 @@ IMB_JOB *
 submit_job_sha384_avx2(MB_MGR_SHA_512_OOO *state, IMB_JOB *job)
 {
         return submit_flush_job_sha_512(state, job, 4, 1, 384, IMB_SHA_512_BLOCK_SIZE,
-                                        SHA384_PAD_SIZE, call_sha512_x4_avx2_from_c);
+                                        SHA384_PAD_SIZE, call_sha512_x4_avx2_from_c, 0);
 }
 
 IMB_DLL_LOCAL
@@ -119,7 +119,7 @@ IMB_JOB *
 flush_job_sha384_avx2(MB_MGR_SHA_512_OOO *state, IMB_JOB *job)
 {
         return submit_flush_job_sha_512(state, job, 4, 0, 384, IMB_SHA_512_BLOCK_SIZE,
-                                        SHA384_PAD_SIZE, call_sha512_x4_avx2_from_c);
+                                        SHA384_PAD_SIZE, call_sha512_x4_avx2_from_c, 0);
 }
 
 /* ========================================================================== */
@@ -132,7 +132,7 @@ IMB_JOB *
 submit_job_sha512_avx2(MB_MGR_SHA_512_OOO *state, IMB_JOB *job)
 {
         return submit_flush_job_sha_512(state, job, 4, 1, 512, IMB_SHA_512_BLOCK_SIZE,
-                                        SHA512_PAD_SIZE, call_sha512_x4_avx2_from_c);
+                                        SHA512_PAD_SIZE, call_sha512_x4_avx2_from_c, 0);
 }
 
 IMB_DLL_LOCAL
@@ -140,5 +140,5 @@ IMB_JOB *
 flush_job_sha512_avx2(MB_MGR_SHA_512_OOO *state, IMB_JOB *job)
 {
         return submit_flush_job_sha_512(state, job, 4, 0, 512, IMB_SHA_512_BLOCK_SIZE,
-                                        SHA512_PAD_SIZE, call_sha512_x4_avx2_from_c);
+                                        SHA512_PAD_SIZE, call_sha512_x4_avx2_from_c, 0);
 }
diff --git a/lib/avx2_t2/mb_mgr_avx2_t2.c b/lib/avx2_t2/mb_mgr_avx2_t2.c
index 5ccb6be4bc0978e65ae74652e343e98d15b4b5c0..3cb93029cde3be666035590ea6a8eee03e3c24f5 100644
--- a/lib/avx2_t2/mb_mgr_avx2_t2.c
+++ b/lib/avx2_t2/mb_mgr_avx2_t2.c
@@ -73,6 +73,8 @@
 #define SUBMIT_CIPHER_BURST_NOCHECK submit_cipher_burst_nocheck_avx2_t2
 #define SUBMIT_HASH_BURST           submit_hash_burst_avx2_t2
 #define SUBMIT_HASH_BURST_NOCHECK   submit_hash_burst_nocheck_avx2_t2
+#define SUBMIT_AEAD_BURST           submit_aead_burst_avx2_t2
+#define SUBMIT_AEAD_BURST_NOCHECK   submit_aead_burst_nocheck_avx2_t2
 #define SET_SUITE_ID_FN             set_suite_id_avx2_t2
 
 /* Hash */
@@ -349,6 +351,9 @@ init_mb_mgr_avx2_t2_internal(IMB_MGR *state, const int reset_mgrs)
         /* Set architecture for future checks */
         state->used_arch = (uint32_t) IMB_ARCH_AVX2;
 
+        /* Set architecture type for future checks */
+        state->used_arch_type = 2;
+
         if (reset_mgrs) {
                 reset_ooo_mgrs(state);
 
@@ -372,6 +377,8 @@ init_mb_mgr_avx2_t2_internal(IMB_MGR *state, const int reset_mgrs)
         state->submit_cipher_burst_nocheck = SUBMIT_CIPHER_BURST_NOCHECK;
         state->submit_hash_burst = SUBMIT_HASH_BURST;
         state->submit_hash_burst_nocheck = SUBMIT_HASH_BURST_NOCHECK;
+        state->submit_aead_burst = SUBMIT_AEAD_BURST;
+        state->submit_aead_burst_nocheck = SUBMIT_AEAD_BURST_NOCHECK;
 
         state->set_suite_id = SET_SUITE_ID_FN;
 
         state->keyexp_128 = aes_keyexp_128_avx2;
diff --git a/lib/avx2_t3/mb_mgr_avx2_t3.c b/lib/avx2_t3/mb_mgr_avx2_t3.c
index 884856bcb6a447fa8912d366f3cbe6e3fd3fe1bb..16b6d20c2f9ab3843cdf31da0839c8671dffccb1 100644
--- a/lib/avx2_t3/mb_mgr_avx2_t3.c
+++ b/lib/avx2_t3/mb_mgr_avx2_t3.c
@@ -74,6 +74,8 @@
 #define SUBMIT_CIPHER_BURST_NOCHECK submit_cipher_burst_nocheck_avx2_t3
 #define SUBMIT_HASH_BURST           submit_hash_burst_avx2_t3
 #define SUBMIT_HASH_BURST_NOCHECK   submit_hash_burst_nocheck_avx2_t3
+#define SUBMIT_AEAD_BURST           submit_aead_burst_avx2_t3
+#define SUBMIT_AEAD_BURST_NOCHECK   submit_aead_burst_nocheck_avx2_t3
 #define SET_SUITE_ID_FN             set_suite_id_avx2_t3
 
 /* Hash */
@@ -349,6 +351,9 @@ init_mb_mgr_avx2_t3_internal(IMB_MGR *state, const int reset_mgrs)
         /* Set architecture for future checks */
         state->used_arch = (uint32_t) IMB_ARCH_AVX2;
 
+        /* Set architecture type for future checks */
+        state->used_arch_type = 3;
+
         if (reset_mgrs) {
                 reset_ooo_mgrs(state);
 
@@ -372,6 +377,8 @@ init_mb_mgr_avx2_t3_internal(IMB_MGR *state, const int reset_mgrs)
         state->submit_cipher_burst_nocheck = SUBMIT_CIPHER_BURST_NOCHECK;
         state->submit_hash_burst = SUBMIT_HASH_BURST;
         state->submit_hash_burst_nocheck = SUBMIT_HASH_BURST_NOCHECK;
+        state->submit_aead_burst = SUBMIT_AEAD_BURST;
+        state->submit_aead_burst_nocheck = SUBMIT_AEAD_BURST_NOCHECK;
 
         state->set_suite_id = SET_SUITE_ID_FN;
 
         state->keyexp_128 = aes_keyexp_128_avx2;
diff --git a/lib/avx2_t4/mb_mgr_avx2_t4.c b/lib/avx2_t4/mb_mgr_avx2_t4.c
index 16cd470fd9a67714eea080ca79ca749939a97d83..cc9583d99b95f27f10788dacd792ee69c72b145a 100644
--- a/lib/avx2_t4/mb_mgr_avx2_t4.c
+++ b/lib/avx2_t4/mb_mgr_avx2_t4.c
@@ -75,6 +75,8 @@
 #define SUBMIT_CIPHER_BURST_NOCHECK submit_cipher_burst_nocheck_avx2_t4
 #define SUBMIT_HASH_BURST           submit_hash_burst_avx2_t4
 #define SUBMIT_HASH_BURST_NOCHECK   submit_hash_burst_nocheck_avx2_t4
+#define SUBMIT_AEAD_BURST           submit_aead_burst_avx2_t4
+#define SUBMIT_AEAD_BURST_NOCHECK   submit_aead_burst_nocheck_avx2_t4
 #define SET_SUITE_ID_FN             set_suite_id_avx2_t4
 
 /* Hash */
@@ -294,10 +296,10 @@ reset_ooo_mgrs(IMB_MGR *state)
         ooo_mgr_hmac_sha256_reset(state->hmac_sha_256_ooo, 2);
 
         /* Init HMAC/SHA384 out-of-order fields */
-        ooo_mgr_hmac_sha384_reset(state->hmac_sha_384_ooo, AVX2_NUM_SHA512_LANES);
+        ooo_mgr_hmac_sha384_reset(state->hmac_sha_384_ooo, 2);
 
         /* Init HMAC/SHA512 out-of-order fields */
-        ooo_mgr_hmac_sha512_reset(state->hmac_sha_512_ooo, AVX2_NUM_SHA512_LANES);
+        ooo_mgr_hmac_sha512_reset(state->hmac_sha_512_ooo, 2);
 
         /* Init HMAC/MD5 out-of-order fields */
         ooo_mgr_hmac_md5_reset(state->hmac_md5_ooo, AVX2_NUM_MD5_LANES);
@@ -350,6 +352,9 @@ init_mb_mgr_avx2_t4_internal(IMB_MGR *state, const int reset_mgrs)
         /* Set architecture for future checks */
         state->used_arch = (uint32_t) IMB_ARCH_AVX2;
 
+        /* Set architecture type for future checks */
+        state->used_arch_type = 4;
+
         if (reset_mgrs) {
                 reset_ooo_mgrs(state);
 
@@ -373,6 +378,8 @@ init_mb_mgr_avx2_t4_internal(IMB_MGR *state, const int reset_mgrs)
         state->submit_cipher_burst_nocheck = SUBMIT_CIPHER_BURST_NOCHECK;
         state->submit_hash_burst = SUBMIT_HASH_BURST;
         state->submit_hash_burst_nocheck = SUBMIT_HASH_BURST_NOCHECK;
+        state->submit_aead_burst = SUBMIT_AEAD_BURST;
+        state->submit_aead_burst_nocheck = SUBMIT_AEAD_BURST_NOCHECK;
 
         state->set_suite_id = SET_SUITE_ID_FN;
 
         state->keyexp_128 = aes_keyexp_128_avx2;
diff --git a/lib/avx2_t4/mb_mgr_hmac_sha384_flush_ni_avx2.asm b/lib/avx2_t4/mb_mgr_hmac_sha384_flush_ni_avx2.asm
new file mode 100644
index 0000000000000000000000000000000000000000..bb2178dec47d77d04c9859da67e8efc789f5ad92
--- /dev/null
+++ b/lib/avx2_t4/mb_mgr_hmac_sha384_flush_ni_avx2.asm
@@ -0,0 +1,31 @@
+;;
+;; Copyright (c) 2024, Intel Corporation
+;;
+;; Redistribution and use in source and binary forms, with or without
+;; modification, are permitted provided that the following conditions are met:
+;;
+;;     * Redistributions of source code must retain the above copyright notice,
+;;       this list of conditions and the following disclaimer.
+;;     * Redistributions in binary form must reproduce the above copyright
+;;       notice, this list of conditions and the following disclaimer in the
+;;       documentation and/or other materials provided with the distribution.
+;;     * Neither the name of Intel Corporation nor the names of its contributors
+;;       may be used to endorse or promote products derived from this software
+;;       without specific prior written permission.
+;;
+;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;
+
+%define FUNC flush_job_hmac_sha_384_ni_avx2
+%define SHA_X_DIGEST_SIZE 384
+
+%include "avx2_t4/mb_mgr_hmac_sha512_flush_ni_avx2.asm"
diff --git a/lib/avx2_t4/mb_mgr_hmac_sha384_submit_ni_avx2.asm b/lib/avx2_t4/mb_mgr_hmac_sha384_submit_ni_avx2.asm
new file mode 100644
index 0000000000000000000000000000000000000000..daca35c2c7790963a85dde227ba606b2ab03a8b1
--- /dev/null
+++ b/lib/avx2_t4/mb_mgr_hmac_sha384_submit_ni_avx2.asm
@@ -0,0 +1,30 @@
+;; Copyright (c) 2024, Intel Corporation
+;;
+;; Redistribution and use in source and binary forms, with or without
+;; modification, are permitted provided that the following conditions are met:
+;;
+;;     * Redistributions of source code must retain the above copyright notice,
+;;       this list of conditions and the following disclaimer.
+;;     * Redistributions in binary form must reproduce the above copyright
+;;       notice, this list of conditions and the following disclaimer in the
+;;       documentation and/or other materials provided with the distribution.
+;;     * Neither the name of Intel Corporation nor the names of its contributors
+;;       may be used to endorse or promote products derived from this software
+;;       without specific prior written permission.
+;;
+;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;
+
+%define FUNC submit_job_hmac_sha_384_ni_avx2
+%define SHA_X_DIGEST_SIZE 384
+
+%include "avx2_t4/mb_mgr_hmac_sha512_submit_ni_avx2.asm"
diff --git a/lib/avx2_t4/mb_mgr_hmac_sha512_flush_ni_avx2.asm b/lib/avx2_t4/mb_mgr_hmac_sha512_flush_ni_avx2.asm
new file mode 100644
index 0000000000000000000000000000000000000000..8c94253d1b2244415752c7f46ae3318f3bf4cb9d
--- /dev/null
+++ b/lib/avx2_t4/mb_mgr_hmac_sha512_flush_ni_avx2.asm
@@ -0,0 +1,305 @@
+;;
+;; Copyright (c) 2024, Intel Corporation
+;;
+;; Redistribution and use in source and binary forms, with or without
+;; modification, are permitted provided that the following conditions are met:
+;;
+;;     * Redistributions of source code must retain the above copyright notice,
+;;       this list of conditions and the following disclaimer.
+;;     * Redistributions in binary form must reproduce the above copyright
+;;       notice, this list of conditions and the following disclaimer in the
+;;       documentation and/or other materials provided with the distribution.
+;;     * Neither the name of Intel Corporation nor the names of its contributors
+;;       may be used to endorse or promote products derived from this software
+;;       without specific prior written permission.
+;;
+;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;
+
+%include "include/os.inc"
+%include "include/imb_job.inc"
+%include "include/mb_mgr_datastruct.inc"
+%include "include/reg_sizes.inc"
+
+%ifndef FUNC
+%define FUNC flush_job_hmac_sha_512_ni_avx2
+%define SHA_X_DIGEST_SIZE 512
+%endif
+
+extern sha512_ni_x2_avx2
+
+mksection .rodata
+default rel
+
+align 16
+byteswap:
+        dq 0x0001020304050607, 0x08090a0b0c0d0e0f
+        dq 0x0001020304050607, 0x08090a0b0c0d0e0f
+len_masks:
+        dq 0xFFFFFFFF0000FFFF, 0xFFFFFFFFFFFFFFFF
+        dq 0xFFFFFFFFFFFF0000, 0xFFFFFFFFFFFFFFFF
+lane_1: dq 1
+
+mksection .text
+
+%ifdef LINUX
+%define arg1    rdi
+%define arg2    rsi
+%else
+%define arg1    rcx
+%define arg2    rdx
+%endif
+
+%define state   arg1
+%define job     arg2
+%define len2    arg2
+
+; idx needs to be in rbp, r15
+%define idx     rbp
+
+%define unused_lanes    rbx
+%define lane_data       rbx
+%define tmp2            rbx
+
+%define job_rax         rax
+%define size_offset     rax
+%define tmp             rax
+%define start_offset    rax
+
+%define tmp3            arg1
+%define extra_blocks    arg2
+%define p               arg2
+
+%define tmp4            r8
+%define tmp5            r9
+%define tmp6            r10
+
+struc STACK
+_gpr_save:      resq    3
+_rsp_save:      resq    1
+endstruc
+
+%define APPEND(a,b) a %+ b
+
+; JOB* FUNC(MB_MGR_HMAC_SHA_512_OOO *state)
+; arg 1 : state
+MKGLOBAL(FUNC,function,internal)
+FUNC:
+        mov     rax, rsp
+        sub     rsp, STACK_size
+        mov     [rsp + _gpr_save + 8*0], rbx
+        mov     [rsp + _gpr_save + 8*1], rbp
+        mov     [rsp + _gpr_save + 8*2], r12
+        mov     [rsp + _rsp_save], rax  ; original SP
+
+        mov     unused_lanes, [state + _unused_lanes_sha512]
+        bt      unused_lanes, 16+7
+        jc      return_null
+
+        ; find a lane with a non-null job
+        xor     idx, idx
+        cmp     qword [state + _ldata_sha512 + 1 * _SHA512_LANE_DATA_size + _job_in_lane_sha512], 0
+        cmovne  idx, [rel lane_1]
+
+copy_lane_data:
+        ; copy good lane (idx) to empty lanes
+        vmovdqa xmm0, [state + _lens_sha512]
+        mov     tmp, [state + _args_sha512 + _data_ptr_sha512 + PTR_SZ*idx]
+
+%assign I 0
+%rep 2
+        cmp     qword [state + _ldata_sha512 + I * _SHA512_LANE_DATA_size + _job_in_lane_sha512], 0
+        jne     APPEND(skip_,I)
+        mov     [state + _args_sha512 + _data_ptr_sha512 + PTR_SZ*I], tmp
+        vpor    xmm0, xmm0, [rel len_masks + 16*I]
+APPEND(skip_,I):
+%assign I (I+1)
+%endrep
+        vmovdqa [state + _lens_sha512], xmm0
+
+        vphminposuw xmm1, xmm0
+        vpextrw DWORD(len2), xmm1, 0    ; min value
+        vpextrw DWORD(idx), xmm1, 1     ; min index (0...3)
+        cmp     len2, 0
+        je      len_is_0
+
+        vpshuflw xmm1, xmm1, 0x00
+        vpsubw  xmm0, xmm0, xmm1
+        vmovdqa [state + _lens_sha512], xmm0
+
+        ; "state" and "args" are the same address, arg1
+        ; len is arg2
+        call    sha512_ni_x2_avx2
+        ; state and idx are intact
+
+len_is_0:
+        ; process completed job "idx"
+        imul    lane_data, idx, _SHA512_LANE_DATA_size
+        lea     lane_data, [state + _ldata_sha512 + lane_data]
+        mov     DWORD(extra_blocks), [lane_data + _extra_blocks_sha512]
+        cmp     extra_blocks, 0
+        jne     proc_extra_blocks
+        cmp     dword [lane_data + _outer_done_sha512], 0
+        jne     end_loop
+
+proc_outer:
+        mov     dword [lane_data + _outer_done_sha512], 1
+        mov     DWORD(size_offset), [lane_data + _size_offset_sha512]
+        mov     qword [lane_data + _extra_block_sha512 + size_offset], 0
+        mov     word [state + _lens_sha512 + 2*idx], 1
+        lea     tmp, [lane_data + _outer_block_sha512]
+        mov     job, [lane_data + _job_in_lane_sha512]
+        mov     [state + _args_data_ptr_sha512 + PTR_SZ*idx], tmp
+
+        ; move digest into data location
+        lea     tmp5, [idx*8]   ;; scale up to SHA512_DIGEST_ROW_SIZE (8*8)
+        vmovdqu ymm0, [state + _args_digest_sha512 + tmp5*8]
+        vmovdqu ymm1, [state + _args_digest_sha512 + tmp5*8 + 32]
+        vpshufb ymm0, [rel byteswap]
+        vpshufb ymm1, [rel byteswap]
+        vmovdqu [lane_data + _outer_block_sha512], ymm0
+%if (SHA_X_DIGEST_SIZE != 384)
+        vmovdqu [lane_data + _outer_block_sha512+32], ymm1
+%else
+        vmovdqu [lane_data + _outer_block_sha512+32], xmm1
+%endif
+
+        ; move the opad key into digest
+        mov     tmp, [job + _auth_key_xor_opad]
+
+        vmovdqu ymm0, [tmp]
+        vmovdqu ymm1, [tmp + 32]
+        vmovdqu [state + _args_digest_sha512 + tmp5*8], ymm0
+        vmovdqu [state + _args_digest_sha512 + tmp5*8 + 32], ymm1
+
+        jmp     copy_lane_data
+
+        align   16
+proc_extra_blocks:
+        mov     DWORD(start_offset), [lane_data + _start_offset_sha512]
+        mov     [state + _lens_sha512 + 2*idx], WORD(extra_blocks)
+        lea     tmp, [lane_data + _extra_block_sha512 + start_offset]
+        mov     [state + _args_data_ptr_sha512 + PTR_SZ*idx], tmp
+        mov     dword [lane_data + _extra_blocks_sha512], 0
+        jmp     copy_lane_data
+
+return_null:
+        xor     job_rax, job_rax
+        jmp     return
+
+        align   16
+end_loop:
+        mov     job_rax, [lane_data + _job_in_lane_sha512]
+        mov     qword [lane_data + _job_in_lane_sha512], 0
+        or      dword [job_rax + _status], IMB_STATUS_COMPLETED_AUTH
+        mov     unused_lanes, [state + _unused_lanes_sha512]
+        shl     unused_lanes, 8
+        or      unused_lanes, idx
+        mov     [state + _unused_lanes_sha512], unused_lanes
+
+        mov     p, [job_rax + _auth_tag_output]
+
+        ;; scale idx*64
+        shl     idx, 6
+
+%if (SHA_X_DIGEST_SIZE != 384)
+        cmp     qword [job_rax + _auth_tag_output_len_in_bytes], 32
+        jne     copy_full_digest
+%else
+        cmp     qword [job_rax + _auth_tag_output_len_in_bytes], 24
+        jne     copy_full_digest
+%endif
+
+%if (SHA_X_DIGEST_SIZE != 384)
+        ;; copy 32 bytes for SHA512 / 24 bytes for SHA384
+        vmovdqu ymm0, [state + _args_digest_sha512 + idx]
+        vpshufb ymm0, [rel byteswap]
+        vmovdqu [p], ymm0
+%else
+        vmovdqu xmm0, [state + _args_digest_sha512 + idx]
+        vpshufb xmm0, [rel byteswap]
+        mov     QWORD(tmp2), [state + _args_digest_sha512 + idx + 16]
+        bswap   QWORD(tmp2)
+        vmovdqu [p], xmm0
+        mov     [p + 16], QWORD(tmp2)
+%endif
+        jmp     clear_ret
+
+copy_full_digest:
+        ;; copy 64 bytes for SHA512 / 48 bytes for SHA384
+%if (SHA_X_DIGEST_SIZE != 384)
+        vmovdqu ymm0, [state + _args_digest_sha512 + idx + 0*SHA512_DIGEST_WORD_SIZE]
+        vmovdqu ymm1, [state + _args_digest_sha512 + idx + 4*SHA512_DIGEST_WORD_SIZE]
+        vpshufb ymm0, [rel byteswap]
+        vpshufb ymm1, [rel byteswap]
+        vmovdqu [p], ymm0
+        vmovdqu [p + 32], ymm1
+%else
+        vmovdqu ymm0, [state + _args_digest_sha512 + idx + 0*SHA512_DIGEST_WORD_SIZE]
+        vmovdqu xmm1, [state + _args_digest_sha512 + idx + 4*SHA512_DIGEST_WORD_SIZE]
+        vpshufb ymm0, [rel byteswap]
+        vpshufb xmm1, [rel byteswap]
+        vmovdqu [p], ymm0
+        vmovdqu [p + 32], xmm1
+%endif
+
+clear_ret:
+%ifdef SAFE_DATA
+        vpxor   ymm0, ymm0
+
+        ;; Clear digest (48B/64B), outer_block (48B/64B) and extra_block (128B) of returned job
+%assign I 0
+%rep 2
+        cmp     qword [state + _ldata_sha512 + (I*_SHA512_LANE_DATA_size) + _job_in_lane_sha512], 0
+        jne     APPEND(skip_clear_,I)
+
+        ;; Clear digest (48 bytes for SHA-384, 64 bytes for SHA-512)
+        vmovdqa [state + _args_digest_sha512 + I*64], ymm0
+%if (SHA_X_DIGEST_SIZE == 384)
+        vmovdqa [state + _args_digest_sha512 + I*64 + 32], xmm0
+%else
+        vmovdqa [state + _args_digest_sha512 + I*64 + 32], ymm0
+%endif
+
+        lea     lane_data, [state + _ldata_sha512 + (I*_SHA512_LANE_DATA_size)]
+        ;; Clear first 128 bytes of extra_block
+%assign offset 0
+%rep 4
+        vmovdqa [lane_data + _extra_block + offset], ymm0
+%assign offset (offset + 32)
+%endrep
+
+        ;; Clear first 48 bytes (SHA-384) or 64 bytes (SHA-512) of outer_block
+        vmovdqu [lane_data + _outer_block], ymm0
+%if (SHA_X_DIGEST_SIZE == 384)
+        vmovdqa [lane_data + _outer_block + 32], xmm0
+%else
+        vmovdqu [lane_data + _outer_block + 32], ymm0
+%endif
+
+APPEND(skip_clear_,I):
+%assign I (I+1)
+%endrep
+
+%endif ;; SAFE_DATA
+
+return:
+        vzeroupper
+
+        mov     rbx, [rsp + _gpr_save + 8*0]
+        mov     rbp, [rsp + _gpr_save + 8*1]
+        mov     r12, [rsp + _gpr_save + 8*2]
+        mov     rsp, [rsp + _rsp_save]  ; original SP
+
+        ret
+
+mksection stack-noexec
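Review note: the flush path above follows the usual OOO-manager pattern: clone
the one live lane into every idle lane (len_masks forces idle lanes to length
0xFFFF), run the x2 core until the shortest lane reaches zero, then complete
that job. A rough C model of the vphminposuw-based lane selection, with
simplified types standing in for the real OOO structures (illustrative
assumption, not library code):

    #include <stdint.h>

    #define NUM_LANES 2

    /* Return the lane with the smallest remaining length; idle lanes are
     * parked at UINT16_MAX so they never win. Hardware: vphminposuw. */
    static int find_min_lane(const uint16_t lens[NUM_LANES], uint16_t *min_len)
    {
            int min_idx = -1;
            uint16_t best = UINT16_MAX;

            for (int i = 0; i < NUM_LANES; i++)
                    if (lens[i] < best) {
                            best = lens[i];
                            min_idx = i;
                    }
            *min_len = best;
            return min_idx;
    }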
diff --git a/lib/avx2_t4/mb_mgr_hmac_sha512_submit_ni_avx2.asm b/lib/avx2_t4/mb_mgr_hmac_sha512_submit_ni_avx2.asm
new file mode 100644
index 0000000000000000000000000000000000000000..d4bd4450610df741735bb95c55f31479c2dfe23c
--- /dev/null
+++ b/lib/avx2_t4/mb_mgr_hmac_sha512_submit_ni_avx2.asm
@@ -0,0 +1,391 @@
+;; Copyright (c) 2024, Intel Corporation
+;;
+;; Redistribution and use in source and binary forms, with or without
+;; modification, are permitted provided that the following conditions are met:
+;;
+;;     * Redistributions of source code must retain the above copyright notice,
+;;       this list of conditions and the following disclaimer.
+;;     * Redistributions in binary form must reproduce the above copyright
+;;       notice, this list of conditions and the following disclaimer in the
+;;       documentation and/or other materials provided with the distribution.
+;;     * Neither the name of Intel Corporation nor the names of its contributors
+;;       may be used to endorse or promote products derived from this software
+;;       without specific prior written permission.
+;;
+;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;
+
+%include "include/os.inc"
+%include "include/imb_job.inc"
+%include "include/mb_mgr_datastruct.inc"
+%include "include/reg_sizes.inc"
+%include "include/memcpy.inc"
+%include "include/const.inc"
+
+%ifndef FUNC
+%define FUNC submit_job_hmac_sha_512_ni_avx2
+%define SHA_X_DIGEST_SIZE 512
+%endif
+
+extern sha512_ni_x2_avx2
+
+mksection .rodata
+default rel
+align 16
+byteswap:
+        dq 0x0001020304050607, 0x08090a0b0c0d0e0f
+        dq 0x0001020304050607, 0x08090a0b0c0d0e0f
+
+unused_lane_lens:
+        dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF
+
+mksection .text
+
+%if 1
+%ifdef LINUX
+%define arg1    rdi
+%define arg2    rsi
+%define reg3    rcx
+%define reg4    rdx
+%else
+%define arg1    rcx
+%define arg2    rdx
+%define reg3    rdi
+%define reg4    rsi
+%endif
+
+%define state   arg1
+%define job     arg2
+%define len2    arg2
+
+; idx needs to be in rbp, r13, r14, r16
+%define last_len        rbp
+%define idx             rbp
+
+%define p               r11
+%define start_offset    r11
+
+%define unused_lanes    rbx
+%define tmp4            rbx
+
+%define job_rax         rax
+%define len             rax
+
+%define size_offset     reg3
+%define tmp2            reg3
+
+%define lane            reg4
+%define tmp3            reg4
+
+%define extra_blocks    r8
+
+%define tmp             r9
+%define p2              r9
+
+%define lane_data       r10
+
+%endif
+
+; Define stack usage
+struc STACK
+_gpr_save:      resq    5
+_rsp_save:      resq    1
+endstruc
+
+; JOB* FUNC(MB_MGR_HMAC_sha_512_OOO *state, IMB_JOB *job)
+; arg 1 : rcx : state
+; arg 2 : rdx : job
+MKGLOBAL(FUNC,function,internal)
+FUNC:
+        mov     rax, rsp
+        sub     rsp, STACK_size
+        mov     [rsp + _gpr_save + 8*0], rbx
+        mov     [rsp + _gpr_save + 8*1], rbp
+        mov     [rsp + _gpr_save + 8*2], r12
+%ifndef LINUX
+        mov     [rsp + _gpr_save + 8*3], rsi
+        mov     [rsp + _gpr_save + 8*4], rdi
+%endif
+        mov     [rsp + _rsp_save], rax  ; original SP
+
+        mov     unused_lanes, [state + _unused_lanes_sha512]
+        movzx   lane, BYTE(unused_lanes)
+        shr     unused_lanes, 8
+        imul    lane_data, lane, _SHA512_LANE_DATA_size
+        lea     lane_data, [state + _ldata_sha512 + lane_data]
+        mov     [state + _unused_lanes_sha512], unused_lanes
+        mov     len, [job + _msg_len_to_hash_in_bytes]
+        mov     tmp, len
+        shr     tmp, 7  ; divide by 128, len in terms of blocks
+
+        mov     [lane_data + _job_in_lane_sha512], job
+        mov     dword [lane_data + _outer_done_sha512], 0
+
+        vmovdqa xmm0, [state + _lens_sha512]
+        XVPINSRW xmm0, xmm1, extra_blocks, lane, tmp, scale_x16
+        ;; reset unused lanes to UINT16_MAX before storing
+        vpor    xmm0, [rel unused_lane_lens]
+        vmovdqa [state + _lens_sha512], xmm0
+
+        mov     last_len, len
+        and     last_len, 127
+        lea     extra_blocks, [last_len + 17 + 127]
+        shr     extra_blocks, 7
+        mov     [lane_data + _extra_blocks_sha512], DWORD(extra_blocks)
+
+        mov     p, [job + _src]
+        add     p, [job + _hash_start_src_offset_in_bytes]
+        mov     [state + _args_data_ptr_sha512 + PTR_SZ*lane], p
+
+        cmp     len, 128
+        jb      copy_lt128
+
+fast_copy:
+        add     p, len
+        vmovdqu ymm0, [p - 128 + 0*32]
+        vmovdqu ymm1, [p - 128 + 1*32]
+        vmovdqu ymm2, [p - 128 + 2*32]
+        vmovdqu ymm3, [p - 128 + 3*32]
+        vmovdqu [lane_data + _extra_block_sha512 + 0*32], ymm0
+        vmovdqu [lane_data + _extra_block_sha512 + 1*32], ymm1
+        vmovdqu [lane_data + _extra_block_sha512 + 2*32], ymm2
+        vmovdqu [lane_data + _extra_block_sha512 + 3*32], ymm3
+end_fast_copy:
+
+        mov     size_offset, extra_blocks
+        shl     size_offset, 7
+        sub     size_offset, last_len
+        add     size_offset, 128-8
+        mov     [lane_data + _size_offset_sha512], DWORD(size_offset)
+        mov     start_offset, 128
+        sub     start_offset, last_len
+        mov     [lane_data + _start_offset_sha512], DWORD(start_offset)
+
+        lea     tmp, [8*128 + 8*len]
+        bswap   tmp
+        mov     [lane_data + _extra_block_sha512 + size_offset], tmp
+
+        mov     tmp, [job + _auth_key_xor_ipad]
+
+        vmovdqu ymm0, [tmp]
+        vmovdqu ymm1, [tmp + 32]
+        lea     tmp, [lane*8]
+        vmovdqu [state + _args_digest_sha512 + tmp*8], ymm0
+        vmovdqu [state + _args_digest_sha512 + tmp*8 + 32], ymm1
+
+        test    len, ~127
+        jnz     ge128_bytes
+
+lt128_bytes:
+        vmovdqa xmm0, [state + _lens_sha512]
+        XVPINSRW xmm0, xmm1, tmp, lane, extra_blocks, scale_x16
+        vmovdqa [state + _lens_sha512], xmm0
+
+        lea     tmp, [lane_data + _extra_block_sha512 + start_offset]
+        mov     [state + _args_data_ptr_sha512 + PTR_SZ*lane], tmp ;; 8 to hold a UINT8
+        mov     dword [lane_data + _extra_blocks_sha512], 0
+
+ge128_bytes:
+        cmp     unused_lanes, 0xff
+        jne     return_null
+        jmp     start_loop
+
+        align   16
+start_loop:
+        ; Find min length
+        vmovdqa xmm0, [state + _lens_sha512]
+        vphminposuw xmm1, xmm0
+        vpextrw DWORD(len2), xmm1, 0    ; min value
+        vpextrw DWORD(idx), xmm1, 1     ; min index (0...1)
+        cmp     len2, 0
+        je      len_is_0
+
+        vpshuflw xmm1, xmm1, 0x00
+        vpsubw  xmm0, xmm0, xmm1
+        vmovdqa [state + _lens_sha512], xmm0
+
+        ; "state" and "args" are the same address, arg1
+        ; len is arg2
+        call    sha512_ni_x2_avx2
+        ; state and idx are intact
+
+len_is_0:
+        ; process completed job "idx"
+        imul    lane_data, idx, _SHA512_LANE_DATA_size
+        lea     lane_data, [state + _ldata_sha512 + lane_data]
+        mov     DWORD(extra_blocks), [lane_data + _extra_blocks_sha512]
+        cmp     extra_blocks, 0
+        jne     proc_extra_blocks
+        cmp     dword [lane_data + _outer_done_sha512], 0
+        jne     end_loop
+
+proc_outer:
+        mov     dword [lane_data + _outer_done_sha512], 1
+        mov     DWORD(size_offset), [lane_data + _size_offset_sha512]
+        mov     qword [lane_data + _extra_block_sha512 + size_offset], 0
+
+        vmovdqa xmm0, [state + _lens_sha512]
+        XVPINSRW xmm0, xmm1, tmp, idx, 1, scale_x16
+        vmovdqa [state + _lens_sha512], xmm0
+
+        lea     tmp, [lane_data + _outer_block_sha512]
+        mov     job, [lane_data + _job_in_lane_sha512]
+        mov     [state + _args_data_ptr_sha512 + PTR_SZ*idx], tmp
+
+        lea     idx, [idx*8]    ;; scale up to SHA512_DIGEST_ROW_SIZE (8*8)
+        vmovdqu ymm0, [state + _args_digest_sha512 + idx*8]
+        vmovdqu ymm1, [state + _args_digest_sha512 + idx*8 + 32]
+        vpshufb ymm0, [rel byteswap]
+        vpshufb ymm1, [rel byteswap]
+        vmovdqu [lane_data + _outer_block_sha512], ymm0
+%if (SHA_X_DIGEST_SIZE != 384)
+        vmovdqu [lane_data + _outer_block_sha512+32], ymm1
+%else
+        vmovdqu [lane_data + _outer_block_sha512+32], xmm1
+%endif
+
+        mov     tmp, [job + _auth_key_xor_opad]
+        vmovdqu ymm0, [tmp]
+        vmovdqu ymm1, [tmp + 32]
+        vmovdqu [state + _args_digest_sha512 + idx*8], ymm0
+        vmovdqu [state + _args_digest_sha512 + idx*8 + 32], ymm1
+
+        jmp     start_loop
+
+        align   16
+proc_extra_blocks:
+        mov     DWORD(start_offset), [lane_data + _start_offset_sha512]
+
+        vmovdqa xmm0, [state + _lens_sha512]
+        XVPINSRW xmm0, xmm1, tmp, idx, extra_blocks, scale_x16
+        vmovdqa [state + _lens_sha512], xmm0
+
+        lea     tmp, [lane_data + _extra_block_sha512 + start_offset]
+        mov     [state + _args_data_ptr_sha512 + PTR_SZ*idx], tmp ;; idx is index of shortest length message
+        mov     dword [lane_data + _extra_blocks_sha512], 0
+        jmp     start_loop
+
+        align   16
+copy_lt128:
+        ;; less than one message block of data
+        ;; destination extra block but backwards by len from where 0x80 pre-populated
+        lea     p2, [lane_data + _extra_block + 128]
+        sub     p2, len
+        memcpy_avx2_128_1 p2, p, len, tmp4, tmp2, ymm0, ymm1, ymm2, ymm3
+        mov     unused_lanes, [state + _unused_lanes_sha512]
+        jmp     end_fast_copy
+
+return_null:
+        xor     job_rax, job_rax
+        jmp     return
+
+        align   16
+end_loop:
+        mov     job_rax, [lane_data + _job_in_lane_sha512]
+        mov     unused_lanes, [state + _unused_lanes_sha512]
+        mov     qword [lane_data + _job_in_lane_sha512], 0
+        or      dword [job_rax + _status], IMB_STATUS_COMPLETED_AUTH
+        shl     unused_lanes, 8
+        or      unused_lanes, idx
+        mov     [state + _unused_lanes_sha512], unused_lanes
+
+        mov     p, [job_rax + _auth_tag_output]
+
+        ;; scale idx*64
+        shl     idx, 6
+
+%if (SHA_X_DIGEST_SIZE != 384)
+        cmp     qword [job_rax + _auth_tag_output_len_in_bytes], 32
+        jne     copy_full_digest
+%else
+        cmp     qword [job_rax + _auth_tag_output_len_in_bytes], 24
+        jne     copy_full_digest
+%endif
+
+%if (SHA_X_DIGEST_SIZE != 384)
+        ;; copy 32 bytes for SHA512 / 24 bytes for SHA384
+        vmovdqu ymm0, [state + _args_digest_sha512 + idx]
+        vpshufb ymm0, [rel byteswap]
+        vmovdqu [p], ymm0
+%else
+        vmovdqu xmm0, [state + _args_digest_sha512 + idx]
+        vpshufb xmm0, [rel byteswap]
+        mov     QWORD(tmp2), [state + _args_digest_sha512 + idx + 16]
+        bswap   QWORD(tmp2)
+        vmovdqu [p], xmm0
+        mov     [p + 16], QWORD(tmp2)
+%endif
+        jmp     clear_ret
+
+copy_full_digest:
+        ;; copy 64 bytes for SHA512 / 48 bytes for SHA384
+%if (SHA_X_DIGEST_SIZE != 384)
+        vmovdqu ymm0, [state + _args_digest_sha512 + idx + 0*SHA512_DIGEST_WORD_SIZE]
+        vmovdqu ymm1, [state + _args_digest_sha512 + idx + 4*SHA512_DIGEST_WORD_SIZE]
+        vpshufb ymm0, [rel byteswap]
+        vpshufb ymm1, [rel byteswap]
+        vmovdqu [p], ymm0
+        vmovdqu [p + 32], ymm1
+%else
+        vmovdqu ymm0, [state + _args_digest_sha512 + idx + 0*SHA512_DIGEST_WORD_SIZE]
+        vmovdqu xmm1, [state + _args_digest_sha512 + idx + 4*SHA512_DIGEST_WORD_SIZE]
+        vpshufb ymm0, [rel byteswap]
+        vpshufb xmm1, [rel byteswap]
+        vmovdqu [p], ymm0
+        vmovdqu [p + 32], xmm1
+%endif
+
+clear_ret:
+%ifdef SAFE_DATA
+        ;; Clear digest (48B/64B), outer_block (48B/64B) and extra_block (128B) of returned job
+        vpxor   ymm0, ymm0
+        vmovdqa [state + _args_digest_sha512 + idx], ymm0
+%if (SHA_X_DIGEST_SIZE == 384)
+        vmovdqa [state + _args_digest_sha512 + idx + 32], xmm0
+%else
+        vmovdqa [state + _args_digest_sha512 + idx + 32], ymm0
+%endif
+
+        shr     idx, 6  ;; Restore lane idx to 0 or 1
+        imul    lane_data, idx, _SHA512_LANE_DATA_size
+        lea     lane_data, [state + _ldata_sha512 + lane_data]
+
+        ;; Clear first 128 bytes of extra_block
+%assign offset 0
+%rep 4
+        vmovdqa [lane_data + _extra_block + offset], ymm0
+%assign offset (offset + 32)
+%endrep
+
+        ;; Clear first 48 bytes (SHA-384) or 64 bytes (SHA-512) of outer_block
+        vmovdqu [lane_data + _outer_block], ymm0
+%if (SHA_X_DIGEST_SIZE == 384)
+        vmovdqa [lane_data + _outer_block + 32], xmm0
+%else
+        vmovdqu [lane_data + _outer_block + 32], ymm0
+%endif
+%endif ;; SAFE_DATA
+
+return:
+        vzeroupper
+
+        mov     rbx, [rsp + _gpr_save + 8*0]
+        mov     rbp, [rsp + _gpr_save + 8*1]
+        mov     r12, [rsp + _gpr_save + 8*2]
+%ifndef LINUX
+        mov     rsi, [rsp + _gpr_save + 8*3]
+        mov     rdi, [rsp + _gpr_save + 8*4]
+%endif
+        mov     rsp, [rsp + _rsp_save]  ; original SP
+        ret
+
+mksection stack-noexec
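Review note: the submit path packs the SHA-512 HMAC tail bookkeeping into a few
shifts. A worked C equivalent of the extra-block arithmetic, as I read the
assembly above (illustrative only; __builtin_bswap64 is a GCC/Clang builtin
standing in for the bswap instruction):

    #include <stdint.h>

    /* SHA-512 blocks are 128 B; the tail needs the 0x80 end-of-message byte
     * plus a 16-byte big-endian bit length, hence the "+ 17" below. */
    static void sha512_hmac_tail(uint64_t len, uint32_t *extra_blocks,
                                 uint32_t *size_offset, uint32_t *start_offset,
                                 uint64_t *length_field_be)
    {
            const uint64_t last_len = len & 127; /* bytes past last full block */

            *extra_blocks = (uint32_t) ((last_len + 17 + 127) >> 7);
            /* offset of the low 8 length bytes inside extra_block[] */
            *size_offset = (*extra_blocks << 7) - (uint32_t) last_len + 128 - 8;
            *start_offset = 128 - (uint32_t) last_len;
            /* message plus one IPAD block, in bits, stored big-endian */
            *length_field_be = __builtin_bswap64(8 * 128 + 8 * len);
    }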
diff --git a/lib/avx2_t4/sha512_hmac_ni_avx2.asm b/lib/avx2_t4/sha512_hmac_ni_avx2.asm
deleted file mode 100644
index 41c21df84ff82bf26dfebabc72bc409906692d75..0000000000000000000000000000000000000000
--- a/lib/avx2_t4/sha512_hmac_ni_avx2.asm
+++ /dev/null
@@ -1,437 +0,0 @@
-;;
-;; Copyright (c) 2023-2024, Intel Corporation
-;;
-;; Redistribution and use in source and binary forms, with or without
-;; modification, are permitted provided that the following conditions are met:
-;;
-;;     * Redistributions of source code must retain the above copyright notice,
-;;       this list of conditions and the following disclaimer.
-;;     * Redistributions in binary form must reproduce the above copyright
-;;       notice, this list of conditions and the following disclaimer in the
-;;       documentation and/or other materials provided with the distribution.
-;;     * Neither the name of Intel Corporation nor the names of its contributors
-;;       may be used to endorse or promote products derived from this software
-;;       without specific prior written permission.
-;;
-;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-;;
-
-;; FIPS PUB 180-4, FEDERAL INFORMATION PROCESSING STANDARDS PUBLICATION, Secure Hash Standard (SHS)
-;; https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.180-4.pdf
-
-extern sha512_update_ni_x1
-
-%include "include/os.inc"
-%include "include/constants.inc"
-%include "include/reg_sizes.inc"
-%include "include/imb_job.inc"
-%include "include/memcpy.inc"
-
-%ifdef LINUX
-
-%define arg1    rdi
-%define arg2    rsi
-%define arg3    rdx
-%define arg4    rcx
-
-%define gp1     rax
-%define gp2     r8
-%define gp3     r9
-%define gp4     r10
-%define gp5     r11
-%define gp6     arg4
-%define gp7     r12
-%define gp8     r13
-%define gp9     r14
-%define gp10    r15
-%define gp11    rbx
-%define gp12    rbp
-
-%else
-
-%define arg1    rcx
-%define arg2    rdx
-%define arg3    r8
-%define arg4    r9
-
-%define gp1     rax
-%define gp2     r10
-%define gp3     r11
-%define gp4     arg4
-%define gp5     rdi
-%define gp6     rsi
-%define gp7     r12
-%define gp8     r13
-%define gp9     r14
-%define gp10    r15
-%define gp11    rbx
-%define gp12    rbp
-
-%endif
-
-%xdefine t1 gp1
-%xdefine t2 gp2
-%xdefine t3 gp3
-%xdefine t4 gp4
-
-%xdefine r1 gp12
-%xdefine r2 gp11
-%xdefine r3 gp10
-%xdefine r4 gp9
-
-%define arg_job         r1
-%define arg_msg         r2
-%define arg_msg_length  r3
-%define arg_sha_type    r4
-
-;; HMAC-SHA512/384 stack frame
-struc STACK
-_B:             resb    SHA512_BLK_SZ           ; two SHA512 blocks (aligned to 16)
-_D:             resb    SHA512_DIGEST_SIZE      ; digest
-_gpr_save:      resq    8                       ; space for GPR's
-_rsp_save:      resq    1                       ; space for rsp pointer
-endstruc
-
-mksection .rodata
-
-align 32
-SHUFF_MASK:
-        dq 0x0001020304050607, 0x08090a0b0c0d0e0f
-        dq 0x0001020304050607, 0x08090a0b0c0d0e0f
-
-;; End-of-Message pattern
-align 32
-EOM_32BYTES:
-        db 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-        db 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-        db 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-        db 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-
-;; PAD BLOCKS are used for OPAD where digest of IPAD + message is put into the block.
-;; The blocks below fill up top 32 bytes of the block,
-;; low 64/48 bytes get filled with the digest followed by EOM.
-align 32
-SHA512_OPAD_LENGTH:
-        ;; last two qwords has to encode length in bits of: BLOCK size + DIGEST size
-        ;; (128 + 64) * 8 = 1536 = 0x600 in hex
-        db 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-        db 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-        db 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-        db 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00
-
-align 32
-SHA384_OPAD_LENGTH:
-        ;; last two qwords has to encode length in bits of: BLOCK size + DIGEST size
-        ;; (128 + 48) * 8 = 1408 = 0x580 in hex
-        db 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-        db 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-        db 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-        db 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x80
-
-mksection .text
-
-;; =============================================================================
-;; Save registers on the stack and create stack frame
-;; =============================================================================
-
-%macro FUNC_START 0
-        mov     rax, rsp
-        sub     rsp, STACK_size
-        and     rsp, -32
-        mov     [rsp + _rsp_save], rax
-        mov     [rsp + _gpr_save + 0*8], rbx
-        mov     [rsp + _gpr_save + 1*8], rbp
-        mov     [rsp + _gpr_save + 2*8], r12
-        mov     [rsp + _gpr_save + 3*8], r13
-        mov     [rsp + _gpr_save + 4*8], r14
-        mov     [rsp + _gpr_save + 5*8], r15
-%ifidn __OUTPUT_FORMAT__, win64
-        mov     [rsp + _gpr_save + 6*8], rdi
-        mov     [rsp + _gpr_save + 7*8], rsi
-%endif
-%endmacro
-
-;; =============================================================================
-;; Restore registers from the stack
-;; =============================================================================
-
-%macro FUNC_END 0
-        mov     rbx, [rsp + _gpr_save + 0*8]
-        mov     rbp, [rsp + _gpr_save + 1*8]
-        mov     r12, [rsp + _gpr_save + 2*8]
-        mov     r13, [rsp + _gpr_save + 3*8]
-        mov     r14, [rsp + _gpr_save + 4*8]
-        mov     r15, [rsp + _gpr_save + 5*8]
-%ifidn __OUTPUT_FORMAT__, win64
-        mov     rdi, [rsp + _gpr_save + 6*8]
-        mov     rsi, [rsp + _gpr_save + 7*8]
-%endif
-        mov     rsp, [rsp + _rsp_save]
-%endmacro
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; void sha512_tag_store(void *tag_ptr, uint64_t tag_len, ymm1:ymm0 tag)
-align 32
-MKGLOBAL(sha512_tag_store,function,internal)
-sha512_tag_store:
-        cmp     arg2, 16
-        jb      .tag_store_1_15
-        je      .tag_store_16
-
-        cmp     arg2, 32
-        je      .tag_store_32
-        jb      .tag_store_17_31
-
-        cmp     arg2, 48
-        je      .tag_store_48
-        jb      .tag_store_33_47
-
-        cmp     arg2, 64
-        je      .tag_store_64
-
-.tag_store_49_63:
-        vmovdqu [arg1 + 0*32], ymm0
-        vmovdqu [arg1 + 1*32], xmm1
-        vextracti128 xmm0, ymm1, 1
-        lea     arg1, [arg1 + 48]
-        sub     arg2, 48
-        jmp     .tag_store_1_15
-
-.tag_store_33_47:
-        vmovdqu [arg1 + 0*32], ymm0
-        lea     arg1, [arg1 + 32]
-        vmovdqa ymm0, ymm1
-        sub     arg2, 32
-        jmp     .tag_store_1_15
-
-.tag_store_17_31:
-        vmovdqu [arg1 + 0*16], xmm0
-        vextracti128 xmm0, ymm0, 1
-        lea     arg1, [arg1 + 16]
-        sub     arg2, 16
-        ;; fall through to store remaining tag bytes
-
-.tag_store_1_15:
-        simd_store_avx arg1, xmm0, arg2, t1, t2
-        jmp     .tag_store_end
-
-.tag_store_16:
-        vmovdqu [arg1 + 0*16], xmm0
-        jmp     .tag_store_end
-
-.tag_store_32:
-        vmovdqu [arg1 + 0*32], ymm0
-        jmp     .tag_store_end
-
-.tag_store_48:
-        vmovdqu [arg1 + 0*32], ymm0
-        vmovdqu [arg1 + 1*32], xmm1
-        jmp     .tag_store_end
-
-.tag_store_64:
-        vmovdqu [arg1 + 0*32], ymm0
-        vmovdqu [arg1 + 1*32], ymm1
-
-.tag_store_end:
-        ret
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; IMB_JOB *sha512_384_hmac_submit_ni_avx2(const unsigned sha_type, IMB_JOB *job)
-align 32
-MKGLOBAL(sha512_384_hmac_submit_ni_avx2,function,internal)
-sha512_384_hmac_submit_ni_avx2:
-        FUNC_START
-
-        ;; save input arguments
-        mov     arg_job, arg2
-        mov     arg_sha_type, arg1
-
-        ;; init the digest with IPAD
-        mov     t1, [arg_job + _auth_key_xor_ipad]
-        vmovdqu ymm0, [t1 + 0*32]
-        vmovdqu ymm1, [t1 + 1*32]
-        vmovdqa [rsp + _D + 0*32], ymm0
-        vmovdqa [rsp + _D + 1*32], ymm1
-
-        ;; update digest for full number of blocks
-        lea     arg1, [rsp + _D]
-        mov     arg2, [arg_job + _src]
-        add     arg2, [arg_job + _hash_start_src_offset]
-        mov     arg_msg, arg2
-        mov     arg_msg_length, [arg_job + _msg_len_to_hash_in_bytes]
-        mov     arg3, arg_msg_length
-        shr     arg3, 7 ;; msg_length / SHA512_BLK_SZ
-        call    sha512_update_ni_x1
-
-        ;; prepare partial block
-        mov     DWORD(arg3), SHA512_BLK_SZ - 1
-        not     arg3
-        and     arg3, arg_msg_length    ;; number of bytes processed already
-        add     arg_msg, arg3           ;; move message pointer to start of the partial block
-        mov     t2, arg_msg_length
-        sub     t2, arg3                ;; t2 = number of bytes left
-
-        xor     DWORD(arg1), DWORD(arg1)
-.partial_block_copy:
-        cmp     DWORD(arg1), DWORD(t2)
-        je      .partial_block_copy_exit
-        mov     BYTE(t1), [arg_msg + arg1]
-        mov     [rsp + _B + arg1], BYTE(t1)
-        inc     DWORD(arg1)
-        jmp     .partial_block_copy
-
-.partial_block_copy_exit:
-        ;; put end of message marker
-        mov     BYTE [rsp + _B + arg1], 0x80
-        inc     DWORD(arg1)
-
-        xor     DWORD(t1), DWORD(t1)
-.partial_block_zero:
-        cmp     DWORD(arg1), SHA512_BLK_SZ
-        je      .partial_block_zero_exit
-        mov     [rsp + _B + arg1], BYTE(t1)
-        inc     DWORD(arg1)
-        jmp     .partial_block_zero
-
-.partial_block_zero_exit:
-        cmp     DWORD(t2), SHA512_BLK_SZ - 16
-        jb      .add_msg_length
-
-        ;; if length field doesn't fit into this partial block
-        ;; - compute digest on the current block
-        ;; - clear the block for the length to be put into it next
-        lea     arg1, [rsp + _D]
-        lea     arg2, [rsp + _B]
-        mov     DWORD(arg3), 1
-        call    sha512_update_ni_x1
-
-        ;; clear the block
-        vpxor   xmm0, xmm0, xmm0
-        vmovdqa [rsp + _B + 0*32], ymm0
-        vmovdqa [rsp + _B + 1*32], ymm0
-        vmovdqa [rsp + _B + 2*32], ymm0
-        vmovdqa [rsp + _B + 3*32], xmm0 ;; the last 16 bytes will be set below
-
-.add_msg_length:
-        lea     arg2, [arg_msg_length + SHA512_BLK_SZ]  ;; original message length + IPAD block
-        lea     arg1, [arg2 * 8]        ;; length in bits
-        shr     arg2, 61
-        movbe   [rsp + _B + SHA512_BLK_SZ - 2*8], arg2
-        movbe   [rsp + _B + SHA512_BLK_SZ - 1*8], arg1
-
-        lea     arg1, [rsp + _D]
-        lea     arg2, [rsp + _B]
-        mov     DWORD(arg3), 1
-        call    sha512_update_ni_x1
-
-.process_opad:
-        cmp     DWORD(arg_sha_type), 512
-        jne     .opad_hmac_sha384
-
-.opad_hmac_sha512:
-        vmovdqa ymm0, [rsp + _D + 0*32]
-        vmovdqa ymm1, [rsp + _D + 1*32]
-        vpshufb ymm0, ymm0, [rel SHUFF_MASK]
-        vpshufb ymm1, ymm1, [rel SHUFF_MASK]
-        vmovdqa ymm2, [rel EOM_32BYTES]
-        vmovdqa ymm3, [rel SHA512_OPAD_LENGTH]
-        vmovdqa [rsp + _B + 0*32], ymm0
-        vmovdqa [rsp + _B + 1*32], ymm1
-        vmovdqa [rsp + _B + 2*32], ymm2
-        vmovdqa [rsp + _B + 3*32], ymm3
-        jmp     .opad_update
-
-.opad_hmac_sha384:
-        vmovdqa ymm0, [rsp + _D + 0*32]
-        vmovdqa xmm1, [rsp + _D + 1*32]
-        vpshufb ymm0, ymm0, [rel SHUFF_MASK]
-        vpshufb xmm1, xmm1, [rel SHUFF_MASK]
-        vinserti128 ymm1, [rel EOM_32BYTES], 1
-        vpxor   xmm2, xmm2, xmm2
-        vmovdqa ymm3, [rel SHA384_OPAD_LENGTH]
-        vmovdqa [rsp + _B + 0*32], ymm0
-        vmovdqa [rsp + _B + 1*32], ymm1
-        vmovdqa [rsp + _B + 2*32], ymm2
-        vmovdqa [rsp + _B + 3*32], ymm3
-
-.opad_update:
-        ;; init the digest with OPAD
-        mov     t1, [arg_job + _auth_key_xor_opad]
-        vmovdqu ymm0, [t1 + 0*32]
-        vmovdqu ymm1, [t1 + 1*32]
-        vmovdqa [rsp + _D + 0*32], ymm0
-        vmovdqa [rsp + _D + 1*32], ymm1
-
-        lea     arg1, [rsp + _D]
-        lea     arg2, [rsp + _B]
-        mov     DWORD(arg3), 1
-        call    sha512_update_ni_x1
-
-.tag_store_start:
-        ;; byte swap the digest and write it back
-        lea     arg1, [rsp + _D]
-        vmovdqa ymm0, [arg1 + 0*32]
-        vmovdqa ymm1, [arg1 + 1*32]
-        vpshufb ymm0, ymm0, [rel SHUFF_MASK]
-        vpshufb ymm1, ymm1, [rel SHUFF_MASK]
-
-        mov     arg1, [arg_job + _auth_tag_output]
-        mov     arg2, [arg_job + _auth_tag_output_len_in_bytes]
-        call    sha512_tag_store
-
-%ifdef SAFE_DATA
-        vpxor   xmm0, xmm0, xmm0
-        vpxor   xmm1, xmm1, xmm1
-        vpxor   xmm2, xmm2, xmm2
-        vpxor   xmm3, xmm3, xmm3
-
-        vmovdqu [rsp + _B + 0*32], ymm0
-        vmovdqu [rsp + _B + 1*32], ymm0
-        vmovdqu [rsp + _B + 2*32], ymm0
-        vmovdqu [rsp + _B + 3*32], ymm0
-%endif
-        vzeroupper
-
-        mov     rax, arg_job
-        or      dword [arg_job + _status], IMB_STATUS_COMPLETED_AUTH
-        FUNC_END
-        ret
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; IMB_JOB *submit_job_hmac_sha_512_ni_avx2(MB_MGR_HMAC_SHA_512_OOO *state, IMB_JOB *job)
-align 32
-MKGLOBAL(submit_job_hmac_sha_512_ni_avx2,function,internal)
-submit_job_hmac_sha_512_ni_avx2:
-        mov     DWORD(arg1), 512
-        jmp     sha512_384_hmac_submit_ni_avx2
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; IMB_JOB *submit_job_hmac_sha_384_ni_avx2(MB_MGR_SHA384_OOO *state, IMB_JOB *job)
-align 32
-MKGLOBAL(submit_job_hmac_sha_384_ni_avx2,function,internal)
-submit_job_hmac_sha_384_ni_avx2:
-        mov     DWORD(arg1), 384
-        jmp     sha512_384_hmac_submit_ni_avx2
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; IMB_JOB *flush_job_hmac_sha_512_ni_avx2(MB_MGR_SHA512_OOO *state)
-;; IMB_JOB *flush_job_hmac_sha_384_ni_avx2(MB_MGR_SHA384_OOO *state)
-align 32
-MKGLOBAL(flush_job_hmac_sha_512_ni_avx2,function,internal)
-MKGLOBAL(flush_job_hmac_sha_384_ni_avx2,function,internal)
-flush_job_hmac_sha_512_ni_avx2:
-flush_job_hmac_sha_384_ni_avx2:
-        xor     rax, rax
-        ret
-
-mksection stack-noexec
diff --git a/lib/avx2_t4/sha512_x2_ni_avx2.asm b/lib/avx2_t4/sha512_x2_ni_avx2.asm
new file mode 100644
index 0000000000000000000000000000000000000000..436182866ea43f01bf0ada8c11edf9297e48a4c5
--- /dev/null
+++ b/lib/avx2_t4/sha512_x2_ni_avx2.asm
@@ -0,0 +1,412 @@
+;;
+;; Copyright (c) 2024, Intel Corporation
+;;
+;; Redistribution and use in source and binary forms, with or without
+;; modification, are permitted provided that the following conditions are met:
+;;
+;;     * Redistributions of source code must retain the above copyright notice,
+;;       this list of conditions and the following disclaimer.
+;;     * Redistributions in binary form must reproduce the above copyright
+;;       notice, this list of conditions and the following disclaimer in the
+;;       documentation and/or other materials provided with the distribution.
+;;     * Neither the name of Intel Corporation nor the names of its contributors
+;;       may be used to endorse or promote products derived from this software
+;;       without specific prior written permission.
+;;
+;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;
+
+;; ===========================================================
+;; NOTE about comment format:
+;;
+;; ymm = a b c d
+;;       ^     ^
+;;       |     |
+;;  MSB--+     +--LSB
+;;
+;; a - most significant word in `ymm`
+;; d - least significant word in `ymm`
+;; ===========================================================
+
+%use smartalign
+
+%include "include/os.inc"
+%include "include/clear_regs.inc"
+%include "include/reg_sizes.inc"
+%include "include/mb_mgr_datastruct.inc"
+
+; resdq = res0 => 16 bytes
+struc frame
+.ABEF_SAVE      resy    1
+.CDGH_SAVE      resy    1
+.ABEF_SAVEb     resy    1
+.CDGH_SAVEb     resy    1
+endstruc
+
+%ifdef LINUX
+%define arg1    rdi
+%define arg2    rsi
+%define arg3    rdx
+%define arg4    rcx
+%else
+%define arg1    rcx
+%define arg2    rdx
+%define arg3    r8
+%define arg4    r9
+%endif
+
+%define args            arg1
+%define NUM_BLKS        arg2
+
+%define INP             arg3
+%define INPb            arg4
+
+%define SHA512_CONSTS   rax
+
+%define MSG             ymm0
+%define STATE0          ymm1
+%define STATE1          ymm2
+%define MSGTMP0         ymm3
+%define MSGTMP1         ymm4
+%define MSGTMP2         ymm5
+
+%define YTMP0           ymm6
+%define YTMP1           ymm7
+
+%define STATE0b         ymm8
+%define STATE1b         ymm9
+%define MSGb            ymm10
+
+%define YTMP2           ymm11
+%define YTMP3           ymm12
+
+%define MSGTMP0b        ymm13
+%define MSGTMP1b        ymm14
+%define MSGTMP2b        ymm15
+
+%define GP_STORAGE      6*8
+%ifndef LINUX
+%define XMM_STORAGE     10*16
+%else
+%define XMM_STORAGE     0
+%endif
+
+%define VARIABLE_OFFSET XMM_STORAGE + GP_STORAGE
+%define GP_OFFSET XMM_STORAGE
+
+%macro FUNC_SAVE 0
+        mov     r11, rsp
+        sub     rsp, VARIABLE_OFFSET
+        and     rsp, ~31        ; align rsp to 32 bytes
+
+        mov     [rsp + 0*8], rbx
+        mov     [rsp + 1*8], rbp
+        mov     [rsp + 2*8], r12
+%ifndef LINUX
+        mov     [rsp + 3*8], rsi
+        mov     [rsp + 4*8], rdi
+        vmovdqa [rsp + 3*16], xmm6
+        vmovdqa [rsp + 4*16], xmm7
+        vmovdqa [rsp + 5*16], xmm8
+        vmovdqa [rsp + 6*16], xmm9
+        vmovdqa [rsp + 7*16], xmm10
+        vmovdqa [rsp + 8*16], xmm11
+        vmovdqa [rsp + 9*16], xmm12
+        vmovdqa [rsp + 10*16], xmm13
+        vmovdqa [rsp + 11*16], xmm14
+        vmovdqa [rsp + 12*16], xmm15
+%endif ; LINUX
+        mov     [rsp + 5*8], r11 ;; rsp pointer
+%endmacro
+
+%macro FUNC_RESTORE 0
+        mov     rbx, [rsp + 0*8]
+        mov     rbp, [rsp + 1*8]
+        mov     r12, [rsp + 2*8]
+%ifndef LINUX
+        mov     rsi, [rsp + 3*8]
+        mov     rdi, [rsp + 4*8]
+        vmovdqa xmm6, [rsp + 3*16]
+        vmovdqa xmm7, [rsp + 4*16]
+        vmovdqa xmm8, [rsp + 5*16]
+        vmovdqa xmm9, [rsp + 6*16]
+        vmovdqa xmm10, [rsp + 7*16]
+        vmovdqa xmm11, [rsp + 8*16]
+        vmovdqa xmm12, [rsp + 9*16]
+        vmovdqa xmm13, [rsp + 10*16]
+        vmovdqa xmm14, [rsp + 11*16]
+        vmovdqa xmm15, [rsp + 12*16]
+%endif ; LINUX
+        mov     rsp, [rsp + 5*8] ;; rsp pointer
+%endmacro
+
+%macro SHA512ROUNDS4 7
+%define %%Y0 %1
+%define %%Y1 %2
+%define %%Y2 %3
+%define %%Y3 %4
+%define %%Y4 %5
+%define %%Y6 %6
+%define %%I  %7
+
+        vpaddq          %%Y0, %%Y3, [SHA512_CONSTS+32*%%I]
+        vpermq          YTMP3, %%Y3, 0x1b
+        vpermq          YTMP1, %%Y6, 0x39
+        vpblendd        YTMP1, YTMP3, YTMP1, 0x3f
+        vpaddq          %%Y4, %%Y4, YTMP1
+        vsha512msg2     %%Y4, %%Y3
+        vsha512rnds2    %%Y2, %%Y1, XWORD(%%Y0)
+        vperm2i128      %%Y0, %%Y0, %%Y0, 0x01
+        vsha512rnds2    %%Y1, %%Y2, XWORD(%%Y0)
+        vsha512msg1     %%Y6, XWORD(%%Y3)
+%endmacro
+
+%macro SHA512ROUNDS4_FINAL 7
+%define %%Y0 %1
+%define %%Y1 %2
+%define %%Y2 %3
+%define %%Y3 %4
+%define %%Y4 %5
+%define %%Y6 %6
+%define %%I  %7
+
+        vpaddq          %%Y0, %%Y3, [SHA512_CONSTS+32*%%I]
+        vpermq          YTMP3, %%Y3, 0x1b
+        vpermq          YTMP1, %%Y6, 0x39
+        vpblendd        YTMP1, YTMP3, YTMP1, 0x3f
+        vpaddq          %%Y4, %%Y4, YTMP1
+        vsha512msg2     %%Y4, %%Y3
+        vsha512rnds2    %%Y2, %%Y1, XWORD(%%Y0)
+        vperm2i128      %%Y0, %%Y0, %%Y0, 0x01
+        vsha512rnds2    %%Y1, %%Y2, XWORD(%%Y0)
+%endmacro
+
+;; re-use symbols from AVX codebase
+extern SHA512_K_AVX
+
+mksection .rodata
+default rel
+
+align 32
+SHUF_MASK:
+        dq 0x0001020304050607, 0x08090a0b0c0d0e0f
+        dq 0x0001020304050607, 0x08090a0b0c0d0e0f
+
+mksection .text
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; void sha512_ni_x2_avx2(SHA512_ARGS *args, UINT64 size_in_blocks)
+;; arg1 : pointer to args
+;; arg2 : size (in blocks) ;; assumed to be >= 1
+align 32
+MKGLOBAL(sha512_ni_x2_avx2,function,internal)
+sha512_ni_x2_avx2:
+        mov     r11, rsp
+        sub     rsp, frame_size
+        and     rsp, -32
+
+        or      NUM_BLKS, NUM_BLKS
+        je      .done_hash
+
+        ;; load input pointers
+        mov     INP, [args + _data_ptr_sha512 + 0*PTR_SZ]
+        mov     INPb, [args + _data_ptr_sha512 + 1*PTR_SZ]
+
+        ;; load constants pointer
+        lea     SHA512_CONSTS, [rel SHA512_K_AVX]
+
+        ;; load current hash value and transform
+        vmovdqu STATE0, [args + _args_digest_sha512 + 0*SHA512NI_DIGEST_ROW_SIZE]
+        vmovdqu STATE1, [args + _args_digest_sha512 + 0*SHA512NI_DIGEST_ROW_SIZE + 32]
+        vmovdqu STATE0b, [args + _args_digest_sha512 + 1*SHA512NI_DIGEST_ROW_SIZE]
+        vmovdqu STATE1b, [args + _args_digest_sha512 + 1*SHA512NI_DIGEST_ROW_SIZE + 32]
+
+        vperm2i128 YTMP1, STATE0, STATE1, 0x20
+        vperm2i128 YTMP0, STATE0b, STATE1b, 0x20
+        vperm2i128 STATE1, STATE0, STATE1, 0x31
+        vperm2i128 STATE1b, STATE0b, STATE1b, 0x31
+        vpermq  STATE0, YTMP1, 0x1b
+        vpermq  STATE0b, YTMP0, 0x1b
+        vpermq  STATE1, STATE1, 0x1b
+        vpermq  STATE1b, STATE1b, 0x1b
+
+align 32
+.block_loop:
+        ;; Save digests
+        vmovdqa [rsp + frame.ABEF_SAVE], STATE0
+        vmovdqa [rsp + frame.CDGH_SAVE], STATE1
+        vmovdqa [rsp + frame.ABEF_SAVEb], STATE0b
+        vmovdqa [rsp + frame.CDGH_SAVEb], STATE1b
+
+        ;; R0-R3
+        vmovdqu MSG, [INP+32*0]
+        vmovdqu MSGb, [INPb+32*0]
+        vpshufb MSG, MSG, [SHUF_MASK]
+        vpshufb MSGb, MSGb, [SHUF_MASK]
+        vmovdqu MSGTMP0, MSG
+        vmovdqu MSGTMP0b, MSGb
+        vpaddq  MSG, MSG, [SHA512_CONSTS+32*0]
+        vpaddq  MSGb, MSGb, [SHA512_CONSTS+32*0]
+        vsha512rnds2 STATE1, STATE0, XWORD(MSG)
+        vsha512rnds2 STATE1b, STATE0b, XWORD(MSGb)
+        vperm2i128 MSG, MSG, MSG, 0x01
+        vperm2i128 MSGb, MSGb, MSGb, 0x01
+        vsha512rnds2 STATE0, STATE1, XWORD(MSG)
+        vsha512rnds2 STATE0b, STATE1b, XWORD(MSGb)
+
+        ;; R4-7
+        vmovdqu MSG, [INP+32*1]
+        vmovdqu MSGb, [INPb+32*1]
+        vpshufb MSG, MSG, [SHUF_MASK]
+        vpshufb MSGb, MSGb, [SHUF_MASK]
+        vmovdqu MSGTMP1, MSG
+        vmovdqu MSGTMP1b, MSGb
+        vpaddq  MSG, MSG, [SHA512_CONSTS+32*1]
+        vpaddq  MSGb, MSGb, [SHA512_CONSTS+32*1]
+        vsha512rnds2 STATE1, STATE0, XWORD(MSG)
+        vsha512rnds2 STATE1b, STATE0b, XWORD(MSGb)
+        vperm2i128 MSG, MSG, MSG, 0x01
+        vperm2i128 MSGb, MSGb, MSGb, 0x01
+        vsha512rnds2 STATE0, STATE1, XWORD(MSG)
+        vsha512rnds2 STATE0b, STATE1b, XWORD(MSGb)
+        vsha512msg1 MSGTMP0, XWORD(MSGTMP1)
+        vsha512msg1 MSGTMP0b, XWORD(MSGTMP1b)
+
+        ;; R8-R11
+        vmovdqu MSG, [INP+32*2]
+        vmovdqu MSGb, [INPb+32*2]
+        vpshufb MSG, MSG, [SHUF_MASK]
+        vpshufb MSGb, MSGb, [SHUF_MASK]
+        vmovdqu MSGTMP2, MSG
+        vmovdqu MSGTMP2b, MSGb
+
+        vpaddq  MSG, MSG, [SHA512_CONSTS+32*2]
+        vpaddq  MSGb, MSGb, [SHA512_CONSTS+32*2]
+        vsha512rnds2 STATE1, STATE0, XWORD(MSG)
+        vsha512rnds2 STATE1b, STATE0b, XWORD(MSGb)
+        vperm2i128 MSG, MSG, MSG, 0x01
+        vperm2i128 MSGb, MSGb, MSGb, 0x01
+        vsha512rnds2 STATE0, STATE1, XWORD(MSG)
+        vsha512rnds2 STATE0b, STATE1b, XWORD(MSGb)
+        vsha512msg1 MSGTMP1, XWORD(MSGTMP2)
+        vsha512msg1 MSGTMP1b, XWORD(MSGTMP2b)
+
+        ;; R12-15
+        vmovdqu MSG, [INP+32*3]
+        vmovdqu MSGb, [INPb+32*3]
+        vpshufb MSG, MSG, [SHUF_MASK]
+        vpshufb MSGb, MSGb, [SHUF_MASK]
+        vmovdqu YTMP0, MSG
+        vmovdqu YTMP2, MSGb
+
+        ;; R16-75
+        SHA512ROUNDS4 MSG, STATE0, STATE1, YTMP0, MSGTMP0, MSGTMP2, 3
+        SHA512ROUNDS4 MSGb, STATE0b, STATE1b, YTMP2, MSGTMP0b, MSGTMP2b, 3
+        SHA512ROUNDS4 MSG, STATE0, STATE1, MSGTMP0, MSGTMP1, YTMP0, 4
+        SHA512ROUNDS4 MSGb, STATE0b, STATE1b, MSGTMP0b, MSGTMP1b, YTMP2, 4
+
+        SHA512ROUNDS4 MSG, STATE0, STATE1, MSGTMP1, MSGTMP2, MSGTMP0, 5
+        SHA512ROUNDS4 MSGb, STATE0b, STATE1b, MSGTMP1b, MSGTMP2b, MSGTMP0b, 5
+        SHA512ROUNDS4 MSG, STATE0, STATE1, MSGTMP2, YTMP0, MSGTMP1, 6
+        SHA512ROUNDS4 MSGb, STATE0b, STATE1b, MSGTMP2b, YTMP2, MSGTMP1b, 6
+
+        SHA512ROUNDS4 MSG, STATE0, STATE1, YTMP0, MSGTMP0, MSGTMP2, 7
+        SHA512ROUNDS4 MSGb, STATE0b, STATE1b, YTMP2, MSGTMP0b, MSGTMP2b, 7
+        SHA512ROUNDS4 MSG, STATE0, STATE1, MSGTMP0, MSGTMP1, YTMP0, 8
+        SHA512ROUNDS4 MSGb, STATE0b, STATE1b, MSGTMP0b, MSGTMP1b, YTMP2, 8
+
+        SHA512ROUNDS4 MSG, STATE0, STATE1, MSGTMP1, MSGTMP2, MSGTMP0, 9
+        SHA512ROUNDS4 MSGb, STATE0b, STATE1b, MSGTMP1b, MSGTMP2b, MSGTMP0b, 9
+        SHA512ROUNDS4 MSG, STATE0, STATE1, MSGTMP2, YTMP0, MSGTMP1, 10
+        SHA512ROUNDS4 MSGb, STATE0b, STATE1b, MSGTMP2b, YTMP2, MSGTMP1b, 10
+
+        SHA512ROUNDS4 MSG, STATE0, STATE1, YTMP0, MSGTMP0, MSGTMP2, 11
+        SHA512ROUNDS4 MSGb, STATE0b, STATE1b, YTMP2, MSGTMP0b, MSGTMP2b, 11
+        SHA512ROUNDS4 MSG, STATE0, STATE1, MSGTMP0, MSGTMP1, YTMP0, 12
+        SHA512ROUNDS4 MSGb, STATE0b, STATE1b, MSGTMP0b, MSGTMP1b, YTMP2, 12
+
+        SHA512ROUNDS4 MSG, STATE0, STATE1, MSGTMP1, MSGTMP2, MSGTMP0, 13
+        SHA512ROUNDS4 MSGb, STATE0b, STATE1b, MSGTMP1b, MSGTMP2b, MSGTMP0b, 13
+        SHA512ROUNDS4 MSG, STATE0, STATE1, MSGTMP2, YTMP0, MSGTMP1, 14
+        SHA512ROUNDS4 MSGb, STATE0b, STATE1b, MSGTMP2b, YTMP2, MSGTMP1b, 14
+
+        SHA512ROUNDS4 MSG, STATE0, STATE1, YTMP0, MSGTMP0, MSGTMP2, 15
+        SHA512ROUNDS4 MSGb, STATE0b, STATE1b, YTMP2, MSGTMP0b, MSGTMP2b, 15
+        SHA512ROUNDS4 MSG, STATE0, STATE1, MSGTMP0, MSGTMP1, YTMP0, 16
+        SHA512ROUNDS4 MSGb, STATE0b, STATE1b, MSGTMP0b, MSGTMP1b, YTMP2, 16
+
+        SHA512ROUNDS4_FINAL MSG, STATE0, STATE1, MSGTMP1, MSGTMP2, MSGTMP0, 17
+        SHA512ROUNDS4_FINAL MSGb, STATE0b, STATE1b, MSGTMP1b, MSGTMP2b, MSGTMP0b, 17
+        SHA512ROUNDS4_FINAL MSG, STATE0, STATE1, MSGTMP2, YTMP0, MSGTMP1, 18
+        SHA512ROUNDS4_FINAL MSGb, STATE0b, STATE1b, MSGTMP2b, YTMP2, MSGTMP1b, 18
+
+        ;; R76-79
+        vpaddq  MSG, YTMP0, [SHA512_CONSTS+32*19]
+        vpaddq  MSGb, YTMP2, [SHA512_CONSTS+32*19]
+        vsha512rnds2 STATE1, STATE0, XWORD(MSG)
+        vsha512rnds2 STATE1b, STATE0b, XWORD(MSGb)
+        vperm2i128 MSG, MSG, MSG, 0x01
+        vperm2i128 MSGb, MSGb, MSGb, 0x01
+        vsha512rnds2 STATE0, STATE1, XWORD(MSG)
+        vsha512rnds2 STATE0b, STATE1b, XWORD(MSGb)
+
+        vpaddq  STATE0, STATE0, [rsp + frame.ABEF_SAVE]
+        vpaddq  STATE1, STATE1, [rsp + frame.CDGH_SAVE]
+        vpaddq  STATE0b, STATE0b, [rsp + frame.ABEF_SAVEb]
+        vpaddq  STATE1b, STATE1b, [rsp + frame.CDGH_SAVEb]
+
+        lea     INP, [INP+128]
+        lea     INPb, [INPb+128]
+
+        dec     NUM_BLKS
+        jne     .block_loop
+
+        ;; Update input pointers
+        mov     [args + _data_ptr_sha512 + 0*PTR_SZ], INP
+        mov     [args + _data_ptr_sha512 + 1*PTR_SZ], INPb
+
+        ; Reorder and write back the hash value
+        vperm2i128 MSGTMP0, STATE0, STATE1, 0x31
+        vperm2i128 MSGTMP1, STATE0b, STATE1b, 0x31
+        vperm2i128 MSGTMP2, STATE0, STATE1, 0x20
+        vperm2i128 YTMP0, STATE0b, STATE1b, 0x20
+        vpermq  STATE0, MSGTMP0, 0xb1
+        vpermq  STATE1, MSGTMP2, 0xb1
+        vpermq  STATE0b, MSGTMP1, 0xb1
+        vpermq  STATE1b, YTMP0, 0xb1
+
+        ;; update digests
+        vmovdqu [args + _args_digest_sha512 + 0*SHA512NI_DIGEST_ROW_SIZE], STATE0
+        vmovdqu [args + _args_digest_sha512 + 0*SHA512NI_DIGEST_ROW_SIZE + 32], STATE1
+        vmovdqu [args + _args_digest_sha512 + 1*SHA512NI_DIGEST_ROW_SIZE], STATE0b
+        vmovdqu [args + _args_digest_sha512 + 1*SHA512NI_DIGEST_ROW_SIZE + 32], STATE1b
+
+        vzeroupper
+
+.done_hash:
+
+        mov     rsp, r11
+
+        ret
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; void call_sha512_ni_x2_avx2_from_c(SHA512_ARGS *args, UINT64 size_in_blocks);
+MKGLOBAL(call_sha512_ni_x2_avx2_from_c,function,internal)
+call_sha512_ni_x2_avx2_from_c:
+        FUNC_SAVE
+        call    sha512_ni_x2_avx2
+        FUNC_RESTORE
+        ret
+
+mksection stack-noexec
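Review note: sha512_ni_x2_avx2 keeps both lanes' message schedules resident in
ymm registers and interleaves vsha512rnds2/vsha512msg* for lane a and lane b.
A sketch of how the C wrapper is driven from the OOO-manager side (the
prototype matches the declaration in this patch; the SHA512_ARGS field names
and setup are illustrative assumptions, not verified library code):

    /* args layout comes from include/mb_mgr_datastruct.inc */
    void call_sha512_ni_x2_avx2_from_c(SHA512_ARGS *args, uint64_t size_in_blocks);

    /* Both lanes' data pointers and digests must be valid before the call;
     * each lane consumes the same number of 128-byte blocks and the data
     * pointers are advanced in place by the assembly. */
    args->data_ptr[0] = msg_lane0;
    args->data_ptr[1] = msg_lane1;
    call_sha512_ni_x2_avx2_from_c(args, num_blocks);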
write back the hash value + vperm2i128 MSGTMP0, STATE0, STATE1, 0x31 + vperm2i128 MSGTMP1, STATE0b, STATE1b, 0x31 + vperm2i128 MSGTMP2, STATE0, STATE1, 0x20 + vperm2i128 YTMP0, STATE0b, STATE1b, 0x20 + vpermq STATE0, MSGTMP0, 0xb1 + vpermq STATE1, MSGTMP2, 0xb1 + vpermq STATE0b, MSGTMP1, 0xb1 + vpermq STATE1b, YTMP0, 0xb1 + + ;; update digests + vmovdqu [args + _args_digest_sha512 + 0*SHA512NI_DIGEST_ROW_SIZE], STATE0 + vmovdqu [args + _args_digest_sha512 + 0*SHA512NI_DIGEST_ROW_SIZE + 32], STATE1 + vmovdqu [args + _args_digest_sha512 + 1*SHA512NI_DIGEST_ROW_SIZE], STATE0b + vmovdqu [args + _args_digest_sha512 + 1*SHA512NI_DIGEST_ROW_SIZE + 32], STATE1b + + vzeroupper + +.done_hash: + + mov rsp, r11 + + ret + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; void call_sha512_ni_x2_avx2_from_c(SHA512_ARGS *args, UINT64 size_in_blocks); +MKGLOBAL(call_sha512_ni_x2_avx2_from_c,function,internal) +call_sha512_ni_x2_avx2_from_c: + FUNC_SAVE + call sha512_ni_x2_avx2 + FUNC_RESTORE + ret + +mksection stack-noexec diff --git a/lib/avx2_t4/sha_ni_avx2.c b/lib/avx2_t4/sha_ni_avx2.c index 7cdab60a7383591dbb54374b49053c16ac1b2e95..2a08e9921cd5388a0fa0306f776d77f043a6e109 100644 --- a/lib/avx2_t4/sha_ni_avx2.c +++ b/lib/avx2_t4/sha_ni_avx2.c @@ -26,6 +26,7 @@ *******************************************************************************/ #include "include/sha_generic.h" +#include "include/sha_mb_mgr.h" #include "include/arch_avx2_type4.h" /* ========================================================================== */ @@ -68,54 +69,60 @@ sha512_ni_avx2(const void *data, const uint64_t length, void *digest) /* ========================================================================== */ /* - * SHA384 API for JOB API + * SHA384 MB API for JOB API */ IMB_JOB * submit_job_sha384_ni_avx2(MB_MGR_SHA_512_OOO *state, IMB_JOB *job) { - const void *msg = (job->src + job->hash_start_src_offset_in_bytes); - const uint64_t length = job->msg_len_to_hash_in_bytes; - uint64_t tag[8]; - +#ifdef SMX_NI + return submit_flush_job_sha_512(state, job, 2, 1, 384, IMB_SHA_384_BLOCK_SIZE, + SHA384_PAD_SIZE, call_sha512_ni_x2_avx2_from_c, 1); +#else (void) state; - - sha384_ni_avx2(msg, length, tag); - memcpy(job->auth_tag_output, tag, job->auth_tag_output_len_in_bytes); - job->status |= IMB_STATUS_COMPLETED_AUTH; - return job; + (void) job; + return NULL; +#endif /* ifdef SMX_NI */ } IMB_JOB * flush_job_sha384_ni_avx2(MB_MGR_SHA_512_OOO *state, IMB_JOB *job) { +#ifdef SMX_NI + return submit_flush_job_sha_512(state, job, 2, 0, 384, IMB_SHA_384_BLOCK_SIZE, + SHA384_PAD_SIZE, call_sha512_ni_x2_avx2_from_c, 1); +#else (void) state; (void) job; return NULL; +#endif /* ifdef SMX_NI */ } /* ========================================================================== */ /* - * SHA512 API for JOB API + * SHA512 MB API for JOB API */ IMB_JOB * submit_job_sha512_ni_avx2(MB_MGR_SHA_512_OOO *state, IMB_JOB *job) { - const void *msg = (job->src + job->hash_start_src_offset_in_bytes); - const uint64_t length = job->msg_len_to_hash_in_bytes; - uint64_t tag[8]; - +#ifdef SMX_NI + return submit_flush_job_sha_512(state, job, 2, 1, 512, IMB_SHA_512_BLOCK_SIZE, + SHA512_PAD_SIZE, call_sha512_ni_x2_avx2_from_c, 1); +#else (void) state; - - sha512_ni_avx2(msg, length, tag); - memcpy(job->auth_tag_output, tag, job->auth_tag_output_len_in_bytes); - job->status |= IMB_STATUS_COMPLETED_AUTH; - return job; + (void) job; + return NULL; +#endif /* ifdef 
SMX_NI */ } IMB_JOB * flush_job_sha512_ni_avx2(MB_MGR_SHA_512_OOO *state, IMB_JOB *job) { +#ifdef SMX_NI + return submit_flush_job_sha_512(state, job, 2, 0, 512, IMB_SHA_512_BLOCK_SIZE, + SHA512_PAD_SIZE, call_sha512_ni_x2_avx2_from_c, 1); +#else (void) state; (void) job; return NULL; +#endif /* ifdef SMX_NI */ } diff --git a/lib/avx512_t1/mb_mgr_avx512_t1.c b/lib/avx512_t1/mb_mgr_avx512_t1.c index 334b016f9b6fbd843039077712f6f5bae2bd2dc3..813d110026d8f6d5d1cd3b4b7bfaa288bb74a199 100644 --- a/lib/avx512_t1/mb_mgr_avx512_t1.c +++ b/lib/avx512_t1/mb_mgr_avx512_t1.c @@ -74,6 +74,8 @@ #define SUBMIT_CIPHER_BURST_NOCHECK submit_cipher_burst_nocheck_avx512_t1 #define SUBMIT_HASH_BURST submit_hash_burst_avx512_t1 #define SUBMIT_HASH_BURST_NOCHECK submit_hash_burst_nocheck_avx512_t1 +#define SUBMIT_AEAD_BURST submit_aead_burst_avx512_t1 +#define SUBMIT_AEAD_BURST_NOCHECK submit_aead_burst_nocheck_avx512_t1 #define SET_SUITE_ID_FN set_suite_id_avx512_t1 /* Hash */ @@ -442,6 +444,9 @@ init_mb_mgr_avx512_t1_internal(IMB_MGR *state, const int reset_mgrs) /* Set architecture for future checks */ state->used_arch = (uint32_t) IMB_ARCH_AVX512; + /* Set architecture type for future checks */ + state->used_arch_type = 1; + if (reset_mgrs) { reset_ooo_mgrs(state); @@ -465,6 +470,8 @@ init_mb_mgr_avx512_t1_internal(IMB_MGR *state, const int reset_mgrs) state->submit_cipher_burst_nocheck = SUBMIT_CIPHER_BURST_NOCHECK; state->submit_hash_burst = SUBMIT_HASH_BURST; state->submit_hash_burst_nocheck = SUBMIT_HASH_BURST_NOCHECK; + state->submit_aead_burst = SUBMIT_AEAD_BURST; + state->submit_aead_burst_nocheck = SUBMIT_AEAD_BURST_NOCHECK; state->set_suite_id = SET_SUITE_ID_FN; state->keyexp_128 = aes_keyexp_128_avx512; diff --git a/lib/avx512_t1/sha_mb_avx512.c b/lib/avx512_t1/sha_mb_avx512.c index 4c2cfc07c7d241c0ba064d7b9ed5b58859421d32..30bc20fa57b0fd583a91e86b4bfded40d0c7cf0e 100644 --- a/lib/avx512_t1/sha_mb_avx512.c +++ b/lib/avx512_t1/sha_mb_avx512.c @@ -111,7 +111,7 @@ IMB_JOB * submit_job_sha384_avx512(MB_MGR_SHA_512_OOO *state, IMB_JOB *job) { return submit_flush_job_sha_512(state, job, 8, 1, 384, IMB_SHA_512_BLOCK_SIZE, - SHA384_PAD_SIZE, call_sha512_x8_avx512_from_c); + SHA384_PAD_SIZE, call_sha512_x8_avx512_from_c, 0); } IMB_DLL_LOCAL @@ -119,7 +119,7 @@ IMB_JOB * flush_job_sha384_avx512(MB_MGR_SHA_512_OOO *state, IMB_JOB *job) { return submit_flush_job_sha_512(state, job, 8, 0, 384, IMB_SHA_512_BLOCK_SIZE, - SHA384_PAD_SIZE, call_sha512_x8_avx512_from_c); + SHA384_PAD_SIZE, call_sha512_x8_avx512_from_c, 0); } /* ========================================================================== */ @@ -132,7 +132,7 @@ IMB_JOB * submit_job_sha512_avx512(MB_MGR_SHA_512_OOO *state, IMB_JOB *job) { return submit_flush_job_sha_512(state, job, 8, 1, 512, IMB_SHA_512_BLOCK_SIZE, - SHA512_PAD_SIZE, call_sha512_x8_avx512_from_c); + SHA512_PAD_SIZE, call_sha512_x8_avx512_from_c, 0); } IMB_DLL_LOCAL @@ -140,5 +140,5 @@ IMB_JOB * flush_job_sha512_avx512(MB_MGR_SHA_512_OOO *state, IMB_JOB *job) { return submit_flush_job_sha_512(state, job, 8, 0, 512, IMB_SHA_512_BLOCK_SIZE, - SHA512_PAD_SIZE, call_sha512_x8_avx512_from_c); + SHA512_PAD_SIZE, call_sha512_x8_avx512_from_c, 0); } diff --git a/lib/avx512_t2/aes_cntr_ccm_api_by16_vaes_avx512.asm b/lib/avx512_t2/aes_cntr_ccm_api_by16_vaes_avx512.asm index 5e81d4f911e78d480d931ba607c5917e1535a7a2..175d76ce3dc6db3bdd9aff255b45418e9e720bed 100644 --- a/lib/avx512_t2/aes_cntr_ccm_api_by16_vaes_avx512.asm +++ b/lib/avx512_t2/aes_cntr_ccm_api_by16_vaes_avx512.asm @@ -28,12 +28,14 @@ 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; %include "include/aes_cntr_by16_vaes_avx512.inc" +%include "include/cet.inc" ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;IMB_JOB * aes_cntr_ccm_128_vaes_avx512(IMB_JOB *job) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; MKGLOBAL(aes_cntr_ccm_128_vaes_avx512,function,internal) aes_cntr_ccm_128_vaes_avx512: + endbranch64 FUNC_SAVE CNTR ;; arg1 - [in] job ;; arg2 - [in] NROUNDS @@ -48,6 +50,7 @@ aes_cntr_ccm_128_vaes_avx512: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; MKGLOBAL(aes_cntr_ccm_256_vaes_avx512,function,internal) aes_cntr_ccm_256_vaes_avx512: + endbranch64 FUNC_SAVE CNTR ;; arg1 - [in] job ;; arg2 - [in] NROUNDS diff --git a/lib/avx512_t2/mb_mgr_aes128_ccm_auth_submit_flush_x16_vaes_avx512.asm b/lib/avx512_t2/mb_mgr_aes128_ccm_auth_submit_flush_x16_vaes_avx512.asm index f04a6d81342829fb0879d40c08406ee9bfff3f40..1e30b69f8135762f7c229b66722037ae90103b3e 100644 --- a/lib/avx512_t2/mb_mgr_aes128_ccm_auth_submit_flush_x16_vaes_avx512.asm +++ b/lib/avx512_t2/mb_mgr_aes128_ccm_auth_submit_flush_x16_vaes_avx512.asm @@ -32,6 +32,7 @@ %include "include/const.inc" %include "include/memcpy.inc" %include "include/clear_regs.inc" +%include "include/cet.inc" %ifndef AES_CBC_MAC %define AES_CBC_MAC aes128_cbc_mac_vaes_avx512 @@ -726,12 +727,14 @@ align 64 ; arg 2 : job MKGLOBAL(SUBMIT_JOB_AES_CCM_AUTH,function,internal) SUBMIT_JOB_AES_CCM_AUTH: + endbranch64 GENERIC_SUBMIT_FLUSH_JOB_AES_CCM_AUTH_AVX SUBMIT ; IMB_JOB * flush_job_aes128/256_ccm_auth_vaes_avx512(MB_MGR_CCM_OOO *state) ; arg 1 : state MKGLOBAL(FLUSH_JOB_AES_CCM_AUTH,function,internal) FLUSH_JOB_AES_CCM_AUTH: + endbranch64 GENERIC_SUBMIT_FLUSH_JOB_AES_CCM_AUTH_AVX FLUSH mksection stack-noexec diff --git a/lib/avx512_t2/mb_mgr_avx512_t2.c b/lib/avx512_t2/mb_mgr_avx512_t2.c index bcc66811d12b9460f6966acc21f9a25e886cde93..ac375077addfe4d6c2aff590f3f8a2c79444b8a7 100644 --- a/lib/avx512_t2/mb_mgr_avx512_t2.c +++ b/lib/avx512_t2/mb_mgr_avx512_t2.c @@ -73,6 +73,8 @@ #define SUBMIT_CIPHER_BURST_NOCHECK submit_cipher_burst_nocheck_avx512_t2 #define SUBMIT_HASH_BURST submit_hash_burst_avx512_t2 #define SUBMIT_HASH_BURST_NOCHECK submit_hash_burst_nocheck_avx512_t2 +#define SUBMIT_AEAD_BURST submit_aead_burst_avx512_t2 +#define SUBMIT_AEAD_BURST_NOCHECK submit_aead_burst_nocheck_avx512_t2 #define GET_NEXT_BURST get_next_burst_avx512_t2 #define SUBMIT_BURST submit_burst_avx512_t2 #define SUBMIT_BURST_NOCHECK submit_burst_nocheck_avx512_t2 @@ -449,6 +451,9 @@ init_mb_mgr_avx512_t2_internal(IMB_MGR *state, const int reset_mgrs) /* Set architecture for future checks */ state->used_arch = (uint32_t) IMB_ARCH_AVX512; + /* Set architecture type for future checks */ + state->used_arch_type = 2; + if (reset_mgrs) { reset_ooo_mgrs(state); @@ -472,6 +477,8 @@ init_mb_mgr_avx512_t2_internal(IMB_MGR *state, const int reset_mgrs) state->submit_cipher_burst_nocheck = SUBMIT_CIPHER_BURST_NOCHECK; state->submit_hash_burst = SUBMIT_HASH_BURST; state->submit_hash_burst_nocheck = SUBMIT_HASH_BURST_NOCHECK; + state->submit_aead_burst = SUBMIT_AEAD_BURST; + state->submit_aead_burst_nocheck = SUBMIT_AEAD_BURST_NOCHECK; state->set_suite_id = SET_SUITE_ID_FN; state->keyexp_128 = aes_keyexp_128_avx512; diff --git a/lib/avx_t1/aes128_cntr_by8_avx.asm b/lib/avx_t1/aes128_cntr_by8_avx.asm index af1407bf3220f9ba9990326e85d8dd0983fed07c..b0d80ace5365a41056da9c04eedf4dce42f0b83a 100644 --- 
a/lib/avx_t1/aes128_cntr_by8_avx.asm +++ b/lib/avx_t1/aes128_cntr_by8_avx.asm @@ -31,6 +31,7 @@ %include "include/const.inc" %include "include/reg_sizes.inc" %include "include/clear_regs.inc" +%include "include/cet.inc" ; routine to do AES128 CNTR enc/decrypt "by8" ; XMM registers are clobbered. Saving/restoring must be done at a higher level @@ -593,6 +594,7 @@ align 32 ; arg 1 : job MKGLOBAL(aes_cntr_ccm_128_avx,function,internal) aes_cntr_ccm_128_avx: + endbranch64 DO_CNTR CCM %else ;; aes_cntr_128_avx(void *in, void *IV, void *keys, void *out, UINT64 num_bytes, diff --git a/lib/avx_t1/aes256_cntr_by8_avx.asm b/lib/avx_t1/aes256_cntr_by8_avx.asm index 61502bdffed2fcc72dfc9109be8f7741d22c63f3..237f9c4530dda64400d36261a052aaac5ca7174f 100644 --- a/lib/avx_t1/aes256_cntr_by8_avx.asm +++ b/lib/avx_t1/aes256_cntr_by8_avx.asm @@ -31,6 +31,7 @@ %include "include/const.inc" %include "include/reg_sizes.inc" %include "include/clear_regs.inc" +%include "include/cet.inc" ; routine to do AES256 CNTR enc/decrypt "by8" ; XMM registers are clobbered. Saving/restoring must be done at a higher level @@ -587,6 +588,7 @@ align 32 ; arg 1 : job MKGLOBAL(aes_cntr_ccm_256_avx,function,internal) aes_cntr_ccm_256_avx: + endbranch64 DO_CNTR CCM %else ;; aes_cntr_256_avx(void *in, void *IV, void *keys, void *out, UINT64 num_bytes, diff --git a/lib/avx_t1/mb_mgr_aes128_ccm_auth_submit_flush_x8_avx.asm b/lib/avx_t1/mb_mgr_aes128_ccm_auth_submit_flush_x8_avx.asm index 2f43a83fc32d2a4fe0ae0ac5120e11a1ad832e43..33064e70c48ffd51f4c1dad83d34e7bf251dbf83 100644 --- a/lib/avx_t1/mb_mgr_aes128_ccm_auth_submit_flush_x8_avx.asm +++ b/lib/avx_t1/mb_mgr_aes128_ccm_auth_submit_flush_x8_avx.asm @@ -31,6 +31,7 @@ %include "include/reg_sizes.inc" %include "include/const.inc" %include "include/memcpy.inc" +%include "include/cet.inc" %ifndef AES_CBC_MAC @@ -595,12 +596,14 @@ align 64 ; arg 2 : job MKGLOBAL(SUBMIT_JOB_AES_CCM_AUTH,function,internal) SUBMIT_JOB_AES_CCM_AUTH: + endbranch64 GENERIC_SUBMIT_FLUSH_JOB_AES_CCM_AUTH_AVX SUBMIT ; IMB_JOB * flush_job_aes128/256_ccm_auth_avx(MB_MGR_CCM_OOO *state) ; arg 1 : state MKGLOBAL(FLUSH_JOB_AES_CCM_AUTH,function,internal) FLUSH_JOB_AES_CCM_AUTH: + endbranch64 GENERIC_SUBMIT_FLUSH_JOB_AES_CCM_AUTH_AVX FLUSH mksection stack-noexec diff --git a/lib/avx_t1/mb_mgr_avx_t1.c b/lib/avx_t1/mb_mgr_avx_t1.c index b8f2a617edd216ad78affafedec124837d506a2a..52ab3f6e0eb3c87767523ab3318abfefac0c67da 100644 --- a/lib/avx_t1/mb_mgr_avx_t1.c +++ b/lib/avx_t1/mb_mgr_avx_t1.c @@ -66,6 +66,8 @@ #define SUBMIT_CIPHER_BURST_NOCHECK submit_cipher_burst_nocheck_avx_t1 #define SUBMIT_HASH_BURST submit_hash_burst_avx_t1 #define SUBMIT_HASH_BURST_NOCHECK submit_hash_burst_nocheck_avx_t1 +#define SUBMIT_AEAD_BURST submit_aead_burst_avx_t1 +#define SUBMIT_AEAD_BURST_NOCHECK submit_aead_burst_nocheck_avx_t1 #define SET_SUITE_ID_FN set_suite_id_avx_t1 /* Hash */ @@ -345,6 +347,9 @@ init_mb_mgr_avx_t1_internal(IMB_MGR *state, const int reset_mgrs) /* Set architecture for future checks */ state->used_arch = (uint32_t) IMB_ARCH_AVX; + /* Set architecture type for future checks */ + state->used_arch_type = 1; + if (reset_mgrs) { reset_ooo_mgrs(state); @@ -368,6 +373,8 @@ init_mb_mgr_avx_t1_internal(IMB_MGR *state, const int reset_mgrs) state->submit_cipher_burst_nocheck = SUBMIT_CIPHER_BURST_NOCHECK; state->submit_hash_burst = SUBMIT_HASH_BURST; state->submit_hash_burst_nocheck = SUBMIT_HASH_BURST_NOCHECK; + state->submit_aead_burst = SUBMIT_AEAD_BURST; + state->submit_aead_burst_nocheck = SUBMIT_AEAD_BURST_NOCHECK; 
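+	/*
+	 * Note: the AEAD burst entry points registered above currently service
+	 * IMB_CIPHER_CCM only; any other cipher mode sets IMB_ERR_CIPH_MODE and
+	 * completes 0 jobs (see submit_aead_burst_and_check() in mb_mgr_burst.h).
+	 */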
state->set_suite_id = SET_SUITE_ID_FN; state->keyexp_128 = aes_keyexp_128_avx; diff --git a/lib/avx_t1/sha_mb_avx.c b/lib/avx_t1/sha_mb_avx.c index e452f13244a2b50b7610a3997ece6347cd71db15..4c7f269d8ef7c3a01c3949ee23e2dd5cbb00bd79 100644 --- a/lib/avx_t1/sha_mb_avx.c +++ b/lib/avx_t1/sha_mb_avx.c @@ -111,7 +111,7 @@ IMB_JOB * submit_job_sha384_avx(MB_MGR_SHA_512_OOO *state, IMB_JOB *job) { return submit_flush_job_sha_512(state, job, 2, 1, 384, IMB_SHA_512_BLOCK_SIZE, - SHA384_PAD_SIZE, call_sha512_x2_avx_from_c); + SHA384_PAD_SIZE, call_sha512_x2_avx_from_c, 0); } IMB_DLL_LOCAL @@ -119,7 +119,7 @@ IMB_JOB * flush_job_sha384_avx(MB_MGR_SHA_512_OOO *state, IMB_JOB *job) { return submit_flush_job_sha_512(state, job, 2, 0, 384, IMB_SHA_512_BLOCK_SIZE, - SHA384_PAD_SIZE, call_sha512_x2_avx_from_c); + SHA384_PAD_SIZE, call_sha512_x2_avx_from_c, 0); } /* ========================================================================== */ @@ -132,7 +132,7 @@ IMB_JOB * submit_job_sha512_avx(MB_MGR_SHA_512_OOO *state, IMB_JOB *job) { return submit_flush_job_sha_512(state, job, 2, 1, 512, IMB_SHA_512_BLOCK_SIZE, - SHA512_PAD_SIZE, call_sha512_x2_avx_from_c); + SHA512_PAD_SIZE, call_sha512_x2_avx_from_c, 0); } IMB_DLL_LOCAL @@ -140,5 +140,5 @@ IMB_JOB * flush_job_sha512_avx(MB_MGR_SHA_512_OOO *state, IMB_JOB *job) { return submit_flush_job_sha_512(state, job, 2, 0, 512, IMB_SHA_512_BLOCK_SIZE, - SHA512_PAD_SIZE, call_sha512_x2_avx_from_c); + SHA512_PAD_SIZE, call_sha512_x2_avx_from_c, 0); } diff --git a/lib/avx_t2/mb_mgr_avx_t2.c b/lib/avx_t2/mb_mgr_avx_t2.c index f198097bb9eacf650426ec5597ea6950f3e665f5..3899e99589a0a1d79867a3354984a455ee2538ab 100644 --- a/lib/avx_t2/mb_mgr_avx_t2.c +++ b/lib/avx_t2/mb_mgr_avx_t2.c @@ -71,6 +71,8 @@ #define SUBMIT_CIPHER_BURST_NOCHECK submit_cipher_burst_nocheck_avx_t2 #define SUBMIT_HASH_BURST submit_hash_burst_avx_t2 #define SUBMIT_HASH_BURST_NOCHECK submit_hash_burst_nocheck_avx_t2 +#define SUBMIT_AEAD_BURST submit_aead_burst_avx_t2 +#define SUBMIT_AEAD_BURST_NOCHECK submit_aead_burst_nocheck_avx_t2 #define SET_SUITE_ID_FN set_suite_id_avx_t2 /* Hash */ @@ -350,6 +352,9 @@ init_mb_mgr_avx_t2_internal(IMB_MGR *state, const int reset_mgrs) /* Set architecture for future checks */ state->used_arch = (uint32_t) IMB_ARCH_AVX; + /* Set architecture type for future checks */ + state->used_arch_type = 2; + if (reset_mgrs) { reset_ooo_mgrs(state); @@ -373,6 +378,8 @@ init_mb_mgr_avx_t2_internal(IMB_MGR *state, const int reset_mgrs) state->submit_cipher_burst_nocheck = SUBMIT_CIPHER_BURST_NOCHECK; state->submit_hash_burst = SUBMIT_HASH_BURST; state->submit_hash_burst_nocheck = SUBMIT_HASH_BURST_NOCHECK; + state->submit_aead_burst = SUBMIT_AEAD_BURST; + state->submit_aead_burst_nocheck = SUBMIT_AEAD_BURST_NOCHECK; state->set_suite_id = SET_SUITE_ID_FN; state->keyexp_128 = aes_keyexp_128_avx; diff --git a/lib/include/arch_avx2_type4.h b/lib/include/arch_avx2_type4.h index 8b9628ce213091f08f29ac961773de527d021401..ba24878cedccaf89f5db05e5aed5800297215876 100644 --- a/lib/include/arch_avx2_type4.h +++ b/lib/include/arch_avx2_type4.h @@ -75,11 +75,6 @@ flush_job_sha384_ni_avx2(MB_MGR_SHA_512_OOO *state, IMB_JOB *job); IMB_JOB * flush_job_sha512_ni_avx2(MB_MGR_SHA_512_OOO *state, IMB_JOB *job); -IMB_JOB * -submit_job_sha384_ni_avx2(MB_MGR_SHA_512_OOO *state, IMB_JOB *job); -IMB_JOB * -submit_job_sha512_ni_avx2(MB_MGR_SHA_512_OOO *state, IMB_JOB *job); - IMB_JOB * flush_job_hmac_sha_384_ni_avx2(MB_MGR_HMAC_SHA_512_OOO *state); IMB_JOB * @@ -90,4 +85,7 @@ 
submit_job_hmac_sha_384_ni_avx2(MB_MGR_HMAC_SHA_512_OOO *state, IMB_JOB *job); IMB_JOB * submit_job_hmac_sha_512_ni_avx2(MB_MGR_HMAC_SHA_512_OOO *state, IMB_JOB *job); +void +call_sha512_ni_x2_avx2_from_c(SHA512_ARGS *args, uint64_t size_in_blocks); + #endif /* IMB_ASM_AVX2_T4_H */ diff --git a/lib/include/constants.inc b/lib/include/constants.inc index 152c1ca3b10c62af34b73da72e7f54d54d6ca0c8..a259ea0a085a2bb7cc4b43fe7d8394710523f1dd 100644 --- a/lib/include/constants.inc +++ b/lib/include/constants.inc @@ -59,6 +59,7 @@ ;; Sanity checks to fail build if not satisfied %define SHA1NI_DIGEST_ROW_SIZE (NUM_SHA1_DIGEST_WORDS * SHA1_DIGEST_WORD_SIZE) %define SHA256NI_DIGEST_ROW_SIZE (NUM_SHA256_DIGEST_WORDS * SHA256_DIGEST_WORD_SIZE) +%define SHA512NI_DIGEST_ROW_SIZE (NUM_SHA512_DIGEST_WORDS * SHA512_DIGEST_WORD_SIZE) %define MD5_BLK_SZ 128 ; in bytes %define SHA1_BLK_SZ 64 ; in bytes diff --git a/lib/include/ipsec_ooo_mgr.h b/lib/include/ipsec_ooo_mgr.h index c446099feae2b3e9af3c017c8063af53b407f2d9..4b67cadda1e1cc927fe9e89bbb59abab5f882413 100644 --- a/lib/include/ipsec_ooo_mgr.h +++ b/lib/include/ipsec_ooo_mgr.h @@ -215,6 +215,7 @@ typedef struct { IMB_JOB *job_in_lane[16]; uint64_t num_lanes_inuse; DECLARE_ALIGNED(uint64_t lens64[16], 64); + uint32_t total_num_lanes; uint64_t road_block; } MB_MGR_AES_OOO; @@ -265,6 +266,7 @@ typedef struct { DECLARE_ALIGNED(IMB_JOB *job_in_lane[16], 16); uint64_t num_lanes_inuse; DECLARE_ALIGNED(uint8_t init_blocks[16 * (4 * 16)], 64); + uint32_t total_num_lanes; uint64_t road_block; } MB_MGR_CCM_OOO; @@ -350,6 +352,7 @@ typedef struct { uint64_t unused_lanes; HMAC_SHA1_LANE_DATA ldata[AVX512_NUM_SHA1_LANES]; uint32_t num_lanes_inuse; + uint32_t total_num_lanes; uint64_t road_block; } MB_MGR_HMAC_SHA_1_OOO; @@ -359,6 +362,7 @@ typedef struct { uint64_t unused_lanes; HMAC_SHA1_LANE_DATA ldata[AVX512_NUM_SHA1_LANES]; uint32_t num_lanes_inuse; + uint32_t total_num_lanes; uint64_t road_block; } MB_MGR_SHA_1_OOO; @@ -368,6 +372,7 @@ typedef struct { uint64_t unused_lanes; HMAC_SHA1_LANE_DATA ldata[AVX512_NUM_SHA256_LANES]; uint32_t num_lanes_inuse; + uint32_t total_num_lanes; uint64_t road_block; } MB_MGR_HMAC_SHA_256_OOO; @@ -377,6 +382,7 @@ typedef struct { uint64_t unused_lanes; HMAC_SHA1_LANE_DATA ldata[AVX512_NUM_SHA256_LANES]; uint32_t num_lanes_inuse; + uint32_t total_num_lanes; uint64_t road_block; } MB_MGR_SHA_256_OOO; @@ -385,6 +391,7 @@ typedef struct { DECLARE_ALIGNED(uint16_t lens[8], 16); uint64_t unused_lanes; HMAC_SHA512_LANE_DATA ldata[AVX512_NUM_SHA512_LANES]; + uint32_t total_num_lanes; uint64_t road_block; } MB_MGR_HMAC_SHA_512_OOO; @@ -394,6 +401,7 @@ typedef struct { uint64_t unused_lanes; HMAC_SHA512_LANE_DATA ldata[AVX512_NUM_SHA512_LANES]; uint32_t num_lanes_inuse; + uint32_t total_num_lanes; uint64_t road_block; } MB_MGR_SHA_512_OOO; @@ -408,6 +416,7 @@ typedef struct { uint64_t unused_lanes; HMAC_SHA1_LANE_DATA ldata[AVX512_NUM_MD5_LANES]; uint32_t num_lanes_inuse; + uint32_t total_num_lanes; uint64_t road_block; } MB_MGR_HMAC_MD5_OOO; @@ -709,6 +718,96 @@ IMB_DLL_EXPORT uint32_t submit_hash_burst_nocheck_avx512_t2(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs, const IMB_HASH_ALG hash); +IMB_DLL_EXPORT uint32_t +submit_aead_burst_sse_t1(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs, + const IMB_CIPHER_MODE cipher, const IMB_CIPHER_DIRECTION dir, + const IMB_KEY_SIZE_BYTES key_size); +IMB_DLL_EXPORT uint32_t +submit_aead_burst_sse_t2(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs, + const IMB_CIPHER_MODE cipher, 
const IMB_CIPHER_DIRECTION dir, + const IMB_KEY_SIZE_BYTES key_size); +IMB_DLL_EXPORT uint32_t +submit_aead_burst_sse_t3(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs, + const IMB_CIPHER_MODE cipher, const IMB_CIPHER_DIRECTION dir, + const IMB_KEY_SIZE_BYTES key_size); +IMB_DLL_EXPORT uint32_t +submit_aead_burst_avx_t1(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs, + const IMB_CIPHER_MODE cipher, const IMB_CIPHER_DIRECTION dir, + const IMB_KEY_SIZE_BYTES key_size); +IMB_DLL_EXPORT uint32_t +submit_aead_burst_avx_t2(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs, + const IMB_CIPHER_MODE cipher, const IMB_CIPHER_DIRECTION dir, + const IMB_KEY_SIZE_BYTES key_size); +IMB_DLL_EXPORT uint32_t +submit_aead_burst_avx2_t1(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs, + const IMB_CIPHER_MODE cipher, const IMB_CIPHER_DIRECTION dir, + const IMB_KEY_SIZE_BYTES key_size); +IMB_DLL_EXPORT uint32_t +submit_aead_burst_avx2_t2(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs, + const IMB_CIPHER_MODE cipher, const IMB_CIPHER_DIRECTION dir, + const IMB_KEY_SIZE_BYTES key_size); +IMB_DLL_EXPORT uint32_t +submit_aead_burst_avx2_t3(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs, + const IMB_CIPHER_MODE cipher, const IMB_CIPHER_DIRECTION dir, + const IMB_KEY_SIZE_BYTES key_size); +IMB_DLL_EXPORT uint32_t +submit_aead_burst_avx2_t4(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs, + const IMB_CIPHER_MODE cipher, const IMB_CIPHER_DIRECTION dir, + const IMB_KEY_SIZE_BYTES key_size); +IMB_DLL_EXPORT uint32_t +submit_aead_burst_avx512_t1(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs, + const IMB_CIPHER_MODE cipher, const IMB_CIPHER_DIRECTION dir, + const IMB_KEY_SIZE_BYTES key_size); +IMB_DLL_EXPORT uint32_t +submit_aead_burst_avx512_t2(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs, + const IMB_CIPHER_MODE cipher, const IMB_CIPHER_DIRECTION dir, + const IMB_KEY_SIZE_BYTES key_size); + +IMB_DLL_EXPORT uint32_t +submit_aead_burst_nocheck_sse_t1(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs, + const IMB_CIPHER_MODE cipher, const IMB_CIPHER_DIRECTION dir, + const IMB_KEY_SIZE_BYTES key_size); +IMB_DLL_EXPORT uint32_t +submit_aead_burst_nocheck_sse_t2(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs, + const IMB_CIPHER_MODE cipher, const IMB_CIPHER_DIRECTION dir, + const IMB_KEY_SIZE_BYTES key_size); +IMB_DLL_EXPORT uint32_t +submit_aead_burst_nocheck_sse_t3(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs, + const IMB_CIPHER_MODE cipher, const IMB_CIPHER_DIRECTION dir, + const IMB_KEY_SIZE_BYTES key_size); +IMB_DLL_EXPORT uint32_t +submit_aead_burst_nocheck_avx_t1(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs, + const IMB_CIPHER_MODE cipher, const IMB_CIPHER_DIRECTION dir, + const IMB_KEY_SIZE_BYTES key_size); +IMB_DLL_EXPORT uint32_t +submit_aead_burst_nocheck_avx_t2(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs, + const IMB_CIPHER_MODE cipher, const IMB_CIPHER_DIRECTION dir, + const IMB_KEY_SIZE_BYTES key_size); +IMB_DLL_EXPORT uint32_t +submit_aead_burst_nocheck_avx2_t1(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs, + const IMB_CIPHER_MODE cipher, const IMB_CIPHER_DIRECTION dir, + const IMB_KEY_SIZE_BYTES key_size); +IMB_DLL_EXPORT uint32_t +submit_aead_burst_nocheck_avx2_t2(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs, + const IMB_CIPHER_MODE cipher, const IMB_CIPHER_DIRECTION dir, + const IMB_KEY_SIZE_BYTES key_size); +IMB_DLL_EXPORT uint32_t +submit_aead_burst_nocheck_avx2_t3(IMB_MGR *state, IMB_JOB *jobs, const 
uint32_t n_jobs,
+				  const IMB_CIPHER_MODE cipher, const IMB_CIPHER_DIRECTION dir,
+				  const IMB_KEY_SIZE_BYTES key_size);
+IMB_DLL_EXPORT uint32_t
+submit_aead_burst_nocheck_avx2_t4(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs,
+				  const IMB_CIPHER_MODE cipher, const IMB_CIPHER_DIRECTION dir,
+				  const IMB_KEY_SIZE_BYTES key_size);
+IMB_DLL_EXPORT uint32_t
+submit_aead_burst_nocheck_avx512_t1(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs,
+				    const IMB_CIPHER_MODE cipher, const IMB_CIPHER_DIRECTION dir,
+				    const IMB_KEY_SIZE_BYTES key_size);
+IMB_DLL_EXPORT uint32_t
+submit_aead_burst_nocheck_avx512_t2(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs,
+				    const IMB_CIPHER_MODE cipher, const IMB_CIPHER_DIRECTION dir,
+				    const IMB_KEY_SIZE_BYTES key_size);
+
 /* SSE TYPE1 manager functions */
 IMB_DLL_EXPORT IMB_JOB *
 submit_job_sse_t1(IMB_MGR *state);
diff --git a/lib/include/mb_mgr_burst.h b/lib/include/mb_mgr_burst.h
index da761536fa5bfeb13de689b05a8fd2ae5bc2220b..7d8709200fa8aad0b9a959bbbf1c6bc1b4cbd522 100644
--- a/lib/include/mb_mgr_burst.h
+++ b/lib/include/mb_mgr_burst.h
@@ -36,6 +36,102 @@
 #ifndef __aarch64__

 __forceinline uint32_t
+submit_aes_ccm_burst(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs,
+		     const IMB_KEY_SIZE_BYTES key_size, const int run_check,
+		     const IMB_CIPHER_DIRECTION dir)
+{
+	uint32_t completed_jobs = 0;
+	MB_MGR_CCM_OOO *aes_ccm_ooo;
+	typedef IMB_JOB *(*submit_ccm_t)(MB_MGR_CCM_OOO *state, IMB_JOB *job);
+	submit_ccm_t submit_auth_ccm_fn;
+	typedef IMB_JOB *(*flush_ccm_t)(MB_MGR_CCM_OOO *state);
+	flush_ccm_t flush_auth_ccm_fn;
+	typedef IMB_JOB *(*aes_cntr_ccm_t)(IMB_JOB *job);
+	aes_cntr_ccm_t cntr_ccm_fn;
+	uint32_t i;
+
+	if (key_size == IMB_KEY_128_BYTES) {
+		aes_ccm_ooo = state->aes_ccm_ooo;
+		submit_auth_ccm_fn = SUBMIT_JOB_AES128_CCM_AUTH;
+		flush_auth_ccm_fn = FLUSH_JOB_AES128_CCM_AUTH;
+		cntr_ccm_fn = AES_CNTR_CCM_128;
+	} else {
+		aes_ccm_ooo = state->aes256_ccm_ooo;
+		submit_auth_ccm_fn = SUBMIT_JOB_AES256_CCM_AUTH;
+		flush_auth_ccm_fn = FLUSH_JOB_AES256_CCM_AUTH;
+		cntr_ccm_fn = AES_CNTR_CCM_256;
+	}
+
+	if (run_check) {
+		/* validate jobs */
+		for (i = 0; i < n_jobs; i++) {
+			IMB_JOB *job = &jobs[i];
+
+			if (is_job_invalid(state, job, IMB_CIPHER_CCM, IMB_AUTH_AES_CCM, dir,
+					   key_size)) {
+				job->status = IMB_STATUS_INVALID_ARGS;
+				return 0;
+			}
+		}
+	}
+
+	if (dir == IMB_DIR_ENCRYPT) {
+		/* First authenticate with AES-CBC-MAC */
+		/* submit all jobs */
+		for (i = 0; i < n_jobs; i++) {
+			IMB_JOB *job = &jobs[i];
+
+			job = submit_auth_ccm_fn(aes_ccm_ooo, job);
+			if (job != NULL)
+				completed_jobs++;
+		}
+		/* flush any outstanding jobs */
+		if (completed_jobs != n_jobs)
+			while (flush_auth_ccm_fn(aes_ccm_ooo) != NULL)
+				completed_jobs++;
+
+		/* Then encrypt with AES-CTR */
+		for (i = 0; i < n_jobs; i++) {
+			IMB_JOB *job = &jobs[i];
+
+			cntr_ccm_fn(job);
+			job->status = IMB_STATUS_COMPLETED;
+		}
+	} else {
+		/* First decrypt with AES-CTR */
+		for (i = 0; i < n_jobs; i++) {
+			IMB_JOB *job = &jobs[i];
+
+			cntr_ccm_fn(job);
+		}
+
+		/* Then authenticate with AES-CBC-MAC */
+		/* submit all jobs */
+		for (i = 0; i < n_jobs; i++) {
+			IMB_JOB *job = &jobs[i];
+
+			job = submit_auth_ccm_fn(aes_ccm_ooo, job);
+			if (job != NULL) {
+				job->status = IMB_STATUS_COMPLETED;
+				completed_jobs++;
+			}
+		}
+		/* flush any outstanding jobs */
+		if (completed_jobs != n_jobs) {
+			IMB_JOB *job = NULL;
+
+			while ((job = flush_auth_ccm_fn(aes_ccm_ooo)) != NULL) {
+				job->status = IMB_STATUS_COMPLETED;
+				completed_jobs++;
+			}
+		}
+	}
+
+	return completed_jobs;
+}
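+/*
+ * Note: the CCM burst above is fully synchronous. Encryption runs a CBC-MAC
+ * pass (submit plus flush) over all n_jobs and then a CTR pass; decryption
+ * does the reverse. Every job is therefore completed on return unless
+ * validation rejects one up front.
+ */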
+
+__forceinline uint32_t
 submit_aes_cbc_burst_enc(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs,
			  const IMB_KEY_SIZE_BYTES key_size, const int run_check)
 {
@@ -432,6 +528,45 @@ SUBMIT_CIPHER_BURST_NOCHECK(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs
 }

 #ifndef __aarch64__
+__forceinline uint32_t
+submit_aead_burst_and_check(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs,
+			    const IMB_CIPHER_MODE cipher, const IMB_CIPHER_DIRECTION dir,
+			    const IMB_KEY_SIZE_BYTES key_size, const int run_check)
+{
+	/* reset error status */
+	imb_set_errno(state, 0);
+
+	if (run_check) {
+		if (jobs == NULL) {
+			imb_set_errno(state, IMB_ERR_NULL_BURST);
+			return 0;
+		}
+	}
+
+	if (cipher == IMB_CIPHER_CCM)
+		return submit_aes_ccm_burst(state, jobs, n_jobs, key_size, run_check, dir);
+
+	/* unsupported cipher mode */
+	imb_set_errno(state, IMB_ERR_CIPH_MODE);
+
+	return 0;
+}
+
+uint32_t
+SUBMIT_AEAD_BURST(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs,
+		  const IMB_CIPHER_MODE cipher, const IMB_CIPHER_DIRECTION dir,
+		  const IMB_KEY_SIZE_BYTES key_size)
+{
+	return submit_aead_burst_and_check(state, jobs, n_jobs, cipher, dir, key_size, 1);
+}
+
+uint32_t
+SUBMIT_AEAD_BURST_NOCHECK(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs,
+			  const IMB_CIPHER_MODE cipher, const IMB_CIPHER_DIRECTION dir,
+			  const IMB_KEY_SIZE_BYTES key_size)
+{
+	return submit_aead_burst_and_check(state, jobs, n_jobs, cipher, dir, key_size, 0);
+}
+
 __forceinline uint32_t
 submit_burst_hmac_sha_x(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs,
			 const int run_check, const IMB_HASH_ALG hash_alg)
diff --git a/lib/include/mb_mgr_datastruct.inc b/lib/include/mb_mgr_datastruct.inc
index da06c842c0936e01f230b10e660ef92903b11003..800fbcce306ab533005c787bd40353528beb29d7 100644
--- a/lib/include/mb_mgr_datastruct.inc
+++ b/lib/include/mb_mgr_datastruct.inc
@@ -62,6 +62,7 @@ FIELD _aes_unused_lanes, 8, 8
 FIELD	_aes_job_in_lane, 16*8, 8
 FIELD	_aes_lanes_in_use, 8, 8
 FIELD	_aes_lens_64, 16*8, 64
+FIELD	_aes_total_num_lanes, 4, 4
 FIELD	_aes_road_block, 8, 8
 END_FIELDS
 %assign _MB_MGR_AES_OOO_size _FIELD_OFFSET
@@ -178,6 +179,7 @@ FIELD _aes_ccm_unused_lanes, 8, 8
 FIELD	_aes_ccm_job_in_lane, 16*8, 16
 FIELD	_aes_ccm_num_lanes_inuse, 8, 8
 FIELD	_aes_ccm_init_blocks, 16*4*16, 64
+FIELD	_aes_ccm_total_num_lanes, 4, 4
 FIELD	_aes_ccm_road_block, 8, 8
 END_FIELDS
 %assign _MB_MGR_CCM_OOO_size _FIELD_OFFSET
@@ -321,6 +323,7 @@ FIELD _lens, 32, 32
 FIELD	_unused_lanes, 8, 8
 FIELD	_ldata, _HMAC_SHA1_LANE_DATA_size*MAX_SHA1_LANES, _HMAC_SHA1_LANE_DATA_align
 FIELD	_num_lanes_inuse_sha1, 4, 4
+FIELD	_total_num_lanes_sha1, 4, 4
 FIELD	_road_block_sha1, 8, 8
 END_FIELDS
 %assign _MB_MGR_HMAC_SHA_1_OOO_size _FIELD_OFFSET
@@ -348,6 +351,7 @@ FIELD _lens_sha256, 16*2, 16
 FIELD	_unused_lanes_sha256, 8, 8
 FIELD	_ldata_sha256, _HMAC_SHA1_LANE_DATA_size * MAX_SHA256_LANES, _HMAC_SHA1_LANE_DATA_align
 FIELD	_num_lanes_inuse_sha256, 4, 4
+FIELD	_total_num_lanes_sha256, 4, 4
 FIELD	_road_block_sha256, 8, 8
 END_FIELDS
 %assign _MB_MGR_HMAC_SHA_256_OOO_size _FIELD_OFFSET
@@ -375,6 +379,7 @@ FIELD _args_sha512, _SHA512_ARGS_size, _SHA512_ARGS_align
 FIELD	_lens_sha512, 16, 16
 FIELD	_unused_lanes_sha512, 8, 8
 FIELD	_ldata_sha512, _SHA512_LANE_DATA_size * MAX_SHA512_LANES, _SHA512_LANE_DATA_align
+FIELD	_total_num_lanes_sha512, 4, 4
 FIELD	_road_block_sha512, 8, 8
 END_FIELDS
 %assign _MB_MGR_HMAC_SHA_512_OOO_size _FIELD_OFFSET
@@ -404,6 +409,7 @@ FIELD _lens_md5, MAX_MD5_LANES*2, 16
 FIELD	_unused_lanes_md5, 8, 8
 FIELD	_ldata_md5,
_HMAC_SHA1_LANE_DATA_size * MAX_MD5_LANES, _HMAC_SHA1_LANE_DATA_align FIELD _num_lanes_inuse_md5, 4, 8 +FIELD _total_num_lanes_md5, 4, 4 FIELD _road_block_md5, 8, 8 END_FIELDS %assign _MB_MGR_HMAC_MD5_OOO_size _FIELD_OFFSET diff --git a/lib/include/noaesni.h b/lib/include/noaesni.h index 326c72c12df14687e7f1d0dc154416afb90a045a..2c8635acc01410b2624e659a73ef82dc28672391 100644 --- a/lib/include/noaesni.h +++ b/lib/include/noaesni.h @@ -86,6 +86,16 @@ submit_hash_burst_sse_no_aesni(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_j IMB_DLL_EXPORT uint32_t submit_hash_burst_nocheck_sse_no_aesni(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs, const IMB_HASH_ALG hash); + +IMB_DLL_EXPORT uint32_t +submit_aead_burst_sse_no_aesni(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs, + const IMB_CIPHER_MODE cipher, const IMB_CIPHER_DIRECTION dir, + const IMB_KEY_SIZE_BYTES key_size); +IMB_DLL_EXPORT uint32_t +submit_aead_burst_nocheck_sse_no_aesni(IMB_MGR *state, IMB_JOB *jobs, const uint32_t n_jobs, + const IMB_CIPHER_MODE cipher, const IMB_CIPHER_DIRECTION dir, + const IMB_KEY_SIZE_BYTES key_size); + IMB_DLL_EXPORT void aes_keyexp_128_sse_no_aesni(const void *key, void *enc_exp_keys, void *dec_exp_keys); IMB_DLL_EXPORT void diff --git a/lib/include/sha_mb_mgr.h b/lib/include/sha_mb_mgr.h index 7d1ebbee5b3f111705ddf0bac0e1a97522f4435c..702dc37185abb8267cd22754e84e34741fdee995 100644 --- a/lib/include/sha_mb_mgr.h +++ b/lib/include/sha_mb_mgr.h @@ -70,6 +70,18 @@ copy_bswap8_array_mb(void *dst, const void *src, const size_t num, const size_t outp[i] = bswap8(inp[lane + i * offset]); } +__forceinline void +copy_bswap8_array_mb_ni(void *dst, const void *src, const size_t num, const unsigned lane, + const int digest_row_sz) +{ + uint64_t *outp = (uint64_t *) dst; + const uint64_t *inp = (const uint64_t *) src; + size_t i; + + for (i = 0; i < num; i++) + outp[i] = bswap8(inp[digest_row_sz * lane + i]); +} + __forceinline void sha1_mb_init_digest(uint32_t *digest, const unsigned lane) { @@ -155,6 +167,19 @@ sha384_mb_init_digest(uint64_t *digest, const unsigned lane) digest[lane + 7 * 8] = SHA384_H7; } +__forceinline void +sha384_ni_mb_init_digest(uint64_t *digest, const unsigned lane) +{ + digest[8 * lane + 0] = SHA384_H0; + digest[8 * lane + 1] = SHA384_H1; + digest[8 * lane + 2] = SHA384_H2; + digest[8 * lane + 3] = SHA384_H3; + digest[8 * lane + 4] = SHA384_H4; + digest[8 * lane + 5] = SHA384_H5; + digest[8 * lane + 6] = SHA384_H6; + digest[8 * lane + 7] = SHA384_H7; +} + __forceinline void sha512_mb_init_digest(uint64_t *digest, const unsigned lane) { @@ -168,6 +193,19 @@ sha512_mb_init_digest(uint64_t *digest, const unsigned lane) digest[lane + 7 * 8] = SHA512_H7; } +__forceinline void +sha512_ni_mb_init_digest(uint64_t *digest, const unsigned lane) +{ + digest[8 * lane + 0] = SHA512_H0; + digest[8 * lane + 1] = SHA512_H1; + digest[8 * lane + 2] = SHA512_H2; + digest[8 * lane + 3] = SHA512_H3; + digest[8 * lane + 4] = SHA512_H4; + digest[8 * lane + 5] = SHA512_H5; + digest[8 * lane + 6] = SHA512_H6; + digest[8 * lane + 7] = SHA512_H7; +} + __forceinline void sha_mb_generic_init(void *digest, const int sha_type, const unsigned lane) { @@ -192,6 +230,10 @@ sha_ni_mb_generic_init(void *digest, const int sha_type, const unsigned lane) sha224_ni_mb_init_digest(digest, lane); else if (sha_type == 256) sha256_ni_mb_init_digest(digest, lane); + else if (sha_type == 384) + sha384_ni_mb_init_digest(digest, lane); + else if (sha_type == 512) + sha512_ni_mb_init_digest(digest, lane); } __forceinline void 
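The reason for the parallel `_ni_` helper family above is a digest layout difference. The x2/x4/x8 SIMD kernels keep the SHA-512 OOO digest interleaved by lane, while the SHA512-NI x2 kernel expects each lane's eight words contiguous in one row of SHA512NI_DIGEST_ROW_SIZE bytes. A minimal sketch of the two indexings (illustrative C, not part of the patch; `digest`, `lane` and `i` are placeholders):

	/* illustrative only: where word i of a given lane lives in each layout */
	uint64_t w_simd = digest[lane + i * 8]; /* interleaved, cf. sha512_mb_init_digest() */
	uint64_t w_ni = digest[8 * lane + i];   /* row-major, cf. sha512_ni_mb_init_digest() */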
@@ -219,6 +261,10 @@ sha_ni_mb_generic_write_digest(void *dst, const void *src, const int sha_type, c copy_bswap4_array_mb_ni(dst, src, NUM_SHA_224_DIGEST_WORDS, lane, 8); else if (sha_type == 256) copy_bswap4_array_mb_ni(dst, src, NUM_SHA_256_DIGEST_WORDS, lane, 8); + else if (sha_type == 384) + copy_bswap8_array_mb_ni(dst, src, NUM_SHA_384_DIGEST_WORDS, lane, 8); + else if (sha_type == 512) + copy_bswap8_array_mb_ni(dst, src, NUM_SHA_512_DIGEST_WORDS, lane, 8); } __forceinline void @@ -545,7 +591,8 @@ submit_flush_job_sha_256(MB_MGR_SHA_256_OOO *state, IMB_JOB *job, const unsigned __forceinline IMB_JOB * submit_flush_job_sha_512(MB_MGR_SHA_512_OOO *state, IMB_JOB *job, const unsigned max_jobs, const int is_submit, const int sha_type, const uint64_t blk_size, - const uint64_t pad_size, void (*fn)(SHA512_ARGS *, uint64_t)) + const uint64_t pad_size, void (*fn)(SHA512_ARGS *, uint64_t), + const int shani) { unsigned lane, min_idx; IMB_JOB *ret_job = NULL; @@ -561,7 +608,10 @@ submit_flush_job_sha_512(MB_MGR_SHA_512_OOO *state, IMB_JOB *job, const unsigned state->num_lanes_inuse++; state->args.data_ptr[lane] = job->src + job->hash_start_src_offset_in_bytes; - sha_mb_generic_init(state->args.digest, sha_type, lane); + if (shani) + sha_ni_mb_generic_init(state->args.digest, sha_type, lane); + else + sha_mb_generic_init(state->args.digest, sha_type, lane); /* copy job data in and set up initial blocks */ state->ldata[lane].job_in_lane = job; @@ -656,8 +706,12 @@ submit_flush_job_sha_512(MB_MGR_SHA_512_OOO *state, IMB_JOB *job, const unsigned /* put back processed packet into unused lanes, set job as complete */ state->unused_lanes = (state->unused_lanes << 4) | min_idx; state->num_lanes_inuse--; - sha_mb_generic_write_digest(ret_job->auth_tag_output, state->args.digest, sha_type, 8, - min_idx); + if (shani) + sha_ni_mb_generic_write_digest(ret_job->auth_tag_output, state->args.digest, + sha_type, min_idx); + else + sha_mb_generic_write_digest(ret_job->auth_tag_output, state->args.digest, sha_type, + 8, min_idx); ret_job->status |= IMB_STATUS_COMPLETED_AUTH; state->ldata[min_idx].job_in_lane = NULL; return ret_job; diff --git a/lib/ipsec-mb.h b/lib/ipsec-mb.h index 37fc407ed46626923a7d1b402879c25e83678b5b..13d54c5c4dba1562b699315e42857c73c2899554 100644 --- a/lib/ipsec-mb.h +++ b/lib/ipsec-mb.h @@ -949,6 +949,7 @@ typedef int (*imb_self_test_cb_t)(void *cb_arg, const IMB_SELF_TEST_CALLBACK_DAT #define IMB_FEATURE_SM3NI (1ULL << 24) #define IMB_FEATURE_SM4NI (1ULL << 25) #define IMB_FEATURE_SHA512NI (1ULL << 26) +#define IMB_FEATURE_XSAVE (1ULL << 27) /** * Self test defines @@ -969,7 +970,7 @@ typedef int (*imb_self_test_cb_t)(void *cb_arg, const IMB_SELF_TEST_CALLBACK_DAT #define IMB_CPUFLAGS_SSE (IMB_CPUFLAGS_NO_AESNI | IMB_FEATURE_AESNI | IMB_FEATURE_PCLMULQDQ) #define IMB_CPUFLAGS_SSE_T2 (IMB_CPUFLAGS_SSE | IMB_FEATURE_SHANI) #define IMB_CPUFLAGS_SSE_T3 (IMB_CPUFLAGS_SSE_T2 | IMB_FEATURE_GFNI) -#define IMB_CPUFLAGS_AVX (IMB_CPUFLAGS_SSE | IMB_FEATURE_AVX) +#define IMB_CPUFLAGS_AVX (IMB_CPUFLAGS_SSE | IMB_FEATURE_AVX | IMB_FEATURE_XSAVE) #define IMB_CPUFLAGS_AVX2 (IMB_CPUFLAGS_AVX | IMB_FEATURE_AVX2 | IMB_FEATURE_BMI2) #define IMB_CPUFLAGS_AVX512 (IMB_CPUFLAGS_AVX2 | IMB_FEATURE_AVX512_SKX) #define IMB_CPUFLAGS_AVX512_T2 \ @@ -998,8 +999,10 @@ typedef struct IMB_MGR { uint64_t flags; /**< passed to alloc_mb_mgr() */ uint64_t features; /**< reflects features of multi-buffer instance */ - uint64_t reserved[5]; /**< reserved for the future */ - uint32_t used_arch; /**< Architecture being used */ 
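The field swap below is layout-neutral: the removed `reserved[5]` plus `used_arch` span 5 * 8 + 4 = 44 bytes, and the replacement `reserved[4]` + `reserved2[7]` + `used_arch_type` + `used_arch` spans 4 * 8 + 7 + 1 + 4 = 44 bytes, with `used_arch` staying at the same offset, so the public IMB_MGR layout is preserved. A hypothetical compile-time guard (not in the patch):

	/* hypothetical: keep the reserved area ABI-stable (requires C11) */
	_Static_assert(sizeof(((IMB_MGR *) 0)->reserved) + sizeof(((IMB_MGR *) 0)->reserved2) +
			       sizeof(((IMB_MGR *) 0)->used_arch_type) +
			       sizeof(((IMB_MGR *) 0)->used_arch) == 44,
		       "IMB_MGR reserved area changed size");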
+	uint64_t reserved[4];   /**< reserved for the future */
+	uint8_t reserved2[7];   /**< reserved for the future */
+	uint8_t used_arch_type; /**< Architecture type being used */
+	uint32_t used_arch;     /**< Architecture being used */

 	int imb_errno; /**< per mb_mgr error status */
@@ -1160,6 +1163,9 @@ typedef struct IMB_MGR {
 	imb_self_test_cb_t self_test_cb_fn;
 	void *self_test_cb_arg;

+	submit_cipher_burst_t submit_aead_burst;
+	submit_cipher_burst_t submit_aead_burst_nocheck;
+
 	/* in-order scheduler fields */
 	int earliest_job; /**< byte offset, -1 if none */
 	int next_job;	  /**< byte offset */
@@ -1739,6 +1745,38 @@ init_mb_mgr_auto(IMB_MGR *state, IMB_ARCH *arch);
 #define IMB_SUBMIT_HASH_BURST_NOCHECK(_mgr, _jobs, _n_jobs, _hash) \
	((_mgr)->submit_hash_burst_nocheck((_mgr), (_jobs), (_n_jobs), (_hash)))

+/**
+ * Submit multiple AEAD jobs to be processed synchronously after validating.
+ *
+ * @param [in] _mgr       Pointer to initialized IMB_MGR structure
+ * @param [in,out] _jobs  Pointer to array of IMB_JOB structures
+ * @param [in] _n_jobs    Number of jobs to process
+ * @param [in] _cipher    Cipher algorithm of type #IMB_CIPHER_MODE
+ * @param [in] _dir       Cipher direction of type #IMB_CIPHER_DIRECTION
+ * @param [in] _key_size  Key size in bytes of type #IMB_KEY_SIZE_BYTES
+ *
+ * @return Number of completed jobs
+ */
+#define IMB_SUBMIT_AEAD_BURST(_mgr, _jobs, _n_jobs, _cipher, _dir, _key_size) \
+	((_mgr)->submit_aead_burst((_mgr), (_jobs), (_n_jobs), (_cipher), (_dir), (_key_size)))
+/**
+ * Submit multiple AEAD jobs to be processed synchronously without validating.
+ *
+ * This is more performant but less secure than IMB_SUBMIT_AEAD_BURST().
+ *
+ * @param [in] _mgr       Pointer to initialized IMB_MGR structure
+ * @param [in,out] _jobs  Pointer to array of IMB_JOB structures
+ * @param [in] _n_jobs    Number of jobs to process
+ * @param [in] _cipher    Cipher algorithm of type #IMB_CIPHER_MODE
+ * @param [in] _dir       Cipher direction of type #IMB_CIPHER_DIRECTION
+ * @param [in] _key_size  Key size in bytes of type #IMB_KEY_SIZE_BYTES
+ *
+ * @return Number of completed jobs
+ */
+#define IMB_SUBMIT_AEAD_BURST_NOCHECK(_mgr, _jobs, _n_jobs, _cipher, _dir, _key_size) \
+	((_mgr)->submit_aead_burst_nocheck((_mgr), (_jobs), (_n_jobs), (_cipher), (_dir), \
+					   (_key_size)))
+
 /* Key expansion and generation API's */

 /**
@@ -4011,6 +4049,94 @@ imb_self_test_set_cb(IMB_MGR *state, imb_self_test_cb_t cb_fn, void *cb_arg);
  */
 IMB_DLL_EXPORT int
 imb_self_test_get_cb(IMB_MGR *state, imb_self_test_cb_t *cb_fn, void **cb_arg);
+
+/**
+ * @brief API to get a string with the architecture type being used.
+ *
+ * init_mb_mgr_XXX() must be called before this function call,
+ * where XXX is the desired architecture (can be auto).
+ *
+ * @param [in] state        pointer to IMB_MGR
+ * @param [out] arch_type   string with architecture type
+ * @param [out] description string with description of the arch type
+ *
+ * @return operation status.
+ * @retval 0 success
+ * @retval IMB_ERR_NULL_MBMGR invalid \a state pointer
+ */
+IMB_DLL_EXPORT int
+imb_get_arch_type_string(const IMB_MGR *state, const char **arch_type, const char **description);
+
+/**
+ * @brief Retrieves minimum burst size for good performance on hash algorithms.
+ *
+ * Depending on the architecture used, this function returns the minimum
+ * burst size to be used for good performance on the hash-only burst API.
+ * Note that this will not return the burst size for best performance, but the minimum needed
+ * to start maximizing CPU core utilization (i.e.
enough buffers to
+ * utilize the CPU core resources efficiently, taking into account that when buffers have
+ * different sizes, a higher burst size is recommended).
+ *
+ * The output burst size may also be 1 (in the case of a synchronous single-buffer
+ * implementation), or 0 if the algorithm is not supported by the API.
+ *
+ * @param [in] mb_mgr          pointer to IMB MGR structure
+ * @param [in] algo            hash algorithm
+ * @param [out] out_burst_size pointer to store min burst size
+ *
+ * @return operation status.
+ * @retval 0 success
+ * @retval IMB_ERR_HASH_ALGO not supported \a algo
+ * @retval IMB_ERR_NULL_MBMGR invalid \a mb_mgr pointer
+ * @retval IMB_ERR_NULL_BURST invalid \a out_burst_size pointer
+ */
+IMB_DLL_EXPORT int
+imb_hash_burst_get_size(const IMB_MGR *mb_mgr, const IMB_HASH_ALG algo, unsigned *out_burst_size);
+
+/**
+ * @brief Retrieves minimum burst size for good performance on cipher algorithms.
+ *
+ * Depending on the architecture used, this function returns the minimum
+ * burst size to be used for good performance on the cipher-only burst API.
+ * The output burst size can be 1 (in the case of a synchronous single-buffer
+ * implementation), or 0 if the algorithm is not supported by the API.
+ *
+ * @param [in] mb_mgr          pointer to IMB MGR structure
+ * @param [in] cipher_mode     cipher mode
+ * @param [out] out_burst_size pointer to store min burst size
+ *
+ * @return operation status.
+ * @retval 0 success
+ * @retval IMB_ERR_CIPH_MODE not supported \a cipher_mode
+ * @retval IMB_ERR_NULL_MBMGR invalid \a mb_mgr pointer
+ * @retval IMB_ERR_NULL_BURST invalid \a out_burst_size pointer
+ */
+IMB_DLL_EXPORT int
+imb_cipher_burst_get_size(const IMB_MGR *mb_mgr, const IMB_CIPHER_MODE cipher_mode,
+			  unsigned *out_burst_size);
+
+/**
+ * @brief Retrieves minimum burst size for good performance on AEAD algorithms.
+ *
+ * Depending on the architecture used, this function returns the minimum
+ * burst size to be used for good performance on the AEAD burst API.
+ * The output burst size can be 1 (in the case of a synchronous single-buffer
+ * implementation), or 0 if the algorithm is not supported by the API.
+ *
+ * @param [in] mb_mgr          pointer to IMB MGR structure
+ * @param [in] cipher_mode     cipher mode
+ * @param [out] out_burst_size pointer to store min burst size
+ *
+ * @return operation status.
+ * @retval 0 success
+ * @retval IMB_ERR_CIPH_MODE not supported \a cipher_mode
+ * @retval IMB_ERR_NULL_MBMGR invalid \a mb_mgr pointer
+ * @retval IMB_ERR_NULL_BURST invalid \a out_burst_size pointer
+ */
+IMB_DLL_EXPORT int
+imb_aead_burst_get_size(const IMB_MGR *mb_mgr, const IMB_CIPHER_MODE cipher_mode,
+			unsigned *out_burst_size);
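To see how the query and submit APIs above are meant to compose, a minimal usage sketch (hypothetical application code, not part of the patch; `mgr` is assumed to be an initialized IMB_MGR, `jobs` an array of fully populated CCM IMB_JOBs, and the fragment lives in a void function with <stdio.h> included):

	unsigned burst_size = 0;

	if (imb_aead_burst_get_size(mgr, IMB_CIPHER_CCM, &burst_size) != 0 || burst_size == 0)
		return; /* AEAD burst API not available for this cipher mode */

	/* submitting at least burst_size jobs keeps all CCM OOO lanes busy */
	const uint32_t completed = IMB_SUBMIT_AEAD_BURST(mgr, jobs, burst_size, IMB_CIPHER_CCM,
							 IMB_DIR_ENCRYPT, IMB_KEY_128_BYTES);
	if (completed != burst_size)
		fprintf(stderr, "AEAD burst failed: errno %d\n", imb_get_errno(mgr));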
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/libIPSec_MB.def b/lib/libIPSec_MB.def
index 4f5c283c8aaa10e8ee8b5a7bdeb77451fa4bc08e..9aa9b86bc067eaebb3e7e11e7460d5a6eac5dfc8 100644
--- a/lib/libIPSec_MB.def
+++ b/lib/libIPSec_MB.def
@@ -728,3 +728,31 @@ EXPORTS
 	submit_job_nocheck_avx2_t4 @702
 	get_next_job_avx2_t4 @703
 	get_completed_job_avx2_t4 @704
+	imb_hash_burst_get_size @705
+	imb_get_arch_type_string @706
+	submit_aead_burst_sse_t1 @707
+	submit_aead_burst_sse_t2 @708
+	submit_aead_burst_sse_t3 @709
+	submit_aead_burst_sse_no_aesni @710
+	submit_aead_burst_avx_t1 @711
+	submit_aead_burst_avx_t2 @712
+	submit_aead_burst_avx2_t1 @713
+	submit_aead_burst_avx2_t2 @714
+	submit_aead_burst_avx2_t3 @715
+	submit_aead_burst_avx2_t4 @716
+	submit_aead_burst_avx512_t1 @717
+	submit_aead_burst_avx512_t2 @718
+	submit_aead_burst_nocheck_sse_t1 @719
+	submit_aead_burst_nocheck_sse_t2 @720
+	submit_aead_burst_nocheck_sse_t3 @721
+	submit_aead_burst_nocheck_sse_no_aesni @722
+	submit_aead_burst_nocheck_avx_t1 @723
+	submit_aead_burst_nocheck_avx_t2 @724
+	submit_aead_burst_nocheck_avx2_t1 @725
+	submit_aead_burst_nocheck_avx2_t2 @726
+	submit_aead_burst_nocheck_avx2_t3 @727
+	submit_aead_burst_nocheck_avx2_t4 @728
+	submit_aead_burst_nocheck_avx512_t1 @729
+	submit_aead_burst_nocheck_avx512_t2 @730
+	imb_cipher_burst_get_size @731
+	imb_aead_burst_get_size @732
diff --git a/lib/no-aesni/mb_mgr_sse_no_aesni.c b/lib/no-aesni/mb_mgr_sse_no_aesni.c
index 7565ea887ed254648609e208eaa066b2bc84af0e..364f210385c3228699538b50c1e3b7347e7c10de 100644
--- a/lib/no-aesni/mb_mgr_sse_no_aesni.c
+++ b/lib/no-aesni/mb_mgr_sse_no_aesni.c
@@ -166,6 +166,8 @@
 #define SUBMIT_CIPHER_BURST_NOCHECK submit_cipher_burst_nocheck_sse_no_aesni
 #define SUBMIT_HASH_BURST	    submit_hash_burst_sse_no_aesni
 #define SUBMIT_HASH_BURST_NOCHECK   submit_hash_burst_nocheck_sse_no_aesni
+#define SUBMIT_AEAD_BURST	    submit_aead_burst_sse_no_aesni
+#define SUBMIT_AEAD_BURST_NOCHECK   submit_aead_burst_nocheck_sse_no_aesni
 #define SET_SUITE_ID_FN		    set_suite_id_sse_no_aesni

 #define SUBMIT_JOB_AES128_DEC submit_job_aes128_dec_sse_no_aesni
@@ -329,6 +331,9 @@ init_mb_mgr_sse_no_aesni_internal(IMB_MGR *state, const int reset_mgrs)
 	/* Set architecture for future checks */
 	state->used_arch = (uint32_t) IMB_ARCH_NOAESNI;

+	/* Set architecture type for future checks */
+	state->used_arch_type = 1;
+
 	if (reset_mgrs) {
 		reset_ooo_mgrs(state);

@@ -348,6 +353,8 @@ init_mb_mgr_sse_no_aesni_internal(IMB_MGR *state, const int reset_mgrs)
 	state->submit_cipher_burst_nocheck = submit_cipher_burst_nocheck_sse_no_aesni;
 	state->submit_hash_burst = submit_hash_burst_sse_no_aesni;
 	state->submit_hash_burst_nocheck = submit_hash_burst_nocheck_sse_no_aesni;
+	state->submit_aead_burst = submit_aead_burst_sse_no_aesni;
+	state->submit_aead_burst_nocheck = submit_aead_burst_nocheck_sse_no_aesni;
 	state->submit_job_nocheck = submit_job_nocheck_sse_no_aesni;
 	state->get_completed_job = get_completed_job_sse_no_aesni;
 	state->flush_job = flush_job_sse_no_aesni;
diff --git a/lib/sse_t1/aes128_cntr_by8_sse.asm b/lib/sse_t1/aes128_cntr_by8_sse.asm
index 84879607cbb0eab6c9532bb348aaeb7e3c0007e0..c680f4d04ef4e9d93ee057d47a910a043af82a79 100644
---
a/lib/sse_t1/aes128_cntr_by8_sse.asm +++ b/lib/sse_t1/aes128_cntr_by8_sse.asm @@ -31,6 +31,7 @@ %include "include/const.inc" %include "include/reg_sizes.inc" %include "include/clear_regs.inc" +%include "include/cet.inc" ; routine to do AES128 CNTR enc/decrypt "by8" ; XMM registers are clobbered. Saving/restoring must be done at a higher level @@ -566,6 +567,7 @@ align 32 align 32 MKGLOBAL(AES_CNTR_CCM_128,function,internal) AES_CNTR_CCM_128: + endbranch64 DO_CNTR CCM %else ;; aes_cntr_128_sse(void *in, void *IV, void *keys, void *out, UINT64 num_bytes, UINT64 iv_len) diff --git a/lib/sse_t1/aes256_cntr_by8_sse.asm b/lib/sse_t1/aes256_cntr_by8_sse.asm index f3e304bb1deea6913865a496be1f93a1a41a6e0b..5c3464385b7b677620c9dd7328ef012c29d1a301 100644 --- a/lib/sse_t1/aes256_cntr_by8_sse.asm +++ b/lib/sse_t1/aes256_cntr_by8_sse.asm @@ -31,6 +31,7 @@ %include "include/const.inc" %include "include/reg_sizes.inc" %include "include/clear_regs.inc" +%include "include/cet.inc" ; routine to do AES256 CNTR enc/decrypt "by8" ; XMM registers are clobbered. Saving/restoring must be done at a higher level @@ -594,6 +595,7 @@ align 32 align 32 MKGLOBAL(AES_CNTR_CCM_256,function,internal) AES_CNTR_CCM_256: + endbranch64 DO_CNTR CCM %else ;; aes_cntr_256_sse(void *in, void *IV, void *keys, void *out, UINT64 num_bytes, UINT64 iv_len) diff --git a/lib/sse_t1/mb_mgr_aes128_ccm_auth_submit_flush_x4_sse.asm b/lib/sse_t1/mb_mgr_aes128_ccm_auth_submit_flush_x4_sse.asm index 638f50b877fd283ebc6210b403df3cf4db40edbf..970bf332ccb9bb16d4f77f0c15e4862e528c9e4e 100644 --- a/lib/sse_t1/mb_mgr_aes128_ccm_auth_submit_flush_x4_sse.asm +++ b/lib/sse_t1/mb_mgr_aes128_ccm_auth_submit_flush_x4_sse.asm @@ -31,6 +31,7 @@ %include "include/reg_sizes.inc" %include "include/const.inc" %include "include/memcpy.inc" +%include "include/cet.inc" %ifndef NUM_LANES %define NUM_LANES 4 @@ -614,12 +615,14 @@ align 64 ; arg 2 : job MKGLOBAL(SUBMIT_JOB_AES_CCM_AUTH,function,internal) SUBMIT_JOB_AES_CCM_AUTH: + endbranch64 GENERIC_SUBMIT_FLUSH_JOB_AES_CCM_AUTH_SSE SUBMIT ; IMB_JOB * flush_job_aes_ccm_auth_sse(MB_MGR_CCM_OOO *state) ; arg 1 : state MKGLOBAL(FLUSH_JOB_AES_CCM_AUTH,function,internal) FLUSH_JOB_AES_CCM_AUTH: + endbranch64 GENERIC_SUBMIT_FLUSH_JOB_AES_CCM_AUTH_SSE FLUSH mksection stack-noexec diff --git a/lib/sse_t1/mb_mgr_sse_t1.c b/lib/sse_t1/mb_mgr_sse_t1.c index a6bebbac26208c663a2a91a5a1631542cd94f1bc..1a1b38cddde34754cff42a72e6a0c56d9fe0a5ac 100644 --- a/lib/sse_t1/mb_mgr_sse_t1.c +++ b/lib/sse_t1/mb_mgr_sse_t1.c @@ -69,6 +69,8 @@ #define SUBMIT_CIPHER_BURST_NOCHECK submit_cipher_burst_nocheck_sse_t1 #define SUBMIT_HASH_BURST submit_hash_burst_sse_t1 #define SUBMIT_HASH_BURST_NOCHECK submit_hash_burst_nocheck_sse_t1 +#define SUBMIT_AEAD_BURST submit_aead_burst_sse_t1 +#define SUBMIT_AEAD_BURST_NOCHECK submit_aead_burst_nocheck_sse_t1 #define SET_SUITE_ID_FN set_suite_id_sse_t1 @@ -351,6 +353,9 @@ init_mb_mgr_sse_t1_internal(IMB_MGR *state, const int reset_mgrs) /* Set architecture for future checks */ state->used_arch = (uint32_t) IMB_ARCH_SSE; + /* Set architecture type for future checks */ + state->used_arch_type = 1; + if (reset_mgrs) { reset_ooo_mgrs(state); @@ -372,6 +377,8 @@ init_mb_mgr_sse_t1_internal(IMB_MGR *state, const int reset_mgrs) state->flush_burst = FLUSH_BURST; state->submit_cipher_burst = SUBMIT_CIPHER_BURST; state->submit_cipher_burst_nocheck = SUBMIT_CIPHER_BURST_NOCHECK; + state->submit_aead_burst = SUBMIT_AEAD_BURST; + state->submit_aead_burst_nocheck = SUBMIT_AEAD_BURST_NOCHECK; state->submit_hash_burst = 
SUBMIT_HASH_BURST; state->submit_hash_burst_nocheck = SUBMIT_HASH_BURST_NOCHECK; state->set_suite_id = SET_SUITE_ID_FN; diff --git a/lib/sse_t1/sha_mb_sse.c b/lib/sse_t1/sha_mb_sse.c index c8dcb904ed9c4bfe306debd9a400f4459de93fd8..841717d51408baf3515d1d3158d0ab94fce9dcea 100644 --- a/lib/sse_t1/sha_mb_sse.c +++ b/lib/sse_t1/sha_mb_sse.c @@ -101,7 +101,7 @@ IMB_JOB * submit_job_sha384_sse(MB_MGR_SHA_512_OOO *state, IMB_JOB *job) { return submit_flush_job_sha_512(state, job, 2, 1, 384, IMB_SHA_512_BLOCK_SIZE, - SHA384_PAD_SIZE, call_sha512_x2_sse_from_c); + SHA384_PAD_SIZE, call_sha512_x2_sse_from_c, 0); } IMB_DLL_LOCAL @@ -109,7 +109,7 @@ IMB_JOB * flush_job_sha384_sse(MB_MGR_SHA_512_OOO *state, IMB_JOB *job) { return submit_flush_job_sha_512(state, job, 2, 0, 384, IMB_SHA_512_BLOCK_SIZE, - SHA384_PAD_SIZE, call_sha512_x2_sse_from_c); + SHA384_PAD_SIZE, call_sha512_x2_sse_from_c, 0); } /* ========================================================================== */ @@ -122,7 +122,7 @@ IMB_JOB * submit_job_sha512_sse(MB_MGR_SHA_512_OOO *state, IMB_JOB *job) { return submit_flush_job_sha_512(state, job, 2, 1, 512, IMB_SHA_512_BLOCK_SIZE, - SHA512_PAD_SIZE, call_sha512_x2_sse_from_c); + SHA512_PAD_SIZE, call_sha512_x2_sse_from_c, 0); } IMB_DLL_LOCAL @@ -130,5 +130,5 @@ IMB_JOB * flush_job_sha512_sse(MB_MGR_SHA_512_OOO *state, IMB_JOB *job) { return submit_flush_job_sha_512(state, job, 2, 0, 512, IMB_SHA_512_BLOCK_SIZE, - SHA512_PAD_SIZE, call_sha512_x2_sse_from_c); + SHA512_PAD_SIZE, call_sha512_x2_sse_from_c, 0); } diff --git a/lib/sse_t1/sm3_base_hmac_sse.asm b/lib/sse_t1/sm3_base_hmac_sse.asm index 748828070452384ec2ecf1d59b670c646e6d2fe4..7af3693c662c48a4c88ffa1b2ed70a6eaf64bc6c 100644 --- a/lib/sse_t1/sm3_base_hmac_sse.asm +++ b/lib/sse_t1/sm3_base_hmac_sse.asm @@ -80,7 +80,7 @@ extern sm3_base_update %xdefine t1 gp1 %xdefine t2 gp2 %xdefine t3 gp3 -%xdefine t4 gp3 +%xdefine t4 gp4 %xdefine r1 gp12 %xdefine r2 gp11 diff --git a/lib/sse_t2/mb_mgr_sse_t2.c b/lib/sse_t2/mb_mgr_sse_t2.c index 96e30d65f089e72813ad695e9445bc320e9afafd..2b5825a9def32c9e2deb5a11c1776fbdbcdaf00b 100644 --- a/lib/sse_t2/mb_mgr_sse_t2.c +++ b/lib/sse_t2/mb_mgr_sse_t2.c @@ -70,6 +70,8 @@ #define SUBMIT_CIPHER_BURST_NOCHECK submit_cipher_burst_nocheck_sse_t2 #define SUBMIT_HASH_BURST submit_hash_burst_sse_t2 #define SUBMIT_HASH_BURST_NOCHECK submit_hash_burst_nocheck_sse_t2 +#define SUBMIT_AEAD_BURST submit_aead_burst_sse_t2 +#define SUBMIT_AEAD_BURST_NOCHECK submit_aead_burst_nocheck_sse_t2 #define SET_SUITE_ID_FN set_suite_id_sse_t2 /* Hash */ @@ -353,6 +355,9 @@ init_mb_mgr_sse_t2_internal(IMB_MGR *state, const int reset_mgrs) /* Set architecture for future checks */ state->used_arch = (uint32_t) IMB_ARCH_SSE; + /* Set architecture type for future checks */ + state->used_arch_type = 2; + if (reset_mgrs) { reset_ooo_mgrs(state); @@ -376,6 +381,8 @@ init_mb_mgr_sse_t2_internal(IMB_MGR *state, const int reset_mgrs) state->submit_cipher_burst_nocheck = SUBMIT_CIPHER_BURST_NOCHECK; state->submit_hash_burst = SUBMIT_HASH_BURST; state->submit_hash_burst_nocheck = SUBMIT_HASH_BURST_NOCHECK; + state->submit_aead_burst = SUBMIT_AEAD_BURST; + state->submit_aead_burst_nocheck = SUBMIT_AEAD_BURST_NOCHECK; state->set_suite_id = SET_SUITE_ID_FN; state->keyexp_128 = aes_keyexp_128_sse; diff --git a/lib/sse_t3/mb_mgr_sse_t3.c b/lib/sse_t3/mb_mgr_sse_t3.c index 55fb57cb424e06ca9748a971ba6ce5af4f3c1882..ea21e91e600425036b3c435ec4143c20fef27dd6 100644 --- a/lib/sse_t3/mb_mgr_sse_t3.c +++ b/lib/sse_t3/mb_mgr_sse_t3.c @@ -71,6 +71,8 @@ 
#define SUBMIT_CIPHER_BURST_NOCHECK submit_cipher_burst_nocheck_sse_t3 #define SUBMIT_HASH_BURST submit_hash_burst_sse_t3 #define SUBMIT_HASH_BURST_NOCHECK submit_hash_burst_nocheck_sse_t3 +#define SUBMIT_AEAD_BURST submit_aead_burst_sse_t3 +#define SUBMIT_AEAD_BURST_NOCHECK submit_aead_burst_nocheck_sse_t3 #define SET_SUITE_ID_FN set_suite_id_sse_t3 /* Hash */ @@ -354,6 +356,9 @@ init_mb_mgr_sse_t3_internal(IMB_MGR *state, const int reset_mgrs) /* Set architecture for future checks */ state->used_arch = (uint32_t) IMB_ARCH_SSE; + /* Set architecture type for future checks */ + state->used_arch_type = 3; + if (reset_mgrs) { reset_ooo_mgrs(state); @@ -377,6 +382,8 @@ init_mb_mgr_sse_t3_internal(IMB_MGR *state, const int reset_mgrs) state->submit_cipher_burst_nocheck = SUBMIT_CIPHER_BURST_NOCHECK; state->submit_hash_burst = SUBMIT_HASH_BURST; state->submit_hash_burst_nocheck = SUBMIT_HASH_BURST_NOCHECK; + state->submit_aead_burst = SUBMIT_AEAD_BURST; + state->submit_aead_burst_nocheck = SUBMIT_AEAD_BURST_NOCHECK; state->set_suite_id = SET_SUITE_ID_FN; state->keyexp_128 = aes_keyexp_128_sse; diff --git a/lib/win_x64.mak b/lib/win_x64.mak index c1e9f55fca6267cee33a02f76027a5637f5beb80..530c0004d9f1f304cd323c873d8eb230607bb24d 100644 --- a/lib/win_x64.mak +++ b/lib/win_x64.mak @@ -377,7 +377,8 @@ lib_objs1 = \ $(OBJ_DIR)\quic_chacha20_poly1305.obj \ $(OBJ_DIR)\hmac_ipad_opad.obj \ $(OBJ_DIR)\cipher_suite_id.obj \ - $(OBJ_DIR)\sm4_sse.obj + $(OBJ_DIR)\sm4_sse.obj \ + $(OBJ_DIR)\capabilities.obj lib_objs2 = \ $(OBJ_DIR)\mb_mgr_aes192_cbc_enc_flush_avx.obj \ @@ -596,8 +597,12 @@ avx2_t4_objs = \ $(OBJ_DIR)\sm3_msg_avx2.obj \ $(OBJ_DIR)\sm3_hmac_avx2.obj \ $(OBJ_DIR)\sha512_x1_ni_avx2.obj \ + $(OBJ_DIR)\sha512_x2_ni_avx2.obj \ $(OBJ_DIR)\sha_ni_avx2.obj \ - $(OBJ_DIR)\sha512_hmac_ni_avx2.obj + $(OBJ_DIR)\mb_mgr_hmac_sha512_submit_ni_avx2.obj \ + $(OBJ_DIR)\mb_mgr_hmac_sha512_flush_ni_avx2.obj \ + $(OBJ_DIR)\mb_mgr_hmac_sha384_submit_ni_avx2.obj \ + $(OBJ_DIR)\mb_mgr_hmac_sha384_flush_ni_avx2.obj !if "$(AESNI_EMU)" == "y" all_objs = $(lib_objs1) $(lib_objs2) $(gcm_objs) $(no_aesni_objs) diff --git a/lib/x86_64/capabilities.c b/lib/x86_64/capabilities.c new file mode 100644 index 0000000000000000000000000000000000000000..740824dc59013f1081df17b7e23cf94047c8f945 --- /dev/null +++ b/lib/x86_64/capabilities.c @@ -0,0 +1,172 @@ +/******************************************************************************* + Copyright (c) 2024, Intel Corporation + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*******************************************************************************/ + +#include "ipsec-mb.h" +#include "ipsec_ooo_mgr.h" + +int +imb_hash_burst_get_size(const IMB_MGR *mb_mgr, const IMB_HASH_ALG algo, unsigned *out_burst_size) +{ +#ifdef SAFE_PARAM + if (mb_mgr == NULL) + return IMB_ERR_NULL_MBMGR; + + if (out_burst_size == NULL) + return IMB_ERR_NULL_BURST; +#endif + + switch (algo) { +#ifndef __aarch64__ + case IMB_AUTH_HMAC_SHA_1: + *out_burst_size = + ((MB_MGR_HMAC_SHA_1_OOO *) (mb_mgr->hmac_sha_1_ooo))->total_num_lanes; + break; + case IMB_AUTH_SHA_1: + *out_burst_size = ((MB_MGR_SHA_1_OOO *) (mb_mgr->sha_1_ooo))->total_num_lanes; + break; + case IMB_AUTH_HMAC_SHA_224: + *out_burst_size = + ((MB_MGR_HMAC_SHA_256_OOO *) (mb_mgr->hmac_sha_224_ooo))->total_num_lanes; + break; + case IMB_AUTH_SHA_224: + *out_burst_size = ((MB_MGR_SHA_256_OOO *) (mb_mgr->sha_224_ooo))->total_num_lanes; + break; + case IMB_AUTH_HMAC_SHA_256: + *out_burst_size = + ((MB_MGR_HMAC_SHA_256_OOO *) (mb_mgr->hmac_sha_256_ooo))->total_num_lanes; + break; + case IMB_AUTH_SHA_256: + *out_burst_size = ((MB_MGR_SHA_256_OOO *) (mb_mgr->sha_256_ooo))->total_num_lanes; + break; + case IMB_AUTH_HMAC_SHA_384: + *out_burst_size = + ((MB_MGR_HMAC_SHA_512_OOO *) (mb_mgr->hmac_sha_384_ooo))->total_num_lanes; + break; + case IMB_AUTH_SHA_384: + *out_burst_size = ((MB_MGR_SHA_512_OOO *) (mb_mgr->sha_384_ooo))->total_num_lanes; + break; + case IMB_AUTH_HMAC_SHA_512: + *out_burst_size = + ((MB_MGR_HMAC_SHA_512_OOO *) (mb_mgr->hmac_sha_512_ooo))->total_num_lanes; + break; + case IMB_AUTH_SHA_512: + *out_burst_size = ((MB_MGR_SHA_512_OOO *) (mb_mgr->sha_512_ooo))->total_num_lanes; + break; +#endif /* __aarch64__ */ + default: + *out_burst_size = 0; + return IMB_ERR_HASH_ALGO; + } + + return 0; +} + +int +imb_cipher_burst_get_size(const IMB_MGR *mb_mgr, const IMB_CIPHER_MODE cipher_mode, + unsigned *out_burst_size) +{ + switch (cipher_mode) { + case IMB_CIPHER_ECB: + case IMB_CIPHER_CNTR: + *out_burst_size = 1; + break; + case IMB_CIPHER_CBC: + *out_burst_size = ((MB_MGR_AES_OOO *) (mb_mgr->aes128_ooo))->total_num_lanes; + break; + default: + *out_burst_size = 0; + return IMB_ERR_CIPH_MODE; + } + + return 0; +} + +int +imb_aead_burst_get_size(const IMB_MGR *mb_mgr, const IMB_CIPHER_MODE cipher_mode, + unsigned *out_burst_size) +{ + if (cipher_mode == IMB_CIPHER_CCM) { + *out_burst_size = ((MB_MGR_CCM_OOO *) (mb_mgr->aes_ccm_ooo))->total_num_lanes; + return 0; + } else { + *out_burst_size = 0; + return IMB_ERR_CIPH_MODE; + } +} + +int +imb_get_arch_type_string(const IMB_MGR *state, const char **arch_type, const char **description) +{ +#ifdef SAFE_PARAM + if (state == NULL) + return IMB_ERR_NULL_MBMGR; + if (arch_type == NULL) + return EINVAL; +#endif + struct arch_type_map { + IMB_ARCH arch; + uint8_t type; + const char *arch_type; + const char *description; + }; + + const struct arch_type_map arch_type_mappings[] = { + { IMB_ARCH_NOAESNI, 0, "AESNI Emulation", "CPU ISA: SSE" }, +#ifndef __aarch64__ + { 
IMB_ARCH_SSE, 1, "SSE Type 1", "CPU ISA: AES, PCLMUL, SSE" }, + { IMB_ARCH_SSE, 2, "SSE Type 2", "CPU ISA: AES, PCLMUL, SSE, SHA-NI" }, + { IMB_ARCH_SSE, 3, "SSE Type 3", "CPU ISA: AES, PCLMUL, SSE, SHA-NI, GFNI" }, + { IMB_ARCH_AVX, 1, "AVX Type 1", "CPU ISA: AES, PCLMUL, SSE, AVX" }, + { IMB_ARCH_AVX, 2, "AVX Type 2", "CPU ISA: AES, PCLMUL, SSE, AVX, SHA-NI" }, + { IMB_ARCH_AVX2, 1, "AVX2 Type 1", "CPU ISA: AES, PCLMUL, SSE, AVX, AVX2" }, + { IMB_ARCH_AVX2, 2, "AVX2 Type 2", + "CPU ISA: VAES, VPCLMUL, SSE, AVX, AVX2, SHA-NI, GFNI" }, + { IMB_ARCH_AVX2, 3, "AVX2 Type 3", + "CPU ISA: VAES, VPCLMUL, SSE, AVX, AVX2, SHA-NI, GFNI, IFMA" }, + { IMB_ARCH_AVX2, 4, "AVX2 Type 4", + "CPU ISA: VAES, VPCLMUL, SSE, AVX, AVX2, SHA-NI, GFNI, IFMA, SHA512-NI, SM3-NI, " + "SM4-NI" }, + { IMB_ARCH_AVX512, 1, "AVX512 Type 1", + "CPU ISA: AES, PCLMUL, SSE, AVX, AVX2, AVX512" }, + { IMB_ARCH_AVX512, 2, "AVX512 Type 2", + "CPU ISA: VAES, VPCLMUL, SSE, AVX, AVX2, AVX512, GFNI, SHA-NI" }, +#endif /* __aarch64__ */ + }; + + for (unsigned int i = 0; i < IMB_DIM(arch_type_mappings); i++) { + if (arch_type_mappings[i].arch == state->used_arch && + arch_type_mappings[i].type == state->used_arch_type) { + *arch_type = arch_type_mappings[i].arch_type; + if (description != NULL) + *description = arch_type_mappings[i].description; + + break; + } + *arch_type = "Invalid arch type"; + } + return 0; +} diff --git a/lib/x86_64/cpu_feature.c b/lib/x86_64/cpu_feature.c index 3de9acbf5c86909e42ee3d2e0e6aaf6301ff00e6..ed385a16ea62508fc88837950904ee383f53309b 100644 --- a/lib/x86_64/cpu_feature.c +++ b/lib/x86_64/cpu_feature.c @@ -221,6 +221,13 @@ detect_sm4ni(void) #endif } +static uint32_t +detect_xsave(void) +{ + /* Check presence of XSAVE - bit 26 of ECX */ + return (cpuid_1_0.ecx & (1UL << 26)); +} + uint64_t cpu_feature_detect(void) { @@ -249,7 +256,8 @@ cpu_feature_detect(void) { 7, IMB_FEATURE_HYBRID, detect_hybrid }, { 7, IMB_FEATURE_SM3NI, detect_sm3ni }, { 7, IMB_FEATURE_SM4NI, detect_sm4ni }, - { 7, IMB_FEATURE_SHA512NI, detect_sha512ni } }; + { 7, IMB_FEATURE_SHA512NI, detect_sha512ni }, + { 1, IMB_FEATURE_XSAVE, detect_xsave } }; struct cpuid_regs r; unsigned hi_leaf_number = 0; uint64_t features = 0; diff --git a/lib/x86_64/ooo_mgr_reset.c b/lib/x86_64/ooo_mgr_reset.c index f73ca3da4e101e008a918ad2fe670f159a73a672..098d05040f7dd6526368a4423a619a0d3f69f464 100644 --- a/lib/x86_64/ooo_mgr_reset.c +++ b/lib/x86_64/ooo_mgr_reset.c @@ -41,6 +41,8 @@ ooo_mgr_aes_reset(void *p_ooo_mgr, const unsigned num_lanes) memset(p_mgr, 0, offsetof(MB_MGR_AES_OOO, road_block)); memset(p_mgr->lens, 0xff, sizeof(p_mgr->lens)); + p_mgr->total_num_lanes = num_lanes; + if (num_lanes == 4) p_mgr->unused_lanes = 0xF3210; else if (num_lanes == 8) @@ -96,6 +98,8 @@ ooo_mgr_ccm_reset(void *p_ooo_mgr, const unsigned num_lanes) memset(p_mgr, 0, offsetof(MB_MGR_CCM_OOO, road_block)); memset(p_mgr->lens, 0xff, sizeof(p_mgr->lens)); + p_mgr->total_num_lanes = num_lanes; + if (num_lanes == 4) p_mgr->unused_lanes = 0xF3210; else if (num_lanes == 8) @@ -134,6 +138,7 @@ ooo_mgr_hmac_sha1_reset(void *p_ooo_mgr, const unsigned num_lanes) memset(p_mgr, 0, offsetof(MB_MGR_HMAC_SHA_1_OOO, road_block)); memset(p_mgr->lens, 0xff, sizeof(p_mgr->lens)); + p_mgr->total_num_lanes = num_lanes; for (i = 0; i < num_lanes; i++) { p_mgr->ldata[i].extra_block[IMB_SHA1_BLOCK_SIZE] = 0x80; @@ -164,6 +169,7 @@ ooo_mgr_hmac_sha224_reset(void *p_ooo_mgr, const unsigned num_lanes) memset(p_mgr, 0, offsetof(MB_MGR_HMAC_SHA_256_OOO, road_block)); memset(p_mgr->lens, 0xff, 
sizeof(p_mgr->lens)); + p_mgr->total_num_lanes = num_lanes; for (i = 0; i < num_lanes; i++) { p_mgr->ldata[i].extra_block[IMB_SHA_256_BLOCK_SIZE] = 0x80; @@ -194,6 +200,7 @@ ooo_mgr_hmac_sha256_reset(void *p_ooo_mgr, const unsigned num_lanes) memset(p_mgr, 0, offsetof(MB_MGR_HMAC_SHA_256_OOO, road_block)); memset(p_mgr->lens, 0xff, sizeof(p_mgr->lens)); + p_mgr->total_num_lanes = num_lanes; for (i = 0; i < num_lanes; i++) { p_mgr->ldata[i].extra_block[IMB_SHA_256_BLOCK_SIZE] = 0x80; @@ -224,6 +231,7 @@ ooo_mgr_hmac_sha384_reset(void *p_ooo_mgr, const unsigned num_lanes) memset(p_mgr, 0, offsetof(MB_MGR_HMAC_SHA_512_OOO, road_block)); memset(p_mgr->lens, 0xff, sizeof(p_mgr->lens)); + p_mgr->total_num_lanes = num_lanes; for (i = 0; i < num_lanes; i++) { p_mgr->ldata[i].extra_block[IMB_SHA_384_BLOCK_SIZE] = 0x80; @@ -260,6 +268,7 @@ ooo_mgr_hmac_sha512_reset(void *p_ooo_mgr, const unsigned num_lanes) memset(p_mgr, 0, offsetof(MB_MGR_HMAC_SHA_512_OOO, road_block)); memset(p_mgr->lens, 0xff, sizeof(p_mgr->lens)); + p_mgr->total_num_lanes = num_lanes; for (i = 0; i < num_lanes; i++) { p_mgr->ldata[i].extra_block[IMB_SHA_512_BLOCK_SIZE] = 0x80; @@ -296,6 +305,7 @@ ooo_mgr_hmac_md5_reset(void *p_ooo_mgr, const unsigned num_lanes) memset(p_mgr, 0, offsetof(MB_MGR_HMAC_MD5_OOO, road_block)); memset(p_mgr->lens, 0xff, sizeof(p_mgr->lens)); + p_mgr->total_num_lanes = num_lanes; for (i = 0; i < num_lanes; i++) { p_mgr->ldata[i].extra_block[64] = 0x80; @@ -343,6 +353,7 @@ ooo_mgr_sha1_reset(void *p_ooo_mgr, const unsigned num_lanes) MB_MGR_SHA_1_OOO *p_mgr = (MB_MGR_SHA_1_OOO *) p_ooo_mgr; memset(p_mgr, 0, offsetof(MB_MGR_SHA_1_OOO, road_block)); + p_mgr->total_num_lanes = num_lanes; if (num_lanes == 2) p_mgr->unused_lanes = 0xF10; /* SHANI */ @@ -361,6 +372,7 @@ ooo_mgr_sha256_reset(void *p_ooo_mgr, const unsigned num_lanes) MB_MGR_SHA_256_OOO *p_mgr = (MB_MGR_SHA_256_OOO *) p_ooo_mgr; memset(p_mgr, 0, offsetof(MB_MGR_SHA_256_OOO, road_block)); + p_mgr->total_num_lanes = num_lanes; if (num_lanes == 2) p_mgr->unused_lanes = 0xF10; /* SHANI */ @@ -379,6 +391,7 @@ ooo_mgr_sha512_reset(void *p_ooo_mgr, const unsigned num_lanes) MB_MGR_SHA_512_OOO *p_mgr = (MB_MGR_SHA_512_OOO *) p_ooo_mgr; memset(p_mgr, 0, offsetof(MB_MGR_SHA_512_OOO, road_block)); + p_mgr->total_num_lanes = num_lanes; if (num_lanes == AVX_NUM_SHA512_LANES) p_mgr->unused_lanes = 0xF10; diff --git a/perf/ipsec_perf.c b/perf/ipsec_perf.c index 71bd057215c98316db270194e9ed283728f75463..2819bcfec302b9422c70d1f518b9e9e8ec8233f4 100644 --- a/perf/ipsec_perf.c +++ b/perf/ipsec_perf.c @@ -771,6 +771,7 @@ typedef enum { TEST_API_BURST, TEST_API_CIPHER_BURST, TEST_API_HASH_BURST, + TEST_API_AEAD_BURST, TEST_API_DIRECT, TEST_API_QUIC, TEST_API_NUMOF @@ -1348,6 +1349,161 @@ translate_cipher_mode(const enum test_cipher_mode_e test_mode) return c_mode; } +/* + * This function translates enum test_hash_alg_e to be used by ipsec_mb + * library + */ +static IMB_HASH_ALG +translate_hash_alg(const enum test_hash_alg_e test_mode) +{ + IMB_HASH_ALG hash_alg = IMB_AUTH_NULL; + + switch (test_mode) { + case TEST_SHA1: + hash_alg = IMB_AUTH_SHA_1; + break; + case TEST_SHA_224: + hash_alg = IMB_AUTH_SHA_224; + break; + case TEST_SHA_256: + hash_alg = IMB_AUTH_SHA_256; + break; + case TEST_SHA_384: + hash_alg = IMB_AUTH_SHA_384; + break; + case TEST_SHA_512: + hash_alg = IMB_AUTH_SHA_512; + break; + case TEST_SHA1_HMAC: + hash_alg = IMB_AUTH_HMAC_SHA_1; + break; + case TEST_SHA_224_HMAC: + hash_alg = IMB_AUTH_HMAC_SHA_224; + break; + case TEST_SHA_256_HMAC: + 
hash_alg = IMB_AUTH_HMAC_SHA_256; + break; + case TEST_SHA_384_HMAC: + hash_alg = IMB_AUTH_HMAC_SHA_384; + break; + case TEST_SHA_512_HMAC: + hash_alg = IMB_AUTH_HMAC_SHA_512; + break; + case TEST_XCBC: + hash_alg = IMB_AUTH_AES_XCBC; + break; + case TEST_HASH_CCM: + hash_alg = IMB_AUTH_AES_CCM; + break; + case TEST_HASH_GCM: + if (segment_size != 0) + hash_alg = IMB_AUTH_GCM_SGL; + else + hash_alg = IMB_AUTH_AES_GMAC; + break; + case TEST_DOCSIS_CRC32: + hash_alg = IMB_AUTH_DOCSIS_CRC32; + break; + case TEST_NULL_HASH: + hash_alg = IMB_AUTH_NULL; + break; + case TEST_HASH_CMAC: + hash_alg = IMB_AUTH_AES_CMAC; + break; + case TEST_HASH_CMAC_BITLEN: + hash_alg = IMB_AUTH_AES_CMAC_BITLEN; + break; + case TEST_HASH_CMAC_256: + hash_alg = IMB_AUTH_AES_CMAC_256; + break; + case TEST_HASH_POLY1305: + hash_alg = IMB_AUTH_POLY1305; + break; + case TEST_AEAD_POLY1305: + if (segment_size != 0) + hash_alg = IMB_AUTH_CHACHA20_POLY1305_SGL; + else + hash_alg = IMB_AUTH_CHACHA20_POLY1305; + break; + case TEST_PON_CRC_BIP: + hash_alg = IMB_AUTH_PON_CRC_BIP; + break; + case TEST_ZUC_EIA3: + hash_alg = IMB_AUTH_ZUC_EIA3_BITLEN; + break; + case TEST_ZUC256_EIA3: + hash_alg = IMB_AUTH_ZUC256_EIA3_BITLEN; + break; + case TEST_SNOW3G_UIA2: + hash_alg = IMB_AUTH_SNOW3G_UIA2_BITLEN; + break; + case TEST_KASUMI_UIA1: + hash_alg = IMB_AUTH_KASUMI_UIA1; + break; + case TEST_AES_GMAC_128: + hash_alg = IMB_AUTH_AES_GMAC_128; + break; + case TEST_AES_GMAC_192: + hash_alg = IMB_AUTH_AES_GMAC_192; + break; + case TEST_AES_GMAC_256: + hash_alg = IMB_AUTH_AES_GMAC_256; + break; + case TEST_AUTH_GHASH: + hash_alg = IMB_AUTH_GHASH; + break; + case TEST_AUTH_SNOW_V_AEAD: + hash_alg = IMB_AUTH_SNOW_V_AEAD; + break; + case TEST_CRC32_ETHERNET_FCS: + hash_alg = IMB_AUTH_CRC32_ETHERNET_FCS; + break; + case TEST_CRC32_SCTP: + hash_alg = IMB_AUTH_CRC32_SCTP; + break; + case TEST_CRC32_WIMAX_OFDMA_DATA: + hash_alg = IMB_AUTH_CRC32_WIMAX_OFDMA_DATA; + break; + case TEST_CRC24_LTE_A: + hash_alg = IMB_AUTH_CRC24_LTE_A; + break; + case TEST_CRC24_LTE_B: + hash_alg = IMB_AUTH_CRC24_LTE_B; + break; + case TEST_CRC16_X25: + hash_alg = IMB_AUTH_CRC16_X25; + break; + case TEST_CRC16_FP_DATA: + hash_alg = IMB_AUTH_CRC16_FP_DATA; + break; + case TEST_CRC11_FP_HEADER: + hash_alg = IMB_AUTH_CRC11_FP_HEADER; + break; + case TEST_CRC10_IUUP_DATA: + hash_alg = IMB_AUTH_CRC10_IUUP_DATA; + break; + case TEST_CRC8_WIMAX_OFDMA_HCS: + hash_alg = IMB_AUTH_CRC8_WIMAX_OFDMA_HCS; + break; + case TEST_CRC7_FP_HEADER: + hash_alg = IMB_AUTH_CRC7_FP_HEADER; + break; + case TEST_CRC6_IUUP_HEADER: + hash_alg = IMB_AUTH_CRC6_IUUP_HEADER; + break; + case TEST_AUTH_SM3: + hash_alg = IMB_AUTH_SM3; + break; + case TEST_SM3_HMAC: + hash_alg = IMB_AUTH_HMAC_SM3; + break; + default: + break; + } + + return hash_alg; +} + static uint32_t get_next_size(const uint32_t index) { @@ -1879,173 +2035,81 @@ do_test(IMB_MGR *mb_mgr, struct params_s *params, const uint32_t num_iter, uint8 job_template.auth_tag_output = (uint8_t *) digest; + /* Translating enum to the API's one */ + job_template.hash_alg = translate_hash_alg(params->hash_alg); switch (params->hash_alg) { - case TEST_SHA1: - job_template.hash_alg = IMB_AUTH_SHA_1; - break; - case TEST_SHA_224: - job_template.hash_alg = IMB_AUTH_SHA_224; - break; - case TEST_SHA_256: - job_template.hash_alg = IMB_AUTH_SHA_256; - break; - case TEST_SHA_384: - job_template.hash_alg = IMB_AUTH_SHA_384; - break; - case TEST_SHA_512: - job_template.hash_alg = IMB_AUTH_SHA_512; - break; case TEST_XCBC: job_template.u.XCBC._k1_expanded = 
k1_expanded; job_template.u.XCBC._k2 = k2; job_template.u.XCBC._k3 = k3; - job_template.hash_alg = IMB_AUTH_AES_XCBC; - break; - case TEST_HASH_CCM: - job_template.hash_alg = IMB_AUTH_AES_CCM; - break; - case TEST_HASH_GCM: - if (segment_size != 0) - job_template.hash_alg = IMB_AUTH_GCM_SGL; - else - job_template.hash_alg = IMB_AUTH_AES_GMAC; - break; - case TEST_DOCSIS_CRC32: - job_template.hash_alg = IMB_AUTH_DOCSIS_CRC32; - break; - case TEST_NULL_HASH: - job_template.hash_alg = IMB_AUTH_NULL; break; case TEST_HASH_CMAC: job_template.u.CMAC._key_expanded = k1_expanded; job_template.u.CMAC._skey1 = k2; job_template.u.CMAC._skey2 = k3; - job_template.hash_alg = IMB_AUTH_AES_CMAC; break; case TEST_HASH_CMAC_BITLEN: job_template.u.CMAC._key_expanded = k1_expanded; job_template.u.CMAC._skey1 = k2; job_template.u.CMAC._skey2 = k3; - job_template.hash_alg = IMB_AUTH_AES_CMAC_BITLEN; break; case TEST_HASH_CMAC_256: job_template.u.CMAC._key_expanded = k1_expanded; job_template.u.CMAC._skey1 = k2; job_template.u.CMAC._skey2 = k3; - job_template.hash_alg = IMB_AUTH_AES_CMAC_256; break; case TEST_HASH_POLY1305: job_template.u.POLY1305._key = k1_expanded; - job_template.hash_alg = IMB_AUTH_POLY1305; - break; - case TEST_AEAD_POLY1305: - if (segment_size != 0) - job_template.hash_alg = IMB_AUTH_CHACHA20_POLY1305_SGL; - else - job_template.hash_alg = IMB_AUTH_CHACHA20_POLY1305; break; case TEST_PON_CRC_BIP: - job_template.hash_alg = IMB_AUTH_PON_CRC_BIP; job_template.cipher_start_src_offset_in_bytes = 8; break; case TEST_ZUC_EIA3: - job_template.hash_alg = IMB_AUTH_ZUC_EIA3_BITLEN; job_template.u.ZUC_EIA3._key = k3; job_template.u.ZUC_EIA3._iv = (uint8_t *) &auth_iv; break; case TEST_ZUC256_EIA3: - job_template.hash_alg = IMB_AUTH_ZUC256_EIA3_BITLEN; job_template.u.ZUC_EIA3._key = k3; job_template.u.ZUC_EIA3._iv = (uint8_t *) &auth_iv; break; case TEST_SNOW3G_UIA2: - job_template.hash_alg = IMB_AUTH_SNOW3G_UIA2_BITLEN; job_template.u.SNOW3G_UIA2._key = k3; job_template.u.SNOW3G_UIA2._iv = (uint8_t *) &auth_iv; break; case TEST_KASUMI_UIA1: - job_template.hash_alg = IMB_AUTH_KASUMI_UIA1; job_template.u.KASUMI_UIA1._key = k3; break; case TEST_AES_GMAC_128: - job_template.hash_alg = IMB_AUTH_AES_GMAC_128; IMB_AES128_GCM_PRE(mb_mgr, gcm_key, &gdata_key); job_template.u.GMAC._key = &gdata_key; job_template.u.GMAC._iv = (uint8_t *) &auth_iv; job_template.u.GMAC.iv_len_in_bytes = 12; break; case TEST_AES_GMAC_192: - job_template.hash_alg = IMB_AUTH_AES_GMAC_192; IMB_AES192_GCM_PRE(mb_mgr, gcm_key, &gdata_key); job_template.u.GMAC._key = &gdata_key; job_template.u.GMAC._iv = (uint8_t *) &auth_iv; job_template.u.GMAC.iv_len_in_bytes = 12; break; case TEST_AES_GMAC_256: - job_template.hash_alg = IMB_AUTH_AES_GMAC_256; IMB_AES256_GCM_PRE(mb_mgr, gcm_key, &gdata_key); job_template.u.GMAC._key = &gdata_key; job_template.u.GMAC._iv = (uint8_t *) &auth_iv; job_template.u.GMAC.iv_len_in_bytes = 12; break; case TEST_AUTH_GHASH: - job_template.hash_alg = IMB_AUTH_GHASH; IMB_GHASH_PRE(mb_mgr, gcm_key, &gdata_key); job_template.u.GHASH._key = &gdata_key; job_template.u.GHASH._init_tag = (uint8_t *) &auth_iv; break; - case TEST_AUTH_SNOW_V_AEAD: - job_template.hash_alg = IMB_AUTH_SNOW_V_AEAD; - break; - case TEST_CRC32_ETHERNET_FCS: - job_template.hash_alg = IMB_AUTH_CRC32_ETHERNET_FCS; - break; - case TEST_CRC32_SCTP: - job_template.hash_alg = IMB_AUTH_CRC32_SCTP; - break; - case TEST_CRC32_WIMAX_OFDMA_DATA: - job_template.hash_alg = IMB_AUTH_CRC32_WIMAX_OFDMA_DATA; - break; - case TEST_CRC24_LTE_A: - 
job_template.hash_alg = IMB_AUTH_CRC24_LTE_A; - break; - case TEST_CRC24_LTE_B: - job_template.hash_alg = IMB_AUTH_CRC24_LTE_B; - break; - case TEST_CRC16_X25: - job_template.hash_alg = IMB_AUTH_CRC16_X25; - break; - case TEST_CRC16_FP_DATA: - job_template.hash_alg = IMB_AUTH_CRC16_FP_DATA; - break; - case TEST_CRC11_FP_HEADER: - job_template.hash_alg = IMB_AUTH_CRC11_FP_HEADER; - break; - case TEST_CRC10_IUUP_DATA: - job_template.hash_alg = IMB_AUTH_CRC10_IUUP_DATA; - break; - case TEST_CRC8_WIMAX_OFDMA_HCS: - job_template.hash_alg = IMB_AUTH_CRC8_WIMAX_OFDMA_HCS; - break; - case TEST_CRC7_FP_HEADER: - job_template.hash_alg = IMB_AUTH_CRC7_FP_HEADER; - break; - case TEST_CRC6_IUUP_HEADER: - job_template.hash_alg = IMB_AUTH_CRC6_IUUP_HEADER; - break; - case TEST_AUTH_SM3: - job_template.hash_alg = IMB_AUTH_SM3; - break; case TEST_SM3_HMAC: job_template.u.HMAC._hashed_auth_key_xor_ipad = (uint8_t *) ipad; job_template.u.HMAC._hashed_auth_key_xor_opad = (uint8_t *) opad; - job_template.hash_alg = IMB_AUTH_HMAC_SM3; break; default: /* HMAC hash algorithm */ job_template.u.HMAC._hashed_auth_key_xor_ipad = (uint8_t *) ipad; job_template.u.HMAC._hashed_auth_key_xor_opad = (uint8_t *) opad; - job_template.hash_alg = (IMB_HASH_ALG) params->hash_alg; break; } if (tag_size == 0) @@ -2381,6 +2445,75 @@ do_test(IMB_MGR *mb_mgr, struct params_s *params, const uint32_t num_iter, uint8 } jobs_done = num_iter - num_jobs; + /* test AEAD burst api */ + } else if (test_api == TEST_API_AEAD_BURST) { + IMB_JOB jobs[MAX_BURST_SIZE]; + IMB_JOB *jt = &job_template; + uint32_t num_jobs = num_iter; + uint32_t list_idx; + + while (num_jobs && timebox_on) { + uint32_t n_jobs = (num_jobs / burst_size) ? burst_size : num_jobs; + + /* set all job params */ + for (i = 0; i < n_jobs; i++) { + job = &jobs[i]; + + /* If IMIX testing is being done, set the buffer + * size to cipher going through the + * list of sizes precalculated */ + if (imix_list_count != 0) { + list_idx = i & (JOB_SIZE_IMIX_LIST - 1); + job->msg_len_to_cipher_in_bytes = + cipher_size_list[list_idx]; + } else + job->msg_len_to_cipher_in_bytes = + jt->msg_len_to_cipher_in_bytes; + + job->src = get_src_buffer(index, p_buffer); + job->dst = get_dst_buffer(index, p_buffer); + job->enc_keys = job->dec_keys = + (const uint32_t *) get_key_pointer(index, p_keys); + job->cipher_start_src_offset_in_bytes = + jt->cipher_start_src_offset_in_bytes; + job->iv = jt->iv; + job->iv_len_in_bytes = jt->iv_len_in_bytes; + job->msg_len_to_hash_in_bytes = jt->msg_len_to_hash_in_bytes; + job->hash_start_src_offset_in_bytes = + jt->hash_start_src_offset_in_bytes; + job->auth_tag_output_len_in_bytes = + jt->auth_tag_output_len_in_bytes; + job->auth_tag_output = jt->auth_tag_output; + if (jt->cipher_mode == IMB_CIPHER_CCM) { + job->u.CCM.aad_len_in_bytes = aad_size; + job->u.CCM.aad = job->src; + } + + index = get_next_index(index); + } + /* submit AEAD burst */ +#ifdef DEBUG + const uint32_t completed_jobs = + IMB_SUBMIT_AEAD_BURST(mb_mgr, jobs, n_jobs, jt->cipher_mode, + jt->cipher_direction, jt->key_len_in_bytes); + + if (completed_jobs != n_jobs) { + const int err = imb_get_errno(mb_mgr); + + if (err != 0) { + printf("submit_aead_burst error " + "%d : '%s'\n", + err, imb_get_strerror(err)); + } + } +#else + IMB_SUBMIT_AEAD_BURST_NOCHECK(mb_mgr, jobs, n_jobs, jt->cipher_mode, + jt->cipher_direction, jt->key_len_in_bytes); +#endif + num_jobs -= n_jobs; + } + jobs_done = num_iter - num_jobs; + } else { /* TEST_API_JOB */ imb_set_session(mb_mgr, &job_template); @@ -4013,6 +4146,8 
@@ main(int argc, char *argv[]) test_api = TEST_API_CIPHER_BURST; } else if (strcmp(argv[i], "--hash-burst-api") == 0) { test_api = TEST_API_HASH_BURST; + } else if (strcmp(argv[i], "--aead-burst-api") == 0) { + test_api = TEST_API_AEAD_BURST; } else if (strcmp(argv[i], "--burst-size") == 0) { i = get_next_num_arg((const char *const *) argv, i, argc, &burst_size, sizeof(burst_size)); @@ -4054,31 +4189,58 @@ main(int argc, char *argv[]) if (burst_size != 0 && test_api == TEST_API_JOB) { fprintf(stderr, "--burst-size can only be used with " - "--burst-api, --cipher-burst-api or " - "--hash-burst-api options\n"); + "--burst-api, --cipher-burst-api, " + "--hash-burst-api or --aead-burst-api options\n"); return EXIT_FAILURE; } if (test_api != TEST_API_JOB && burst_size == 0) burst_size = DEFAULT_BURST_SIZE; - /* currently only AES-CBC & CTR supported by cipher-only burst API */ - if (test_api == TEST_API_CIPHER_BURST && (custom_job_params.cipher_mode != TEST_CBC && - custom_job_params.cipher_mode != TEST_CNTR)) { - fprintf(stderr, "Unsupported cipher-only burst " - "API algorithm selected\n"); - return EXIT_FAILURE; - } + /* only a few algorithms support the hash-only/cipher-only/AEAD burst API */ + if (test_api == TEST_API_HASH_BURST || test_api == TEST_API_CIPHER_BURST || + test_api == TEST_API_AEAD_BURST) { + uint32_t optim_burst_size; + IMB_MGR *aux_mgr = alloc_mb_mgr(0); - /* currently only HMAC-SHAx algs supported by hash-only burst API */ - if (test_api == TEST_API_HASH_BURST && - ((custom_job_params.hash_alg != TEST_SHA1_HMAC) && - (custom_job_params.hash_alg != TEST_SHA_224_HMAC) && - (custom_job_params.hash_alg != TEST_SHA_256_HMAC) && - (custom_job_params.hash_alg != TEST_SHA_384_HMAC) && - (custom_job_params.hash_alg != TEST_SHA_512_HMAC))) { - fprintf(stderr, "Unsupported hash-only burst API algorithm selected\n"); - return EXIT_FAILURE; + if (aux_mgr == NULL) { + fprintf(stderr, "Error allocating MB_MGR structure!\n"); + return EXIT_FAILURE; + } + init_mb_mgr_auto(aux_mgr, NULL); + + if (test_api == TEST_API_HASH_BURST) { + if (imb_hash_burst_get_size(aux_mgr, + translate_hash_alg(custom_job_params.hash_alg), + &optim_burst_size) == IMB_ERR_HASH_ALGO) { + fprintf(stderr, + "Unsupported hash-only burst API algorithm selected\n"); + free_mb_mgr(aux_mgr); + return EXIT_FAILURE; + } + } else if (test_api == TEST_API_CIPHER_BURST) { + if (imb_cipher_burst_get_size( + aux_mgr, translate_cipher_mode(custom_job_params.cipher_mode), + &optim_burst_size) == IMB_ERR_CIPH_MODE) { + fprintf(stderr, + "Unsupported cipher-only burst API algorithm selected\n"); + free_mb_mgr(aux_mgr); + return EXIT_FAILURE; + } + } else { /* AEAD */ + if (imb_aead_burst_get_size( + aux_mgr, translate_cipher_mode(custom_job_params.cipher_mode), + &optim_burst_size) == IMB_ERR_CIPH_MODE) { + fprintf(stderr, "Unsupported AEAD burst API algorithm selected\n"); + free_mb_mgr(aux_mgr); + return EXIT_FAILURE; + } + } + + if (optim_burst_size > burst_size) + fprintf(stderr, "NOTE: Burst size is lower than the minimum size for " + "optimal performance\n"); + free_mb_mgr(aux_mgr); } if (test_api == TEST_API_DIRECT && ((custom_job_params.cipher_mode != TEST_GCM) && @@ -4271,18 +4433,39 @@ main(int argc, char *argv[]) } #ifndef __aarch64__ - if (archs[ARCH_SSE]) { - IMB_MGR *p_mgr = alloc_mb_mgr(flags); + IMB_MGR *p_mgr = alloc_mb_mgr(flags); - if (p_mgr == NULL) { - fprintf(stderr, "Error allocating MB_MGR structure!\n"); - return EXIT_FAILURE; + if (p_mgr == NULL) { + fprintf(stderr, "Error allocating MB_MGR 
structure!\n"); + return EXIT_FAILURE; + } + + fprintf(stderr, "Testing "); + for (enum arch_type_e arch = ARCH_SSE; arch <= ARCH_AVX512; arch++) { + if (archs[arch] == 0) + continue; + + switch (arch) { + case ARCH_SSE: + init_mb_mgr_sse(p_mgr); + break; + case ARCH_AVX: + init_mb_mgr_avx(p_mgr); + break; + case ARCH_AVX2: + init_mb_mgr_avx2(p_mgr); + break; + default: /* ARCH_AV512 */ + init_mb_mgr_avx512(p_mgr); + break; } - init_mb_mgr_sse(p_mgr); - fprintf(stderr, "%s SHA extensions (shani) for SSE arch\n", - (p_mgr->features & IMB_FEATURE_SHANI) ? "Using" : "Not using"); - free_mb_mgr(p_mgr); + const char *arch_type; + + imb_get_arch_type_string(p_mgr, &arch_type, NULL); + fprintf(stderr, "\"%s\" ", arch_type); } + fprintf(stderr, "implementation/s\n"); + free_mb_mgr(p_mgr); #endif /* __aarch64__ */ memset(t_info, 0, sizeof(t_info)); diff --git a/test/kat-app/Makefile b/test/kat-app/Makefile index 0729b40af1cf4a44b3b003fa13a0f37756d91777..9b9f77ec4c1f16a3f2f0d71c515e793f203bb14f 100644 --- a/test/kat-app/Makefile +++ b/test/kat-app/Makefile @@ -40,8 +40,8 @@ SOURCES := main.c gcm_test.c ctr_test.c customop_test.c des_test.c ccm_test.c \ chacha20_poly1305_test.c null_test.c snow_v_test.c direct_api_param_test.c quic_ecb_test.c \ hmac_sha1.json.c hmac_sha224.json.c hmac_sha256.json.c hmac_sha384.json.c hmac_sha512.json.c \ hmac_md5.json.c gmac_test.json.c ghash_test.c ghash_test.json.c poly1305_test.json.c \ - cmac_test.json.c xcbc_test.json.c sha_test.json.c aes_cbcs_test.json.c gmac_test.c aes_cfb_test.c \ - ecb_test.json.c aes_cfb_test.json.c aes_cbc_test.c aes_cbc_test.json.c ctr_test.json.c \ + cmac_test.json.c xcbc_test.json.c sha_test.json.c aes_cbcs_test.json.c gmac_test.c aes_cfb_one_block_test.c \ + ecb_test.json.c aes_cfb_one_block_test.json.c aes_cbc_test.c aes_cbc_test.json.c ctr_test.json.c \ des_test.json.c chacha_test.json.c gcm_test.json.c ccm_test.json.c quic_chacha20_test.c \ chacha20_poly1305_test.json.c snow3g_test_f8_vectors.json.c snow3g_test_f9_vectors.json.c \ sm4_ecb_test.c sm4_ecb_test.json.c sm4_cbc_test.c sm4_cbc_test.json.c sm3_test.c \ diff --git a/test/kat-app/aes_cfb_test.c b/test/kat-app/aes_cfb_one_block_test.c similarity index 92% rename from test/kat-app/aes_cfb_test.c rename to test/kat-app/aes_cfb_one_block_test.c index 53b860c84bd5363f43100ec95f6497a982aacdf0..9466eff2576493d22d9fe3012f0f4db172116045 100644 --- a/test/kat-app/aes_cfb_test.c +++ b/test/kat-app/aes_cfb_one_block_test.c @@ -37,16 +37,16 @@ #include "cipher_test.h" int -cfb_test(struct IMB_MGR *mb_mgr); +cfb_one_block_test(struct IMB_MGR *mb_mgr); -extern const struct cipher_test cfb_test_json[]; +extern const struct cipher_test cfb_one_block_test_json[]; static int cfb_validate_ok(const uint8_t *output, const uint8_t *in_text, const size_t plen, const uint32_t klen, const unsigned i, const unsigned is_enc, const int in_place) { if (memcmp(output, in_text, plen) != 0) { - printf("\nAES-CFB%s standard test vector %u %s (%s): fail\n", + printf("\nAES-CFB-ONE%s standard test vector %u %s (%s): fail\n", (klen == 16) ? "128" : "256", i + 1, (is_enc) ? "encrypt" : "decrypt", (in_place) ? 
"in-place" : "out-of-place"); return 0; @@ -129,14 +129,14 @@ static void cfb_test_vectors(struct IMB_MGR *mb_mgr, struct test_suite_context *ctx128, struct test_suite_context *ctx256) { - const struct cipher_test *v = cfb_test_json; + const struct cipher_test *v = cfb_one_block_test_json; for (; v->msg != NULL; v++) { struct test_suite_context *ctx; if (!quiet_mode) { #ifdef DEBUG - printf("AES-CFB Test Case %zu key_len:%zu\n", v->tcId, v->keySize); + printf("AES-CFB-ONE Test Case %zu key_len:%zu\n", v->tcId, v->keySize); #else printf("."); #endif @@ -156,14 +156,14 @@ cfb_test_vectors(struct IMB_MGR *mb_mgr, struct test_suite_context *ctx128, } int -cfb_test(struct IMB_MGR *mb_mgr) +cfb_one_block_test(struct IMB_MGR *mb_mgr) { int errors = 0; struct test_suite_context ctx128; struct test_suite_context ctx256; - test_suite_start(&ctx128, "AES-CFB-128"); - test_suite_start(&ctx256, "AES-CFB-256"); + test_suite_start(&ctx128, "AES-CFB-128 ONE-BLOCK"); + test_suite_start(&ctx256, "AES-CFB-256 ONE-BLOCK"); cfb_test_vectors(mb_mgr, &ctx128, &ctx256); errors += test_suite_end(&ctx128); errors += test_suite_end(&ctx256); diff --git a/test/kat-app/aes_cfb_test.json.c b/test/kat-app/aes_cfb_one_block_test.json.c similarity index 99% rename from test/kat-app/aes_cfb_test.json.c rename to test/kat-app/aes_cfb_one_block_test.json.c index 3ee167cc3a2280ca0fe50b00f2385b0c9f064584..342dfb328beb2a6c03807d01386c0d08e518a5bb 100644 --- a/test/kat-app/aes_cfb_test.json.c +++ b/test/kat-app/aes_cfb_one_block_test.json.c @@ -27,7 +27,7 @@ /* CFB */ #include "cipher_test.h" -const struct cipher_test cfb_test_json[] = { +const struct cipher_test cfb_one_block_test_json[] = { /* Vectors from CM-SP-SECv3.1-I06-160602 section I.10.2 */ { 128, 128, 1, "\x01\x23\x45\x67\x89\xab\xcd\xef\x01\x23\x45\x67\x89\xab\xcd\xef", "\x12\x34\x56\x78\x90\xab\xcd\xef\x12\x34\x56\x78\x90\xab\xcd\xef", diff --git a/test/kat-app/ccm_test.c b/test/kat-app/ccm_test.c index ca3a347ddf729b54bc4fd5f6e5413dc7d189437c..4e5709085f15dfef0dcdc7baafab0d28af73055f 100644 --- a/test/kat-app/ccm_test.c +++ b/test/kat-app/ccm_test.c @@ -144,6 +144,145 @@ ccm_job_ok(const struct aead_test *vec, const struct IMB_JOB *job, const uint8_t return 1; } +static int +test_ccm_aead_burst(struct IMB_MGR *mb_mgr, const struct aead_test *vec, const int dir, + const int in_place, const int num_jobs, const uint64_t key_length) +{ + DECLARE_ALIGNED(uint32_t expkey[4 * 15], 16); + DECLARE_ALIGNED(uint32_t dust[4 * 15], 16); + struct IMB_JOB *job, jobs[IMB_MAX_BURST_SIZE]; + uint8_t padding[16]; + uint8_t **targets = malloc(num_jobs * sizeof(void *)); + uint8_t **auths = malloc(num_jobs * sizeof(void *)); + int i, completed_jobs, jobs_rx = 0, ret = -1; + const int order = (dir == IMB_DIR_ENCRYPT) ? 
IMB_ORDER_HASH_CIPHER : IMB_ORDER_CIPHER_HASH; + + if (targets == NULL || auths == NULL) { + fprintf(stderr, "Can't allocate buffer memory\n"); + goto end2; + } + + memset(padding, -1, sizeof(padding)); + memset(targets, 0, num_jobs * sizeof(void *)); + memset(auths, 0, num_jobs * sizeof(void *)); + + for (i = 0; i < num_jobs; i++) { + targets[i] = malloc(vec->msgSize / 8 + (sizeof(padding) * 2)); + auths[i] = malloc(16 + (sizeof(padding) * 2)); + if (targets[i] == NULL || auths[i] == NULL) { + fprintf(stderr, "Can't allocate buffer memory\n"); + goto end; + } + + memset(targets[i], -1, vec->msgSize / 8 + (sizeof(padding) * 2)); + memset(auths[i], -1, 16 + (sizeof(padding) * 2)); + + if (in_place) { + if (dir == IMB_DIR_ENCRYPT) + memcpy(targets[i] + sizeof(padding), (const void *) vec->msg, + vec->msgSize / 8); + else + memcpy(targets[i] + sizeof(padding), (const void *) vec->ct, + vec->msgSize / 8); + } + } + + if (key_length == 16) + IMB_AES_KEYEXP_128(mb_mgr, vec->key, expkey, dust); + else + IMB_AES_KEYEXP_256(mb_mgr, vec->key, expkey, dust); + + for (i = 0; i < num_jobs; i++) { + job = &jobs[i]; + job->cipher_direction = dir; + job->chain_order = order; + if (in_place) { + job->dst = targets[i] + sizeof(padding) + vec->aadSize / 8; + job->src = targets[i] + sizeof(padding); + } else { + if (dir == IMB_DIR_ENCRYPT) { + job->dst = targets[i] + sizeof(padding); + job->src = (const void *) vec->msg; + } else { + job->dst = targets[i] + sizeof(padding); + job->src = (const void *) vec->ct; + } + } + job->cipher_mode = IMB_CIPHER_CCM; + job->enc_keys = expkey; + job->dec_keys = expkey; + job->key_len_in_bytes = key_length; + job->iv = (const void *) vec->iv; + job->iv_len_in_bytes = vec->ivSize / 8; + job->cipher_start_src_offset_in_bytes = vec->aadSize / 8; + job->msg_len_to_cipher_in_bytes = vec->msgSize / 8 - vec->aadSize / 8; + + job->hash_alg = IMB_AUTH_AES_CCM; + job->hash_start_src_offset_in_bytes = vec->aadSize / 8; + job->msg_len_to_hash_in_bytes = vec->msgSize / 8 - vec->aadSize / 8; + job->auth_tag_output = auths[i] + sizeof(padding); + job->auth_tag_output_len_in_bytes = vec->tagSize / 8; + + job->u.CCM.aad_len_in_bytes = vec->aadSize / 8; + job->u.CCM.aad = job->src; + + job->user_data = targets[i]; + job->user_data2 = auths[i]; + } + + completed_jobs = + IMB_SUBMIT_AEAD_BURST(mb_mgr, jobs, num_jobs, IMB_CIPHER_CCM, dir, key_length); + if (completed_jobs != num_jobs) { + int err = imb_get_errno(mb_mgr); + + if (err != 0) { + printf("submit_burst error %d : '%s'\n", err, imb_get_strerror(err)); + goto end; + } else { + printf("submit_burst error: not enough " + "jobs returned!\n"); + goto end; + } + } + + for (i = 0; i < num_jobs; i++) { + job = &jobs[i]; + + if (job->status != IMB_STATUS_COMPLETED) { + printf("job %d status not complete!\n", i + 1); + goto end; + } + + jobs_rx++; + if (!ccm_job_ok(vec, job, job->user_data, padding, job->user_data2, sizeof(padding), + dir, in_place)) + goto end; + } + + if (jobs_rx != num_jobs) { + printf("Expected %d jobs, received %d\n", num_jobs, jobs_rx); + goto end; + } + ret = 0; + +end: + for (i = 0; i < num_jobs; i++) { + if (targets[i] != NULL) + free(targets[i]); + if (auths[i] != NULL) + free(auths[i]); + } + +end2: + if (targets != NULL) + free(targets); + + if (auths != NULL) + free(auths); + + return ret; +} + static int test_ccm(struct IMB_MGR *mb_mgr, const struct aead_test *vec, const int dir, const int in_place, const int num_jobs, const uint64_t key_length) @@ -327,6 +466,38 @@ test_ccm_128_std_vectors(struct IMB_MGR *mb_mgr, 
struct test_suite_context *ctx, } else { test_suite_update(ctx, 1, 0); } + + if (test_ccm_aead_burst(mb_mgr, v, IMB_DIR_ENCRYPT, 1, num_jobs, + IMB_KEY_128_BYTES)) { + printf("error #%zu encrypt in-place (aead burst)\n", v->tcId); + test_suite_update(ctx, 0, 1); + } else { + test_suite_update(ctx, 1, 0); + } + + if (test_ccm_aead_burst(mb_mgr, v, IMB_DIR_DECRYPT, 1, num_jobs, + IMB_KEY_128_BYTES)) { + printf("error #%zu decrypt in-place (aead burst)\n", v->tcId); + test_suite_update(ctx, 0, 1); + } else { + test_suite_update(ctx, 1, 0); + } + + if (test_ccm_aead_burst(mb_mgr, v, IMB_DIR_ENCRYPT, 0, num_jobs, + IMB_KEY_128_BYTES)) { + printf("error #%zu encrypt out-of-place (aead burst)\n", v->tcId); + test_suite_update(ctx, 0, 1); + } else { + test_suite_update(ctx, 1, 0); + } + + if (test_ccm_aead_burst(mb_mgr, v, IMB_DIR_DECRYPT, 0, num_jobs, + IMB_KEY_128_BYTES)) { + printf("error #%zu decrypt out-of-place (aead burst)\n", v->tcId); + test_suite_update(ctx, 0, 1); + } else { + test_suite_update(ctx, 1, 0); + } } if (!quiet_mode) printf("\n"); @@ -379,6 +550,38 @@ test_ccm_256_std_vectors(struct IMB_MGR *mb_mgr, struct test_suite_context *ctx, } else { test_suite_update(ctx, 1, 0); } + + if (test_ccm_aead_burst(mb_mgr, v, IMB_DIR_ENCRYPT, 1, num_jobs, + IMB_KEY_256_BYTES)) { + printf("error #%zu encrypt in-place (aead burst)\n", v->tcId); + test_suite_update(ctx, 0, 1); + } else { + test_suite_update(ctx, 1, 0); + } + + if (test_ccm_aead_burst(mb_mgr, v, IMB_DIR_DECRYPT, 1, num_jobs, + IMB_KEY_256_BYTES)) { + printf("error #%zu decrypt in-place (aead burst)\n", v->tcId); + test_suite_update(ctx, 0, 1); + } else { + test_suite_update(ctx, 1, 0); + } + + if (test_ccm_aead_burst(mb_mgr, v, IMB_DIR_ENCRYPT, 0, num_jobs, + IMB_KEY_256_BYTES)) { + printf("error #%zu encrypt out-of-place (aead burst)\n", v->tcId); + test_suite_update(ctx, 0, 1); + } else { + test_suite_update(ctx, 1, 0); + } + + if (test_ccm_aead_burst(mb_mgr, v, IMB_DIR_DECRYPT, 0, num_jobs, + IMB_KEY_256_BYTES)) { + printf("error #%zu decrypt out-of-place (aead burst)\n", v->tcId); + test_suite_update(ctx, 0, 1); + } else { + test_suite_update(ctx, 1, 0); + } } if (!quiet_mode) printf("\n"); diff --git a/test/kat-app/main.c b/test/kat-app/main.c index a8ab2452635864f56212e32f647e561a4664c79c..e2d3fdf530f424f0f39291cfd72976c1f9d75660 100644 --- a/test/kat-app/main.c +++ b/test/kat-app/main.c @@ -102,7 +102,7 @@ ghash_test(struct IMB_MGR *mb_mgr); extern int cbc_test(struct IMB_MGR *mb_mgr); extern int -cfb_test(struct IMB_MGR *mb_mgr); +cfb_one_block_test(struct IMB_MGR *mb_mgr); extern int ctr_test(struct IMB_MGR *mb_mgr); extern int @@ -135,7 +135,7 @@ struct imb_test tests[] = { { .str = "KAT", .fn = known_answer_test, .enabled = 1 }, { .str = "DO_TEST", .fn = do_test, .enabled = 1 }, { .str = "CBC", .fn = cbc_test, .enabled = 1 }, - { .str = "CFB", .fn = cfb_test, .enabled = 1 }, + { .str = "CFB", .fn = cfb_one_block_test, .enabled = 1 }, { .str = "CTR", .fn = ctr_test, .enabled = 1 }, { .str = "PON", .fn = pon_test, .enabled = 1 }, { .str = "XCBC", .fn = xcbc_test, .enabled = 1 }, diff --git a/test/kat-app/win_x64.mak b/test/kat-app/win_x64.mak index 6156f7c0fc83bdb1ccbf81d7a74397ac0bac4cca..e09eecfd818bd4e0815582c4456916d937c348b4 100644 --- a/test/kat-app/win_x64.mak +++ b/test/kat-app/win_x64.mak @@ -29,7 +29,7 @@ APP = imb-kat include ..\common\win_x64_common.mk -TEST_OBJS = utils.obj main.obj gcm_test.obj ctr_test.obj customop_test.obj des_test.obj ccm_test.obj cmac_test.obj hmac_sha1_test.obj 
hmac_sha256_sha512_test.obj hmac_md5_test.obj aes_test.obj sha_test.obj chained_test.obj api_test.obj pon_test.obj ecb_test.obj zuc_eea3_test.obj zuc_eia3_test.obj kasumi_test.obj snow3g_test.obj direct_api_test.obj clear_mem_test.obj hec_test.obj xcbc_test.obj aes_cbcs_test.obj crc_test.obj chacha_test.obj poly1305_test.obj chacha20_poly1305_test.obj null_test.obj snow_v_test.obj direct_api_param_test.obj quic_ecb_test.obj hmac_sha1.json.obj hmac_sha224.json.obj hmac_sha256.json.obj hmac_sha384.json.obj hmac_sha512.json.obj hmac_md5.json.obj gmac_test.obj gmac_test.json.obj ghash_test.obj ghash_test.json.obj poly1305_test.json.obj cmac_test.json.obj xcbc_test.json.obj sha_test.json.obj aes_cfb_test.obj aes_cfb_test.json.obj aes_cbcs_test.json.obj aes_cbc_test.obj aes_cbc_test.json.obj ecb_test.json.obj ctr_test.json.obj chacha_test.json.obj des_test.json.obj gcm_test.json.obj quic_chacha20_test.obj chacha20_poly1305_test.json.obj ccm_test.json.obj snow3g_test_f8_vectors.json.obj snow3g_test_f9_vectors.json.obj sm4_ecb_test.obj sm4_ecb_test.json.obj sm4_cbc_test.obj sm4_cbc_test.json.obj sm3_test.obj sm3_test.json.obj zuc_eia3_128.json.obj zuc_eia3_256.json.obj zuc_eea3_128.json.obj zuc_eea3_256.json.obj kasumi_f8.json.obj kasumi_f9.json.obj snow_v_test.json.obj hmac_sm3_test.obj hmac_sm3.json.obj snow_v_aead.json.obj +TEST_OBJS = utils.obj main.obj gcm_test.obj ctr_test.obj customop_test.obj des_test.obj ccm_test.obj cmac_test.obj hmac_sha1_test.obj hmac_sha256_sha512_test.obj hmac_md5_test.obj aes_test.obj sha_test.obj chained_test.obj api_test.obj pon_test.obj ecb_test.obj zuc_eea3_test.obj zuc_eia3_test.obj kasumi_test.obj snow3g_test.obj direct_api_test.obj clear_mem_test.obj hec_test.obj xcbc_test.obj aes_cbcs_test.obj crc_test.obj chacha_test.obj poly1305_test.obj chacha20_poly1305_test.obj null_test.obj snow_v_test.obj direct_api_param_test.obj quic_ecb_test.obj hmac_sha1.json.obj hmac_sha224.json.obj hmac_sha256.json.obj hmac_sha384.json.obj hmac_sha512.json.obj hmac_md5.json.obj gmac_test.obj gmac_test.json.obj ghash_test.obj ghash_test.json.obj poly1305_test.json.obj cmac_test.json.obj xcbc_test.json.obj sha_test.json.obj aes_cfb_one_block_test.obj aes_cfb_one_block_test.json.obj aes_cbcs_test.json.obj aes_cbc_test.obj aes_cbc_test.json.obj ecb_test.json.obj ctr_test.json.obj chacha_test.json.obj des_test.json.obj gcm_test.json.obj quic_chacha20_test.obj chacha20_poly1305_test.json.obj ccm_test.json.obj snow3g_test_f8_vectors.json.obj snow3g_test_f9_vectors.json.obj sm4_ecb_test.obj sm4_ecb_test.json.obj sm4_cbc_test.obj sm4_cbc_test.json.obj sm3_test.obj sm3_test.json.obj zuc_eia3_128.json.obj zuc_eia3_256.json.obj zuc_eea3_128.json.obj zuc_eea3_256.json.obj kasumi_f8.json.obj kasumi_f9.json.obj snow_v_test.json.obj hmac_sm3_test.obj hmac_sm3.json.obj snow_v_aead.json.obj TEST_LFLAGS = /out:$(APP).exe $(DLFLAGS) diff --git a/test/mp-app/CMakeLists.txt b/test/mp-app/CMakeLists.txt index 839abc7185a737af9e964ded24d58be6292e98c8..79a3af90a4565bc01a985e12875ea55eb3f43ab7 100644 --- a/test/mp-app/CMakeLists.txt +++ b/test/mp-app/CMakeLists.txt @@ -96,6 +96,12 @@ else() set(TEST_APP_BIN_DIR "${CMAKE_CURRENT_BINARY_DIR}") endif() +# append config type for multi-config generators +get_property(multi_config_gen GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) +if (multi_config_gen) + STRING(APPEND TEST_APP_BIN_DIR "/$<IF:$<CONFIG:Debug>,Debug,Release>") +endif() + add_test(NAME MULTIPROCESS COMMAND ${MP_APP_PRI} ${TEST_APP_BIN_DIR}/${MP_APP_SEC} WORKING_DIRECTORY ${TEST_APP_BIN_DIR}) diff --git 
a/test/mp-app/imb-mp-primary.c b/test/mp-app/imb-mp-primary.c index d44913a3b91c2e91e3628e2576604ce904e97d8e..b45c78c7726aeb47ded967f21ee25fae7cdfff1a 100644 --- a/test/mp-app/imb-mp-primary.c +++ b/test/mp-app/imb-mp-primary.c @@ -50,6 +50,11 @@ mp_primary(const char *name2) #if defined(__linux__) || defined(__FreeBSD__) #include +#include <unistd.h> /* close() and unlink() */ +#endif + +#ifdef _WIN32 +#include <io.h> /* _mktemp() */ #endif /* @@ -136,25 +141,87 @@ prepare_reference_output(struct info_context *ctx, const int is_pri) return 0; } +static char * +randomize_shm_name(const char *name) +{ + if (name == NULL) + return NULL; + + char temp[8]; + + memset(temp, 0, sizeof(temp)); + strncpy(temp, "XXXXXX", sizeof(temp) - 1); + +#if defined(__linux__) || defined(__FreeBSD__) + int fd = mkstemp(temp); + + if (fd == -1) + return NULL; + + close(fd); + unlink(temp); +#endif + +#ifdef _WIN32 + (void) _mktemp(temp); +#endif + + const size_t name_len = strlen(name); + const size_t temp_len = strlen(temp); + const size_t new_len = name_len + temp_len + 1; + char *new_name = malloc(new_len); + + if (new_name == NULL) + return NULL; + + const int ret_len = snprintf(new_name, new_len, "%s%s", name, temp); + + if (ret_len >= (int) new_len || ret_len < 0) { + free(new_name); + return NULL; + } + + return new_name; +} + static int mp_primary(const char *name2) { const int is_pri = 1; + + char *shm_info_uname = randomize_shm_name(SHM_INFO_NAME); + + if (shm_info_uname == NULL) + return -1; + + char *shm_data_uname = randomize_shm_name(SHM_DATA_NAME); + + if (shm_data_uname == NULL) { + free(shm_info_uname); + return -1; + } + + fprintf(stdout, "PRIMARY: init start %p, %s, %s\n", (void *) imb_get_errno, shm_info_uname, + shm_data_uname); + struct shared_memory app_shm, info_shm; struct info_context *ctx = NULL; struct allocator app_alloc; - fprintf(stdout, "PRIMARY: init start %p\n", (void *) imb_get_errno); - - if (shm_create(&info_shm, is_pri, SHM_INFO_NAME, SHM_INFO_SIZE, NULL) != 0) + if (shm_create(&info_shm, is_pri, shm_info_uname, SHM_INFO_SIZE, NULL) != 0) { + free(shm_info_uname); + free(shm_data_uname); return -1; + } /* cast info shared memory onto info context structure */ ctx = (struct info_context *) info_shm.ptr; memset(ctx, 0, sizeof(*ctx)); - if (shm_create(&app_shm, is_pri, SHM_DATA_NAME, SHM_DATA_SIZE, NULL) != 0) { + if (shm_create(&app_shm, is_pri, shm_data_uname, SHM_DATA_SIZE, NULL) != 0) { (void) shm_destroy(&info_shm, is_pri); + free(shm_info_uname); + free(shm_data_uname); return -1; } @@ -169,6 +236,8 @@ mp_primary(const char *name2) if (ctx->mb_mgr == NULL) { (void) shm_destroy(&info_shm, is_pri); (void) shm_destroy(&app_shm, is_pri); + free(shm_info_uname); + free(shm_data_uname); return -1; } @@ -178,6 +247,8 @@ mp_primary(const char *name2) if (alloc_crypto_op_data(ctx, &app_alloc, is_pri) != 0) { (void) shm_destroy(&info_shm, is_pri); (void) shm_destroy(&app_shm, is_pri); + free(shm_info_uname); + free(shm_data_uname); return -1; } @@ -185,6 +256,8 @@ mp_primary(const char *name2) if (prepare_reference_output(ctx, is_pri) != 0) { (void) shm_destroy(&info_shm, is_pri); (void) shm_destroy(&app_shm, is_pri); + free(shm_info_uname); + free(shm_data_uname); return -1; } @@ -196,6 +269,8 @@ mp_primary(const char *name2) &ctx->jobs_sent, ctx->exp_enc_key, ctx->iv, buffer_size) != 0) { (void) shm_destroy(&info_shm, is_pri); (void) shm_destroy(&app_shm, is_pri); + free(shm_info_uname); + free(shm_data_uname); return -1; } @@ -205,6 +280,8 @@ mp_primary(const char *name2) if (ctx->jobs_sent != 
IMB_DIM(ctx->buffer_table_in_out)) { (void) shm_destroy(&info_shm, is_pri); (void) shm_destroy(&app_shm, is_pri); + free(shm_info_uname); + free(shm_data_uname); return -1; } @@ -215,7 +292,35 @@ mp_primary(const char *name2) */ fprintf(stdout, "PRIMARY: starting SECONDARY process now\n"); - const int status = system(name2); + const size_t cmd_length = + strlen(name2) + 1 + strlen(shm_info_uname) + 1 + strlen(shm_data_uname) + 1; + char *cmd = malloc(cmd_length); + + if (cmd == NULL) { + (void) shm_destroy(&info_shm, is_pri); + (void) shm_destroy(&app_shm, is_pri); + free(shm_info_uname); + free(shm_data_uname); + return -1; + } + + memset(cmd, 0, cmd_length); + + const int cmd_length_ret = + snprintf(cmd, cmd_length, "%s %s %s", name2, shm_info_uname, shm_data_uname); + + if (cmd_length_ret >= (int) cmd_length || cmd_length_ret < 0) { + (void) shm_destroy(&info_shm, is_pri); + (void) shm_destroy(&app_shm, is_pri); + free(shm_info_uname); + free(shm_data_uname); + free(cmd); + return -1; + } + + const int status = system(cmd); + + free(cmd); #ifdef _WIN32 const int err = (status != EXIT_SUCCESS); @@ -230,6 +335,8 @@ mp_primary(const char *name2) fprintf(stdout, "MULTI-PROCESS TEST: FAILED\n"); (void) shm_destroy(&info_shm, is_pri); (void) shm_destroy(&app_shm, is_pri); + free(shm_info_uname); + free(shm_data_uname); return -1; } @@ -251,11 +358,18 @@ mp_primary(const char *name2) /* clean up and exit */ if (shm_destroy(&info_shm, is_pri) != 0) { (void) shm_destroy(&app_shm, is_pri); + free(shm_info_uname); + free(shm_data_uname); return -1; } - if (shm_destroy(&app_shm, is_pri) != 0) + if (shm_destroy(&app_shm, is_pri) != 0) { + free(shm_info_uname); + free(shm_data_uname); return -1; + } + free(shm_info_uname); + free(shm_data_uname); return 0; } #endif /* _WIN32 || __linux__ || __FreeBSD__ */ diff --git a/test/mp-app/imb-mp-secondary.c b/test/mp-app/imb-mp-secondary.c index 7df2ff8657ce0b5c5bf4ec64dc2a8d50fba1d13e..8d32f3058a2de3081ab3803a00ddd3522fba0bb9 100644 --- a/test/mp-app/imb-mp-secondary.c +++ b/test/mp-app/imb-mp-secondary.c @@ -38,8 +38,10 @@ #if defined(__MINGW32__) static int -mp_secondary(void) +mp_secondary(const char *shm_info_uname, const char *shm_data_uname) { + (void) shm_info_uname; + (void) shm_data_uname; printf("Multi-Process test not executed.\n"); return 0; } @@ -53,15 +55,16 @@ mp_secondary(void) */ static int -mp_secondary(void) +mp_secondary(const char *shm_info_uname, const char *shm_data_uname) { const int is_pri = 0; struct shared_memory app_shm, info_shm; struct info_context *ctx = NULL; - fprintf(stdout, "SECONDARY: init start %p\n", (void *) imb_get_errno); + fprintf(stdout, "SECONDARY: init start %p, %s, %s\n", (void *) imb_get_errno, + shm_info_uname, shm_data_uname); - if (shm_create(&info_shm, is_pri, SHM_INFO_NAME, SHM_INFO_SIZE, NULL) != 0) + if (shm_create(&info_shm, is_pri, shm_info_uname, SHM_INFO_SIZE, NULL) != 0) return -1; /* cast info shared memory onto info context structure */ @@ -73,7 +76,7 @@ mp_secondary(void) return -1; } - if (shm_create(&app_shm, is_pri, SHM_DATA_NAME, SHM_DATA_SIZE, ctx->app_mmap) != 0) { + if (shm_create(&app_shm, is_pri, shm_data_uname, SHM_DATA_SIZE, ctx->app_mmap) != 0) { (void) shm_destroy(&info_shm, is_pri); return -1; } @@ -130,10 +133,13 @@ mp_secondary(void) int main(int argc, char **argv) { + int ret = -1; + (void) argc; (void) argv; - const int ret = mp_secondary(); + if (argc == 3) + ret = mp_secondary(argv[1], argv[2]); return (ret == 0) ? 
EXIT_SUCCESS : EXIT_FAILURE; } diff --git a/test/mp-app/mp_shared_mem.c b/test/mp-app/mp_shared_mem.c index b996335c7cd68f17b2273454d08d66bedc8cc441..816cafdffd35e275a3c51b0cf2b4a90820817abb 100644 --- a/test/mp-app/mp_shared_mem.c +++ b/test/mp-app/mp_shared_mem.c @@ -86,13 +86,17 @@ shm_destroy(struct shared_memory *sm, const int is_pri) int ret = 0; if (!is_pri) - if (munmap(sm->ptr, sm->size) != 0) + if (munmap(sm->ptr, sm->size) != 0) { + perror("shm_destroy()"); ret = -1; + } sm->ptr = NULL; if (is_pri) - if (shm_unlink(sm->name) != 0) + if (shm_unlink(sm->name) != 0) { + perror("shm_destroy()"); ret = -1; + } sm->name = NULL; sm->size = 0; @@ -110,17 +114,27 @@ shm_create(struct shared_memory *sm, const int is_pri, const char *name, const s sm->ptr = MAP_FAILED; /* create the shared memory object */ - if (is_pri) + if (is_pri) { + fd = shm_open(sm->name, O_RDWR, 0666); + if (fd != -1) { + printf("shm_open(): %s already exists!\n", sm->name); + close(fd); + return -1; + } fd = shm_open(sm->name, O_CREAT | O_RDWR, 0666); - else + } else { fd = shm_open(sm->name, O_RDWR, 0666); + } - if (fd == -1) + if (fd == -1) { + perror("shm_create()"); return -1; + } /* configure the size of the shared memory object */ if (is_pri) { if (ftruncate(fd, sm->size) != 0) { + perror("shm_create()"); (void) shm_destroy(sm, is_pri); close(fd); return -1; @@ -170,6 +184,7 @@ shm_create(struct shared_memory *sm, const int is_pri, const char *name, const s close(fd); if (sm->ptr == MAP_FAILED) { + perror("shm_create()"); fprintf(stderr, "!mmap() of %s shared memory error\n", sm->name); (void) shm_destroy(sm, is_pri); return -1;
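
Usage sketch (illustrative, not part of the patch): the capability queries added in lib/x86_64/capabilities.c let an application size its bursts to the number of out-of-order lanes the selected implementation actually provides, and imb_get_arch_type_string() reports which implementation init_mb_mgr_auto() picked. The example below is a minimal stand-alone sketch under the assumptions that the library header (included here as "ipsec-mb.h", matching capabilities.c above) exports these prototypes and that error handling may be trimmed; it is not a definitive application.

/* Hypothetical example; all names follow the patch above. */
#include <stdio.h>
#include "ipsec-mb.h"

int
main(void)
{
        IMB_MGR *mgr = alloc_mb_mgr(0);
        const char *arch_type = NULL;
        unsigned hash_n = 0, cipher_n = 0, aead_n = 0;

        if (mgr == NULL)
                return 1;

        /* picks the best implementation for the current CPU */
        init_mb_mgr_auto(mgr, NULL);

        /* prints e.g. "AVX2 Type 4" per the mapping table in capabilities.c */
        if (imb_get_arch_type_string(mgr, &arch_type, NULL) == 0)
                printf("implementation: %s\n", arch_type);

        /* burst sizes come from the OOO managers' total_num_lanes fields */
        if (imb_hash_burst_get_size(mgr, IMB_AUTH_HMAC_SHA_256, &hash_n) == 0)
                printf("HMAC-SHA-256 optimal burst: %u\n", hash_n);
        if (imb_cipher_burst_get_size(mgr, IMB_CIPHER_CBC, &cipher_n) == 0)
                printf("AES-CBC optimal burst: %u\n", cipher_n);
        if (imb_aead_burst_get_size(mgr, IMB_CIPHER_CCM, &aead_n) == 0)
                printf("AES-CCM optimal burst: %u\n", aead_n);

        free_mb_mgr(mgr);
        return 0;
}

A burst submitted through IMB_SUBMIT_AEAD_BURST() at the size reported by imb_aead_burst_get_size() keeps every CCM lane of the selected implementation busy, which is why ipsec_perf.c above warns when --burst-size is set below that value.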