/* armv8-sha512-asm * * Copyright (C) 2006-2026 wolfSSL Inc. * * This file is part of wolfSSL. * * wolfSSL is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * wolfSSL is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA */ #include #include /* Generated using (from wolfssl): * cd ../scripts * ruby ./sha2/sha512.rb arm64 \ * ../wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.c */ #ifdef WOLFSSL_ARMASM #ifdef __aarch64__ #ifdef WOLFSSL_ARMASM_INLINE #include #if defined(WOLFSSL_SHA512) || defined(WOLFSSL_SHA384) XALIGNED(16) static const word64 L_SHA512_transform_neon_len_k[] = { 0x428a2f98d728ae22UL, 0x7137449123ef65cdUL, 0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL, 0x3956c25bf348b538UL, 0x59f111f1b605d019UL, 0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL, 0xd807aa98a3030242UL, 0x12835b0145706fbeUL, 0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL, 0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL, 0x9bdc06a725c71235UL, 0xc19bf174cf692694UL, 0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL, 0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL, 0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL, 0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL, 0x983e5152ee66dfabUL, 0xa831c66d2db43210UL, 0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL, 0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL, 0x06ca6351e003826fUL, 0x142929670a0e6e70UL, 0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL, 0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL, 0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL, 0x81c2c92e47edaee6UL, 0x92722c851482353bUL, 0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL, 0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL, 0xd192e819d6ef5218UL, 0xd69906245565a910UL, 0xf40e35855771202aUL, 0x106aa07032bbd1b8UL, 0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL, 0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL, 0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL, 0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL, 0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL, 0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL, 0x90befffa23631e28UL, 0xa4506cebde82bde9UL, 0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL, 0xca273eceea26619cUL, 0xd186b8c721c0c207UL, 0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL, 0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL, 0x113f9804bef90daeUL, 0x1b710b35131c471bUL, 0x28db77f523047d84UL, 0x32caab7b40c72493UL, 0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL, 0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL, 0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL, }; XALIGNED(16) static const word64 L_SHA512_transform_neon_len_r8[] = { 0x0007060504030201UL, 0x080f0e0d0c0b0a09UL, }; void Transform_Sha512_Len_neon(wc_Sha512* sha512, const byte* data, word32 len) { const word64* k = L_SHA512_transform_neon_len_k; const word64* r8 = L_SHA512_transform_neon_len_r8; __asm__ __volatile__ ( "ld1 {v11.16b}, [%[r8]]\n\t" /* Load digest into working vars */ "ldp x4, x5, [%x[sha512]]\n\t" "ldp x6, x7, [%x[sha512], #16]\n\t" "ldp x8, x9, [%x[sha512], #32]\n\t" "ldp x10, x11, [%x[sha512], #48]\n\t" /* Start of loop processing a block */ "\n" "L_sha512_len_neon_begin_%=:\n\t" /* Load W */ /* Copy digest to add in at end */ "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[data]], #0x40\n\t" "mov x19, x4\n\t" "ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[data]], #0x40\n\t" "mov x20, x5\n\t" "rev64 v0.16b, v0.16b\n\t" "mov x21, x6\n\t" "rev64 v1.16b, v1.16b\n\t" "mov x22, x7\n\t" "rev64 v2.16b, v2.16b\n\t" "mov x23, x8\n\t" "rev64 v3.16b, v3.16b\n\t" "mov x24, x9\n\t" "rev64 v4.16b, v4.16b\n\t" "mov x25, x10\n\t" "rev64 v5.16b, v5.16b\n\t" "mov x26, x11\n\t" "rev64 v6.16b, v6.16b\n\t" "rev64 v7.16b, v7.16b\n\t" /* Pre-calc: b ^ c */ "eor x16, x5, x6\n\t" "mov %[r8], #4\n\t" /* Start of 16 rounds */ "\n" "L_sha512_len_neon_start_%=:\n\t" /* Round 0 */ "mov x13, v0.d[0]\n\t" "ldr x15, [%[k]], #8\n\t" "ror x12, x8, #14\n\t" "ror x14, x4, #28\n\t" "eor x12, x12, x8, ror 18\n\t" "eor x14, x14, x4, ror 34\n\t" "eor x12, x12, x8, ror 41\n\t" "eor x14, x14, x4, ror 39\n\t" "add x11, x11, x12\n\t" "eor x17, x4, x5\n\t" "eor x12, x9, x10\n\t" "and x16, x17, x16\n\t" "and x12, x12, x8\n\t" "add x11, x11, x13\n\t" "eor x12, x12, x10\n\t" "add x11, x11, x15\n\t" "eor x16, x16, x5\n\t" "add x11, x11, x12\n\t" "add x14, x14, x16\n\t" "add x7, x7, x11\n\t" "add x11, x11, x14\n\t" /* Round 1 */ "mov x13, v0.d[1]\n\t" "ldr x15, [%[k]], #8\n\t" "ext v10.16b, v0.16b, v1.16b, #8\n\t" "ror x12, x7, #14\n\t" "shl v8.2d, v7.2d, #45\n\t" "ror x14, x11, #28\n\t" "sri v8.2d, v7.2d, #19\n\t" "eor x12, x12, x7, ror 18\n\t" "shl v9.2d, v7.2d, #3\n\t" "eor x14, x14, x11, ror 34\n\t" "sri v9.2d, v7.2d, #61\n\t" "eor x12, x12, x7, ror 41\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x14, x14, x11, ror 39\n\t" "ushr v8.2d, v7.2d, #6\n\t" "add x10, x10, x12\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x16, x11, x4\n\t" "add v0.2d, v0.2d, v9.2d\n\t" "eor x12, x8, x9\n\t" "ext v9.16b, v4.16b, v5.16b, #8\n\t" "and x17, x16, x17\n\t" "add v0.2d, v0.2d, v9.2d\n\t" "and x12, x12, x7\n\t" "shl v8.2d, v10.2d, #63\n\t" "add x10, x10, x13\n\t" "sri v8.2d, v10.2d, #1\n\t" "eor x12, x12, x9\n\t" "tbl v9.16b, {v10.16b}, v11.16b\n\t" "add x10, x10, x15\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x17, x17, x4\n\t" "ushr v10.2d, v10.2d, #7\n\t" "add x10, x10, x12\n\t" "eor v9.16b, v9.16b, v10.16b\n\t" "add x14, x14, x17\n\t" "add v0.2d, v0.2d, v9.2d\n\t" "add x6, x6, x10\n\t" "add x10, x10, x14\n\t" /* Round 2 */ "mov x13, v1.d[0]\n\t" "ldr x15, [%[k]], #8\n\t" "ror x12, x6, #14\n\t" "ror x14, x10, #28\n\t" "eor x12, x12, x6, ror 18\n\t" "eor x14, x14, x10, ror 34\n\t" "eor x12, x12, x6, ror 41\n\t" "eor x14, x14, x10, ror 39\n\t" "add x9, x9, x12\n\t" "eor x17, x10, x11\n\t" "eor x12, x7, x8\n\t" "and x16, x17, x16\n\t" "and x12, x12, x6\n\t" "add x9, x9, x13\n\t" "eor x12, x12, x8\n\t" "add x9, x9, x15\n\t" "eor x16, x16, x11\n\t" "add x9, x9, x12\n\t" "add x14, x14, x16\n\t" "add x5, x5, x9\n\t" "add x9, x9, x14\n\t" /* Round 3 */ "mov x13, v1.d[1]\n\t" "ldr x15, [%[k]], #8\n\t" "ext v10.16b, v1.16b, v2.16b, #8\n\t" "ror x12, x5, #14\n\t" "shl v8.2d, v0.2d, #45\n\t" "ror x14, x9, #28\n\t" "sri v8.2d, v0.2d, #19\n\t" "eor x12, x12, x5, ror 18\n\t" "shl v9.2d, v0.2d, #3\n\t" "eor x14, x14, x9, ror 34\n\t" "sri v9.2d, v0.2d, #61\n\t" "eor x12, x12, x5, ror 41\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x14, x14, x9, ror 39\n\t" "ushr v8.2d, v0.2d, #6\n\t" "add x8, x8, x12\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x16, x9, x10\n\t" "add v1.2d, v1.2d, v9.2d\n\t" "eor x12, x6, x7\n\t" "ext v9.16b, v5.16b, v6.16b, #8\n\t" "and x17, x16, x17\n\t" "add v1.2d, v1.2d, v9.2d\n\t" "and x12, x12, x5\n\t" "shl v8.2d, v10.2d, #63\n\t" "add x8, x8, x13\n\t" "sri v8.2d, v10.2d, #1\n\t" "eor x12, x12, x7\n\t" "tbl v9.16b, {v10.16b}, v11.16b\n\t" "add x8, x8, x15\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x17, x17, x10\n\t" "ushr v10.2d, v10.2d, #7\n\t" "add x8, x8, x12\n\t" "eor v9.16b, v9.16b, v10.16b\n\t" "add x14, x14, x17\n\t" "add v1.2d, v1.2d, v9.2d\n\t" "add x4, x4, x8\n\t" "add x8, x8, x14\n\t" /* Round 4 */ "mov x13, v2.d[0]\n\t" "ldr x15, [%[k]], #8\n\t" "ror x12, x4, #14\n\t" "ror x14, x8, #28\n\t" "eor x12, x12, x4, ror 18\n\t" "eor x14, x14, x8, ror 34\n\t" "eor x12, x12, x4, ror 41\n\t" "eor x14, x14, x8, ror 39\n\t" "add x7, x7, x12\n\t" "eor x17, x8, x9\n\t" "eor x12, x5, x6\n\t" "and x16, x17, x16\n\t" "and x12, x12, x4\n\t" "add x7, x7, x13\n\t" "eor x12, x12, x6\n\t" "add x7, x7, x15\n\t" "eor x16, x16, x9\n\t" "add x7, x7, x12\n\t" "add x14, x14, x16\n\t" "add x11, x11, x7\n\t" "add x7, x7, x14\n\t" /* Round 5 */ "mov x13, v2.d[1]\n\t" "ldr x15, [%[k]], #8\n\t" "ext v10.16b, v2.16b, v3.16b, #8\n\t" "ror x12, x11, #14\n\t" "shl v8.2d, v1.2d, #45\n\t" "ror x14, x7, #28\n\t" "sri v8.2d, v1.2d, #19\n\t" "eor x12, x12, x11, ror 18\n\t" "shl v9.2d, v1.2d, #3\n\t" "eor x14, x14, x7, ror 34\n\t" "sri v9.2d, v1.2d, #61\n\t" "eor x12, x12, x11, ror 41\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x14, x14, x7, ror 39\n\t" "ushr v8.2d, v1.2d, #6\n\t" "add x6, x6, x12\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x16, x7, x8\n\t" "add v2.2d, v2.2d, v9.2d\n\t" "eor x12, x4, x5\n\t" "ext v9.16b, v6.16b, v7.16b, #8\n\t" "and x17, x16, x17\n\t" "add v2.2d, v2.2d, v9.2d\n\t" "and x12, x12, x11\n\t" "shl v8.2d, v10.2d, #63\n\t" "add x6, x6, x13\n\t" "sri v8.2d, v10.2d, #1\n\t" "eor x12, x12, x5\n\t" "tbl v9.16b, {v10.16b}, v11.16b\n\t" "add x6, x6, x15\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x17, x17, x8\n\t" "ushr v10.2d, v10.2d, #7\n\t" "add x6, x6, x12\n\t" "eor v9.16b, v9.16b, v10.16b\n\t" "add x14, x14, x17\n\t" "add v2.2d, v2.2d, v9.2d\n\t" "add x10, x10, x6\n\t" "add x6, x6, x14\n\t" /* Round 6 */ "mov x13, v3.d[0]\n\t" "ldr x15, [%[k]], #8\n\t" "ror x12, x10, #14\n\t" "ror x14, x6, #28\n\t" "eor x12, x12, x10, ror 18\n\t" "eor x14, x14, x6, ror 34\n\t" "eor x12, x12, x10, ror 41\n\t" "eor x14, x14, x6, ror 39\n\t" "add x5, x5, x12\n\t" "eor x17, x6, x7\n\t" "eor x12, x11, x4\n\t" "and x16, x17, x16\n\t" "and x12, x12, x10\n\t" "add x5, x5, x13\n\t" "eor x12, x12, x4\n\t" "add x5, x5, x15\n\t" "eor x16, x16, x7\n\t" "add x5, x5, x12\n\t" "add x14, x14, x16\n\t" "add x9, x9, x5\n\t" "add x5, x5, x14\n\t" /* Round 7 */ "mov x13, v3.d[1]\n\t" "ldr x15, [%[k]], #8\n\t" "ext v10.16b, v3.16b, v4.16b, #8\n\t" "ror x12, x9, #14\n\t" "shl v8.2d, v2.2d, #45\n\t" "ror x14, x5, #28\n\t" "sri v8.2d, v2.2d, #19\n\t" "eor x12, x12, x9, ror 18\n\t" "shl v9.2d, v2.2d, #3\n\t" "eor x14, x14, x5, ror 34\n\t" "sri v9.2d, v2.2d, #61\n\t" "eor x12, x12, x9, ror 41\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x14, x14, x5, ror 39\n\t" "ushr v8.2d, v2.2d, #6\n\t" "add x4, x4, x12\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x16, x5, x6\n\t" "add v3.2d, v3.2d, v9.2d\n\t" "eor x12, x10, x11\n\t" "ext v9.16b, v7.16b, v0.16b, #8\n\t" "and x17, x16, x17\n\t" "add v3.2d, v3.2d, v9.2d\n\t" "and x12, x12, x9\n\t" "shl v8.2d, v10.2d, #63\n\t" "add x4, x4, x13\n\t" "sri v8.2d, v10.2d, #1\n\t" "eor x12, x12, x11\n\t" "tbl v9.16b, {v10.16b}, v11.16b\n\t" "add x4, x4, x15\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x17, x17, x6\n\t" "ushr v10.2d, v10.2d, #7\n\t" "add x4, x4, x12\n\t" "eor v9.16b, v9.16b, v10.16b\n\t" "add x14, x14, x17\n\t" "add v3.2d, v3.2d, v9.2d\n\t" "add x8, x8, x4\n\t" "add x4, x4, x14\n\t" /* Round 8 */ "mov x13, v4.d[0]\n\t" "ldr x15, [%[k]], #8\n\t" "ror x12, x8, #14\n\t" "ror x14, x4, #28\n\t" "eor x12, x12, x8, ror 18\n\t" "eor x14, x14, x4, ror 34\n\t" "eor x12, x12, x8, ror 41\n\t" "eor x14, x14, x4, ror 39\n\t" "add x11, x11, x12\n\t" "eor x17, x4, x5\n\t" "eor x12, x9, x10\n\t" "and x16, x17, x16\n\t" "and x12, x12, x8\n\t" "add x11, x11, x13\n\t" "eor x12, x12, x10\n\t" "add x11, x11, x15\n\t" "eor x16, x16, x5\n\t" "add x11, x11, x12\n\t" "add x14, x14, x16\n\t" "add x7, x7, x11\n\t" "add x11, x11, x14\n\t" /* Round 9 */ "mov x13, v4.d[1]\n\t" "ldr x15, [%[k]], #8\n\t" "ext v10.16b, v4.16b, v5.16b, #8\n\t" "ror x12, x7, #14\n\t" "shl v8.2d, v3.2d, #45\n\t" "ror x14, x11, #28\n\t" "sri v8.2d, v3.2d, #19\n\t" "eor x12, x12, x7, ror 18\n\t" "shl v9.2d, v3.2d, #3\n\t" "eor x14, x14, x11, ror 34\n\t" "sri v9.2d, v3.2d, #61\n\t" "eor x12, x12, x7, ror 41\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x14, x14, x11, ror 39\n\t" "ushr v8.2d, v3.2d, #6\n\t" "add x10, x10, x12\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x16, x11, x4\n\t" "add v4.2d, v4.2d, v9.2d\n\t" "eor x12, x8, x9\n\t" "ext v9.16b, v0.16b, v1.16b, #8\n\t" "and x17, x16, x17\n\t" "add v4.2d, v4.2d, v9.2d\n\t" "and x12, x12, x7\n\t" "shl v8.2d, v10.2d, #63\n\t" "add x10, x10, x13\n\t" "sri v8.2d, v10.2d, #1\n\t" "eor x12, x12, x9\n\t" "tbl v9.16b, {v10.16b}, v11.16b\n\t" "add x10, x10, x15\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x17, x17, x4\n\t" "ushr v10.2d, v10.2d, #7\n\t" "add x10, x10, x12\n\t" "eor v9.16b, v9.16b, v10.16b\n\t" "add x14, x14, x17\n\t" "add v4.2d, v4.2d, v9.2d\n\t" "add x6, x6, x10\n\t" "add x10, x10, x14\n\t" /* Round 10 */ "mov x13, v5.d[0]\n\t" "ldr x15, [%[k]], #8\n\t" "ror x12, x6, #14\n\t" "ror x14, x10, #28\n\t" "eor x12, x12, x6, ror 18\n\t" "eor x14, x14, x10, ror 34\n\t" "eor x12, x12, x6, ror 41\n\t" "eor x14, x14, x10, ror 39\n\t" "add x9, x9, x12\n\t" "eor x17, x10, x11\n\t" "eor x12, x7, x8\n\t" "and x16, x17, x16\n\t" "and x12, x12, x6\n\t" "add x9, x9, x13\n\t" "eor x12, x12, x8\n\t" "add x9, x9, x15\n\t" "eor x16, x16, x11\n\t" "add x9, x9, x12\n\t" "add x14, x14, x16\n\t" "add x5, x5, x9\n\t" "add x9, x9, x14\n\t" /* Round 11 */ "mov x13, v5.d[1]\n\t" "ldr x15, [%[k]], #8\n\t" "ext v10.16b, v5.16b, v6.16b, #8\n\t" "ror x12, x5, #14\n\t" "shl v8.2d, v4.2d, #45\n\t" "ror x14, x9, #28\n\t" "sri v8.2d, v4.2d, #19\n\t" "eor x12, x12, x5, ror 18\n\t" "shl v9.2d, v4.2d, #3\n\t" "eor x14, x14, x9, ror 34\n\t" "sri v9.2d, v4.2d, #61\n\t" "eor x12, x12, x5, ror 41\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x14, x14, x9, ror 39\n\t" "ushr v8.2d, v4.2d, #6\n\t" "add x8, x8, x12\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x16, x9, x10\n\t" "add v5.2d, v5.2d, v9.2d\n\t" "eor x12, x6, x7\n\t" "ext v9.16b, v1.16b, v2.16b, #8\n\t" "and x17, x16, x17\n\t" "add v5.2d, v5.2d, v9.2d\n\t" "and x12, x12, x5\n\t" "shl v8.2d, v10.2d, #63\n\t" "add x8, x8, x13\n\t" "sri v8.2d, v10.2d, #1\n\t" "eor x12, x12, x7\n\t" "tbl v9.16b, {v10.16b}, v11.16b\n\t" "add x8, x8, x15\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x17, x17, x10\n\t" "ushr v10.2d, v10.2d, #7\n\t" "add x8, x8, x12\n\t" "eor v9.16b, v9.16b, v10.16b\n\t" "add x14, x14, x17\n\t" "add v5.2d, v5.2d, v9.2d\n\t" "add x4, x4, x8\n\t" "add x8, x8, x14\n\t" /* Round 12 */ "mov x13, v6.d[0]\n\t" "ldr x15, [%[k]], #8\n\t" "ror x12, x4, #14\n\t" "ror x14, x8, #28\n\t" "eor x12, x12, x4, ror 18\n\t" "eor x14, x14, x8, ror 34\n\t" "eor x12, x12, x4, ror 41\n\t" "eor x14, x14, x8, ror 39\n\t" "add x7, x7, x12\n\t" "eor x17, x8, x9\n\t" "eor x12, x5, x6\n\t" "and x16, x17, x16\n\t" "and x12, x12, x4\n\t" "add x7, x7, x13\n\t" "eor x12, x12, x6\n\t" "add x7, x7, x15\n\t" "eor x16, x16, x9\n\t" "add x7, x7, x12\n\t" "add x14, x14, x16\n\t" "add x11, x11, x7\n\t" "add x7, x7, x14\n\t" /* Round 13 */ "mov x13, v6.d[1]\n\t" "ldr x15, [%[k]], #8\n\t" "ext v10.16b, v6.16b, v7.16b, #8\n\t" "ror x12, x11, #14\n\t" "shl v8.2d, v5.2d, #45\n\t" "ror x14, x7, #28\n\t" "sri v8.2d, v5.2d, #19\n\t" "eor x12, x12, x11, ror 18\n\t" "shl v9.2d, v5.2d, #3\n\t" "eor x14, x14, x7, ror 34\n\t" "sri v9.2d, v5.2d, #61\n\t" "eor x12, x12, x11, ror 41\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x14, x14, x7, ror 39\n\t" "ushr v8.2d, v5.2d, #6\n\t" "add x6, x6, x12\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x16, x7, x8\n\t" "add v6.2d, v6.2d, v9.2d\n\t" "eor x12, x4, x5\n\t" "ext v9.16b, v2.16b, v3.16b, #8\n\t" "and x17, x16, x17\n\t" "add v6.2d, v6.2d, v9.2d\n\t" "and x12, x12, x11\n\t" "shl v8.2d, v10.2d, #63\n\t" "add x6, x6, x13\n\t" "sri v8.2d, v10.2d, #1\n\t" "eor x12, x12, x5\n\t" "tbl v9.16b, {v10.16b}, v11.16b\n\t" "add x6, x6, x15\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x17, x17, x8\n\t" "ushr v10.2d, v10.2d, #7\n\t" "add x6, x6, x12\n\t" "eor v9.16b, v9.16b, v10.16b\n\t" "add x14, x14, x17\n\t" "add v6.2d, v6.2d, v9.2d\n\t" "add x10, x10, x6\n\t" "add x6, x6, x14\n\t" /* Round 14 */ "mov x13, v7.d[0]\n\t" "ldr x15, [%[k]], #8\n\t" "ror x12, x10, #14\n\t" "ror x14, x6, #28\n\t" "eor x12, x12, x10, ror 18\n\t" "eor x14, x14, x6, ror 34\n\t" "eor x12, x12, x10, ror 41\n\t" "eor x14, x14, x6, ror 39\n\t" "add x5, x5, x12\n\t" "eor x17, x6, x7\n\t" "eor x12, x11, x4\n\t" "and x16, x17, x16\n\t" "and x12, x12, x10\n\t" "add x5, x5, x13\n\t" "eor x12, x12, x4\n\t" "add x5, x5, x15\n\t" "eor x16, x16, x7\n\t" "add x5, x5, x12\n\t" "add x14, x14, x16\n\t" "add x9, x9, x5\n\t" "add x5, x5, x14\n\t" /* Round 15 */ "mov x13, v7.d[1]\n\t" "ldr x15, [%[k]], #8\n\t" "ext v10.16b, v7.16b, v0.16b, #8\n\t" "ror x12, x9, #14\n\t" "shl v8.2d, v6.2d, #45\n\t" "ror x14, x5, #28\n\t" "sri v8.2d, v6.2d, #19\n\t" "eor x12, x12, x9, ror 18\n\t" "shl v9.2d, v6.2d, #3\n\t" "eor x14, x14, x5, ror 34\n\t" "sri v9.2d, v6.2d, #61\n\t" "eor x12, x12, x9, ror 41\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x14, x14, x5, ror 39\n\t" "ushr v8.2d, v6.2d, #6\n\t" "add x4, x4, x12\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x16, x5, x6\n\t" "add v7.2d, v7.2d, v9.2d\n\t" "eor x12, x10, x11\n\t" "ext v9.16b, v3.16b, v4.16b, #8\n\t" "and x17, x16, x17\n\t" "add v7.2d, v7.2d, v9.2d\n\t" "and x12, x12, x9\n\t" "shl v8.2d, v10.2d, #63\n\t" "add x4, x4, x13\n\t" "sri v8.2d, v10.2d, #1\n\t" "eor x12, x12, x11\n\t" "tbl v9.16b, {v10.16b}, v11.16b\n\t" "add x4, x4, x15\n\t" "eor v9.16b, v9.16b, v8.16b\n\t" "eor x17, x17, x6\n\t" "ushr v10.2d, v10.2d, #7\n\t" "add x4, x4, x12\n\t" "eor v9.16b, v9.16b, v10.16b\n\t" "add x14, x14, x17\n\t" "add v7.2d, v7.2d, v9.2d\n\t" "add x8, x8, x4\n\t" "add x4, x4, x14\n\t" "subs %[r8], %[r8], #1\n\t" "b.ne L_sha512_len_neon_start_%=\n\t" /* Round 0 */ "mov x13, v0.d[0]\n\t" "ldr x15, [%[k]], #8\n\t" "ror x12, x8, #14\n\t" "ror x14, x4, #28\n\t" "eor x12, x12, x8, ror 18\n\t" "eor x14, x14, x4, ror 34\n\t" "eor x12, x12, x8, ror 41\n\t" "eor x14, x14, x4, ror 39\n\t" "add x11, x11, x12\n\t" "eor x17, x4, x5\n\t" "eor x12, x9, x10\n\t" "and x16, x17, x16\n\t" "and x12, x12, x8\n\t" "add x11, x11, x13\n\t" "eor x12, x12, x10\n\t" "add x11, x11, x15\n\t" "eor x16, x16, x5\n\t" "add x11, x11, x12\n\t" "add x14, x14, x16\n\t" "add x7, x7, x11\n\t" "add x11, x11, x14\n\t" /* Round 1 */ "mov x13, v0.d[1]\n\t" "ldr x15, [%[k]], #8\n\t" "ror x12, x7, #14\n\t" "ror x14, x11, #28\n\t" "eor x12, x12, x7, ror 18\n\t" "eor x14, x14, x11, ror 34\n\t" "eor x12, x12, x7, ror 41\n\t" "eor x14, x14, x11, ror 39\n\t" "add x10, x10, x12\n\t" "eor x16, x11, x4\n\t" "eor x12, x8, x9\n\t" "and x17, x16, x17\n\t" "and x12, x12, x7\n\t" "add x10, x10, x13\n\t" "eor x12, x12, x9\n\t" "add x10, x10, x15\n\t" "eor x17, x17, x4\n\t" "add x10, x10, x12\n\t" "add x14, x14, x17\n\t" "add x6, x6, x10\n\t" "add x10, x10, x14\n\t" /* Round 2 */ "mov x13, v1.d[0]\n\t" "ldr x15, [%[k]], #8\n\t" "ror x12, x6, #14\n\t" "ror x14, x10, #28\n\t" "eor x12, x12, x6, ror 18\n\t" "eor x14, x14, x10, ror 34\n\t" "eor x12, x12, x6, ror 41\n\t" "eor x14, x14, x10, ror 39\n\t" "add x9, x9, x12\n\t" "eor x17, x10, x11\n\t" "eor x12, x7, x8\n\t" "and x16, x17, x16\n\t" "and x12, x12, x6\n\t" "add x9, x9, x13\n\t" "eor x12, x12, x8\n\t" "add x9, x9, x15\n\t" "eor x16, x16, x11\n\t" "add x9, x9, x12\n\t" "add x14, x14, x16\n\t" "add x5, x5, x9\n\t" "add x9, x9, x14\n\t" /* Round 3 */ "mov x13, v1.d[1]\n\t" "ldr x15, [%[k]], #8\n\t" "ror x12, x5, #14\n\t" "ror x14, x9, #28\n\t" "eor x12, x12, x5, ror 18\n\t" "eor x14, x14, x9, ror 34\n\t" "eor x12, x12, x5, ror 41\n\t" "eor x14, x14, x9, ror 39\n\t" "add x8, x8, x12\n\t" "eor x16, x9, x10\n\t" "eor x12, x6, x7\n\t" "and x17, x16, x17\n\t" "and x12, x12, x5\n\t" "add x8, x8, x13\n\t" "eor x12, x12, x7\n\t" "add x8, x8, x15\n\t" "eor x17, x17, x10\n\t" "add x8, x8, x12\n\t" "add x14, x14, x17\n\t" "add x4, x4, x8\n\t" "add x8, x8, x14\n\t" /* Round 4 */ "mov x13, v2.d[0]\n\t" "ldr x15, [%[k]], #8\n\t" "ror x12, x4, #14\n\t" "ror x14, x8, #28\n\t" "eor x12, x12, x4, ror 18\n\t" "eor x14, x14, x8, ror 34\n\t" "eor x12, x12, x4, ror 41\n\t" "eor x14, x14, x8, ror 39\n\t" "add x7, x7, x12\n\t" "eor x17, x8, x9\n\t" "eor x12, x5, x6\n\t" "and x16, x17, x16\n\t" "and x12, x12, x4\n\t" "add x7, x7, x13\n\t" "eor x12, x12, x6\n\t" "add x7, x7, x15\n\t" "eor x16, x16, x9\n\t" "add x7, x7, x12\n\t" "add x14, x14, x16\n\t" "add x11, x11, x7\n\t" "add x7, x7, x14\n\t" /* Round 5 */ "mov x13, v2.d[1]\n\t" "ldr x15, [%[k]], #8\n\t" "ror x12, x11, #14\n\t" "ror x14, x7, #28\n\t" "eor x12, x12, x11, ror 18\n\t" "eor x14, x14, x7, ror 34\n\t" "eor x12, x12, x11, ror 41\n\t" "eor x14, x14, x7, ror 39\n\t" "add x6, x6, x12\n\t" "eor x16, x7, x8\n\t" "eor x12, x4, x5\n\t" "and x17, x16, x17\n\t" "and x12, x12, x11\n\t" "add x6, x6, x13\n\t" "eor x12, x12, x5\n\t" "add x6, x6, x15\n\t" "eor x17, x17, x8\n\t" "add x6, x6, x12\n\t" "add x14, x14, x17\n\t" "add x10, x10, x6\n\t" "add x6, x6, x14\n\t" /* Round 6 */ "mov x13, v3.d[0]\n\t" "ldr x15, [%[k]], #8\n\t" "ror x12, x10, #14\n\t" "ror x14, x6, #28\n\t" "eor x12, x12, x10, ror 18\n\t" "eor x14, x14, x6, ror 34\n\t" "eor x12, x12, x10, ror 41\n\t" "eor x14, x14, x6, ror 39\n\t" "add x5, x5, x12\n\t" "eor x17, x6, x7\n\t" "eor x12, x11, x4\n\t" "and x16, x17, x16\n\t" "and x12, x12, x10\n\t" "add x5, x5, x13\n\t" "eor x12, x12, x4\n\t" "add x5, x5, x15\n\t" "eor x16, x16, x7\n\t" "add x5, x5, x12\n\t" "add x14, x14, x16\n\t" "add x9, x9, x5\n\t" "add x5, x5, x14\n\t" /* Round 7 */ "mov x13, v3.d[1]\n\t" "ldr x15, [%[k]], #8\n\t" "ror x12, x9, #14\n\t" "ror x14, x5, #28\n\t" "eor x12, x12, x9, ror 18\n\t" "eor x14, x14, x5, ror 34\n\t" "eor x12, x12, x9, ror 41\n\t" "eor x14, x14, x5, ror 39\n\t" "add x4, x4, x12\n\t" "eor x16, x5, x6\n\t" "eor x12, x10, x11\n\t" "and x17, x16, x17\n\t" "and x12, x12, x9\n\t" "add x4, x4, x13\n\t" "eor x12, x12, x11\n\t" "add x4, x4, x15\n\t" "eor x17, x17, x6\n\t" "add x4, x4, x12\n\t" "add x14, x14, x17\n\t" "add x8, x8, x4\n\t" "add x4, x4, x14\n\t" /* Round 8 */ "mov x13, v4.d[0]\n\t" "ldr x15, [%[k]], #8\n\t" "ror x12, x8, #14\n\t" "ror x14, x4, #28\n\t" "eor x12, x12, x8, ror 18\n\t" "eor x14, x14, x4, ror 34\n\t" "eor x12, x12, x8, ror 41\n\t" "eor x14, x14, x4, ror 39\n\t" "add x11, x11, x12\n\t" "eor x17, x4, x5\n\t" "eor x12, x9, x10\n\t" "and x16, x17, x16\n\t" "and x12, x12, x8\n\t" "add x11, x11, x13\n\t" "eor x12, x12, x10\n\t" "add x11, x11, x15\n\t" "eor x16, x16, x5\n\t" "add x11, x11, x12\n\t" "add x14, x14, x16\n\t" "add x7, x7, x11\n\t" "add x11, x11, x14\n\t" /* Round 9 */ "mov x13, v4.d[1]\n\t" "ldr x15, [%[k]], #8\n\t" "ror x12, x7, #14\n\t" "ror x14, x11, #28\n\t" "eor x12, x12, x7, ror 18\n\t" "eor x14, x14, x11, ror 34\n\t" "eor x12, x12, x7, ror 41\n\t" "eor x14, x14, x11, ror 39\n\t" "add x10, x10, x12\n\t" "eor x16, x11, x4\n\t" "eor x12, x8, x9\n\t" "and x17, x16, x17\n\t" "and x12, x12, x7\n\t" "add x10, x10, x13\n\t" "eor x12, x12, x9\n\t" "add x10, x10, x15\n\t" "eor x17, x17, x4\n\t" "add x10, x10, x12\n\t" "add x14, x14, x17\n\t" "add x6, x6, x10\n\t" "add x10, x10, x14\n\t" /* Round 10 */ "mov x13, v5.d[0]\n\t" "ldr x15, [%[k]], #8\n\t" "ror x12, x6, #14\n\t" "ror x14, x10, #28\n\t" "eor x12, x12, x6, ror 18\n\t" "eor x14, x14, x10, ror 34\n\t" "eor x12, x12, x6, ror 41\n\t" "eor x14, x14, x10, ror 39\n\t" "add x9, x9, x12\n\t" "eor x17, x10, x11\n\t" "eor x12, x7, x8\n\t" "and x16, x17, x16\n\t" "and x12, x12, x6\n\t" "add x9, x9, x13\n\t" "eor x12, x12, x8\n\t" "add x9, x9, x15\n\t" "eor x16, x16, x11\n\t" "add x9, x9, x12\n\t" "add x14, x14, x16\n\t" "add x5, x5, x9\n\t" "add x9, x9, x14\n\t" /* Round 11 */ "mov x13, v5.d[1]\n\t" "ldr x15, [%[k]], #8\n\t" "ror x12, x5, #14\n\t" "ror x14, x9, #28\n\t" "eor x12, x12, x5, ror 18\n\t" "eor x14, x14, x9, ror 34\n\t" "eor x12, x12, x5, ror 41\n\t" "eor x14, x14, x9, ror 39\n\t" "add x8, x8, x12\n\t" "eor x16, x9, x10\n\t" "eor x12, x6, x7\n\t" "and x17, x16, x17\n\t" "and x12, x12, x5\n\t" "add x8, x8, x13\n\t" "eor x12, x12, x7\n\t" "add x8, x8, x15\n\t" "eor x17, x17, x10\n\t" "add x8, x8, x12\n\t" "add x14, x14, x17\n\t" "add x4, x4, x8\n\t" "add x8, x8, x14\n\t" /* Round 12 */ "mov x13, v6.d[0]\n\t" "ldr x15, [%[k]], #8\n\t" "ror x12, x4, #14\n\t" "ror x14, x8, #28\n\t" "eor x12, x12, x4, ror 18\n\t" "eor x14, x14, x8, ror 34\n\t" "eor x12, x12, x4, ror 41\n\t" "eor x14, x14, x8, ror 39\n\t" "add x7, x7, x12\n\t" "eor x17, x8, x9\n\t" "eor x12, x5, x6\n\t" "and x16, x17, x16\n\t" "and x12, x12, x4\n\t" "add x7, x7, x13\n\t" "eor x12, x12, x6\n\t" "add x7, x7, x15\n\t" "eor x16, x16, x9\n\t" "add x7, x7, x12\n\t" "add x14, x14, x16\n\t" "add x11, x11, x7\n\t" "add x7, x7, x14\n\t" /* Round 13 */ "mov x13, v6.d[1]\n\t" "ldr x15, [%[k]], #8\n\t" "ror x12, x11, #14\n\t" "ror x14, x7, #28\n\t" "eor x12, x12, x11, ror 18\n\t" "eor x14, x14, x7, ror 34\n\t" "eor x12, x12, x11, ror 41\n\t" "eor x14, x14, x7, ror 39\n\t" "add x6, x6, x12\n\t" "eor x16, x7, x8\n\t" "eor x12, x4, x5\n\t" "and x17, x16, x17\n\t" "and x12, x12, x11\n\t" "add x6, x6, x13\n\t" "eor x12, x12, x5\n\t" "add x6, x6, x15\n\t" "eor x17, x17, x8\n\t" "add x6, x6, x12\n\t" "add x14, x14, x17\n\t" "add x10, x10, x6\n\t" "add x6, x6, x14\n\t" /* Round 14 */ "mov x13, v7.d[0]\n\t" "ldr x15, [%[k]], #8\n\t" "ror x12, x10, #14\n\t" "ror x14, x6, #28\n\t" "eor x12, x12, x10, ror 18\n\t" "eor x14, x14, x6, ror 34\n\t" "eor x12, x12, x10, ror 41\n\t" "eor x14, x14, x6, ror 39\n\t" "add x5, x5, x12\n\t" "eor x17, x6, x7\n\t" "eor x12, x11, x4\n\t" "and x16, x17, x16\n\t" "and x12, x12, x10\n\t" "add x5, x5, x13\n\t" "eor x12, x12, x4\n\t" "add x5, x5, x15\n\t" "eor x16, x16, x7\n\t" "add x5, x5, x12\n\t" "add x14, x14, x16\n\t" "add x9, x9, x5\n\t" "add x5, x5, x14\n\t" /* Round 15 */ "mov x13, v7.d[1]\n\t" "ldr x15, [%[k]], #8\n\t" "ror x12, x9, #14\n\t" "ror x14, x5, #28\n\t" "eor x12, x12, x9, ror 18\n\t" "eor x14, x14, x5, ror 34\n\t" "eor x12, x12, x9, ror 41\n\t" "eor x14, x14, x5, ror 39\n\t" "add x4, x4, x12\n\t" "eor x16, x5, x6\n\t" "eor x12, x10, x11\n\t" "and x17, x16, x17\n\t" "and x12, x12, x9\n\t" "add x4, x4, x13\n\t" "eor x12, x12, x11\n\t" "add x4, x4, x15\n\t" "eor x17, x17, x6\n\t" "add x4, x4, x12\n\t" "add x14, x14, x17\n\t" "add x8, x8, x4\n\t" "add x4, x4, x14\n\t" "add x11, x11, x26\n\t" "add x10, x10, x25\n\t" "add x9, x9, x24\n\t" "add x8, x8, x23\n\t" "add x7, x7, x22\n\t" "add x6, x6, x21\n\t" "add x5, x5, x20\n\t" "add x4, x4, x19\n\t" "subs %w[len], %w[len], #0x80\n\t" "sub %[k], %[k], #0x280\n\t" "b.ne L_sha512_len_neon_begin_%=\n\t" "stp x4, x5, [%x[sha512]]\n\t" "stp x6, x7, [%x[sha512], #16]\n\t" "stp x8, x9, [%x[sha512], #32]\n\t" "stp x10, x11, [%x[sha512], #48]\n\t" : [sha512] "+r" (sha512), [len] "+r" (len) : [data] "r" (data), [k] "r" (k), [r8] "r" (r8) : "memory", "cc", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11" ); } #ifdef WOLFSSL_ARMASM_CRYPTO_SHA512 XALIGNED(16) static const word64 L_SHA512_trans_crypto_len_k[] = { 0x428a2f98d728ae22UL, 0x7137449123ef65cdUL, 0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL, 0x3956c25bf348b538UL, 0x59f111f1b605d019UL, 0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL, 0xd807aa98a3030242UL, 0x12835b0145706fbeUL, 0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL, 0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL, 0x9bdc06a725c71235UL, 0xc19bf174cf692694UL, 0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL, 0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL, 0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL, 0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL, 0x983e5152ee66dfabUL, 0xa831c66d2db43210UL, 0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL, 0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL, 0x06ca6351e003826fUL, 0x142929670a0e6e70UL, 0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL, 0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL, 0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL, 0x81c2c92e47edaee6UL, 0x92722c851482353bUL, 0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL, 0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL, 0xd192e819d6ef5218UL, 0xd69906245565a910UL, 0xf40e35855771202aUL, 0x106aa07032bbd1b8UL, 0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL, 0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL, 0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL, 0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL, 0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL, 0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL, 0x90befffa23631e28UL, 0xa4506cebde82bde9UL, 0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL, 0xca273eceea26619cUL, 0xd186b8c721c0c207UL, 0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL, 0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL, 0x113f9804bef90daeUL, 0x1b710b35131c471bUL, 0x28db77f523047d84UL, 0x32caab7b40c72493UL, 0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL, 0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL, 0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL, }; void Transform_Sha512_Len_crypto(wc_Sha512* sha512, const byte* data, word32 len); void Transform_Sha512_Len_crypto(wc_Sha512* sha512, const byte* data, word32 len) { const word64* k = L_SHA512_trans_crypto_len_k; __asm__ __volatile__ ( #ifdef __APPLE__ ".arch_extension sha3\n\t" #endif /* __APPLE__ */ /* Load K into vector registers */ "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%[k]], #0x40\n\t" "ld1 {v12.2d, v13.2d, v14.2d, v15.2d}, [%[k]], #0x40\n\t" /* Load digest into working vars */ "ld1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[sha512]]\n\t" /* Start of loop processing a block */ "\n" "L_sha512_len_crypto_begin_%=:\n\t" "mov x3, %[k]\n\t" /* Load W */ "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[data]], #0x40\n\t" "ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[data]], #0x40\n\t" "rev64 v0.16b, v0.16b\n\t" "rev64 v1.16b, v1.16b\n\t" "rev64 v2.16b, v2.16b\n\t" "rev64 v3.16b, v3.16b\n\t" "rev64 v4.16b, v4.16b\n\t" "rev64 v5.16b, v5.16b\n\t" "rev64 v6.16b, v6.16b\n\t" "rev64 v7.16b, v7.16b\n\t" /* Copy digest to add in at end */ "mov v28.16b, v24.16b\n\t" "mov v29.16b, v25.16b\n\t" "mov v30.16b, v26.16b\n\t" "mov v31.16b, v27.16b\n\t" /* Start of 16 rounds */ /* Round 0 */ "add v20.2d, v0.2d, v8.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v26.16b, v27.16b, #8\n\t" "ext v22.16b, v25.16b, v26.16b, #8\n\t" "add v27.2d, v27.2d, v20.2d\n\t" "sha512h q27, q21, v22.2d\n\t" "add v23.2d, v25.2d, v27.2d\n\t" "sha512h2 q27, q25, v24.2d\n\t" /* Round 1 */ "add v20.2d, v1.2d, v9.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v23.16b, v26.16b, #8\n\t" "ext v22.16b, v24.16b, v23.16b, #8\n\t" "add v26.2d, v26.2d, v20.2d\n\t" "sha512h q26, q21, v22.2d\n\t" "add v25.2d, v24.2d, v26.2d\n\t" "sha512h2 q26, q24, v27.2d\n\t" /* Round 2 */ "add v20.2d, v2.2d, v10.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v25.16b, v23.16b, #8\n\t" "ext v22.16b, v27.16b, v25.16b, #8\n\t" "add v23.2d, v23.2d, v20.2d\n\t" "sha512h q23, q21, v22.2d\n\t" "add v24.2d, v27.2d, v23.2d\n\t" "sha512h2 q23, q27, v26.2d\n\t" /* Round 3 */ "add v20.2d, v3.2d, v11.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v24.16b, v25.16b, #8\n\t" "ext v22.16b, v26.16b, v24.16b, #8\n\t" "add v25.2d, v25.2d, v20.2d\n\t" "sha512h q25, q21, v22.2d\n\t" "add v27.2d, v26.2d, v25.2d\n\t" "sha512h2 q25, q26, v23.2d\n\t" /* Round 4 */ "add v20.2d, v4.2d, v12.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v27.16b, v24.16b, #8\n\t" "ext v22.16b, v23.16b, v27.16b, #8\n\t" "add v24.2d, v24.2d, v20.2d\n\t" "sha512h q24, q21, v22.2d\n\t" "add v26.2d, v23.2d, v24.2d\n\t" "sha512h2 q24, q23, v25.2d\n\t" /* Round 5 */ "add v20.2d, v5.2d, v13.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v26.16b, v27.16b, #8\n\t" "ext v22.16b, v25.16b, v26.16b, #8\n\t" "add v27.2d, v27.2d, v20.2d\n\t" "sha512h q27, q21, v22.2d\n\t" "add v23.2d, v25.2d, v27.2d\n\t" "sha512h2 q27, q25, v24.2d\n\t" /* Round 6 */ "add v20.2d, v6.2d, v14.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v23.16b, v26.16b, #8\n\t" "ext v22.16b, v24.16b, v23.16b, #8\n\t" "add v26.2d, v26.2d, v20.2d\n\t" "sha512h q26, q21, v22.2d\n\t" "add v25.2d, v24.2d, v26.2d\n\t" "sha512h2 q26, q24, v27.2d\n\t" /* Round 7 */ "add v20.2d, v7.2d, v15.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v25.16b, v23.16b, #8\n\t" "ext v22.16b, v27.16b, v25.16b, #8\n\t" "add v23.2d, v23.2d, v20.2d\n\t" "sha512h q23, q21, v22.2d\n\t" "add v24.2d, v27.2d, v23.2d\n\t" "sha512h2 q23, q27, v26.2d\n\t" /* Load next 8 64-bit words of K */ "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x3], #0x40\n\t" /* Round 8 */ "sha512su0 v0.2d, v1.2d\n\t" "ext v21.16b, v4.16b, v5.16b, #8\n\t" "sha512su1 v0.2d, v7.2d, v21.2d\n\t" "add v20.2d, v0.2d, v16.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v24.16b, v25.16b, #8\n\t" "ext v22.16b, v26.16b, v24.16b, #8\n\t" "add v25.2d, v25.2d, v20.2d\n\t" "sha512h q25, q21, v22.2d\n\t" "add v27.2d, v26.2d, v25.2d\n\t" "sha512h2 q25, q26, v23.2d\n\t" /* Round 9 */ "sha512su0 v1.2d, v2.2d\n\t" "ext v21.16b, v5.16b, v6.16b, #8\n\t" "sha512su1 v1.2d, v0.2d, v21.2d\n\t" "add v20.2d, v1.2d, v17.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v27.16b, v24.16b, #8\n\t" "ext v22.16b, v23.16b, v27.16b, #8\n\t" "add v24.2d, v24.2d, v20.2d\n\t" "sha512h q24, q21, v22.2d\n\t" "add v26.2d, v23.2d, v24.2d\n\t" "sha512h2 q24, q23, v25.2d\n\t" /* Round 10 */ "sha512su0 v2.2d, v3.2d\n\t" "ext v21.16b, v6.16b, v7.16b, #8\n\t" "sha512su1 v2.2d, v1.2d, v21.2d\n\t" "add v20.2d, v2.2d, v18.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v26.16b, v27.16b, #8\n\t" "ext v22.16b, v25.16b, v26.16b, #8\n\t" "add v27.2d, v27.2d, v20.2d\n\t" "sha512h q27, q21, v22.2d\n\t" "add v23.2d, v25.2d, v27.2d\n\t" "sha512h2 q27, q25, v24.2d\n\t" /* Round 11 */ "sha512su0 v3.2d, v4.2d\n\t" "ext v21.16b, v7.16b, v0.16b, #8\n\t" "sha512su1 v3.2d, v2.2d, v21.2d\n\t" "add v20.2d, v3.2d, v19.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v23.16b, v26.16b, #8\n\t" "ext v22.16b, v24.16b, v23.16b, #8\n\t" "add v26.2d, v26.2d, v20.2d\n\t" "sha512h q26, q21, v22.2d\n\t" "add v25.2d, v24.2d, v26.2d\n\t" "sha512h2 q26, q24, v27.2d\n\t" /* Load next 8 64-bit words of K */ "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x3], #0x40\n\t" /* Round 12 */ "sha512su0 v4.2d, v5.2d\n\t" "ext v21.16b, v0.16b, v1.16b, #8\n\t" "sha512su1 v4.2d, v3.2d, v21.2d\n\t" "add v20.2d, v4.2d, v16.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v25.16b, v23.16b, #8\n\t" "ext v22.16b, v27.16b, v25.16b, #8\n\t" "add v23.2d, v23.2d, v20.2d\n\t" "sha512h q23, q21, v22.2d\n\t" "add v24.2d, v27.2d, v23.2d\n\t" "sha512h2 q23, q27, v26.2d\n\t" /* Round 13 */ "sha512su0 v5.2d, v6.2d\n\t" "ext v21.16b, v1.16b, v2.16b, #8\n\t" "sha512su1 v5.2d, v4.2d, v21.2d\n\t" "add v20.2d, v5.2d, v17.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v24.16b, v25.16b, #8\n\t" "ext v22.16b, v26.16b, v24.16b, #8\n\t" "add v25.2d, v25.2d, v20.2d\n\t" "sha512h q25, q21, v22.2d\n\t" "add v27.2d, v26.2d, v25.2d\n\t" "sha512h2 q25, q26, v23.2d\n\t" /* Round 14 */ "sha512su0 v6.2d, v7.2d\n\t" "ext v21.16b, v2.16b, v3.16b, #8\n\t" "sha512su1 v6.2d, v5.2d, v21.2d\n\t" "add v20.2d, v6.2d, v18.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v27.16b, v24.16b, #8\n\t" "ext v22.16b, v23.16b, v27.16b, #8\n\t" "add v24.2d, v24.2d, v20.2d\n\t" "sha512h q24, q21, v22.2d\n\t" "add v26.2d, v23.2d, v24.2d\n\t" "sha512h2 q24, q23, v25.2d\n\t" /* Round 15 */ "sha512su0 v7.2d, v0.2d\n\t" "ext v21.16b, v3.16b, v4.16b, #8\n\t" "sha512su1 v7.2d, v6.2d, v21.2d\n\t" "add v20.2d, v7.2d, v19.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v26.16b, v27.16b, #8\n\t" "ext v22.16b, v25.16b, v26.16b, #8\n\t" "add v27.2d, v27.2d, v20.2d\n\t" "sha512h q27, q21, v22.2d\n\t" "add v23.2d, v25.2d, v27.2d\n\t" "sha512h2 q27, q25, v24.2d\n\t" /* Load next 8 64-bit words of K */ "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x3], #0x40\n\t" /* Round 16 */ "sha512su0 v0.2d, v1.2d\n\t" "ext v21.16b, v4.16b, v5.16b, #8\n\t" "sha512su1 v0.2d, v7.2d, v21.2d\n\t" "add v20.2d, v0.2d, v16.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v23.16b, v26.16b, #8\n\t" "ext v22.16b, v24.16b, v23.16b, #8\n\t" "add v26.2d, v26.2d, v20.2d\n\t" "sha512h q26, q21, v22.2d\n\t" "add v25.2d, v24.2d, v26.2d\n\t" "sha512h2 q26, q24, v27.2d\n\t" /* Round 17 */ "sha512su0 v1.2d, v2.2d\n\t" "ext v21.16b, v5.16b, v6.16b, #8\n\t" "sha512su1 v1.2d, v0.2d, v21.2d\n\t" "add v20.2d, v1.2d, v17.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v25.16b, v23.16b, #8\n\t" "ext v22.16b, v27.16b, v25.16b, #8\n\t" "add v23.2d, v23.2d, v20.2d\n\t" "sha512h q23, q21, v22.2d\n\t" "add v24.2d, v27.2d, v23.2d\n\t" "sha512h2 q23, q27, v26.2d\n\t" /* Round 18 */ "sha512su0 v2.2d, v3.2d\n\t" "ext v21.16b, v6.16b, v7.16b, #8\n\t" "sha512su1 v2.2d, v1.2d, v21.2d\n\t" "add v20.2d, v2.2d, v18.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v24.16b, v25.16b, #8\n\t" "ext v22.16b, v26.16b, v24.16b, #8\n\t" "add v25.2d, v25.2d, v20.2d\n\t" "sha512h q25, q21, v22.2d\n\t" "add v27.2d, v26.2d, v25.2d\n\t" "sha512h2 q25, q26, v23.2d\n\t" /* Round 19 */ "sha512su0 v3.2d, v4.2d\n\t" "ext v21.16b, v7.16b, v0.16b, #8\n\t" "sha512su1 v3.2d, v2.2d, v21.2d\n\t" "add v20.2d, v3.2d, v19.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v27.16b, v24.16b, #8\n\t" "ext v22.16b, v23.16b, v27.16b, #8\n\t" "add v24.2d, v24.2d, v20.2d\n\t" "sha512h q24, q21, v22.2d\n\t" "add v26.2d, v23.2d, v24.2d\n\t" "sha512h2 q24, q23, v25.2d\n\t" /* Load next 8 64-bit words of K */ "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x3], #0x40\n\t" /* Round 20 */ "sha512su0 v4.2d, v5.2d\n\t" "ext v21.16b, v0.16b, v1.16b, #8\n\t" "sha512su1 v4.2d, v3.2d, v21.2d\n\t" "add v20.2d, v4.2d, v16.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v26.16b, v27.16b, #8\n\t" "ext v22.16b, v25.16b, v26.16b, #8\n\t" "add v27.2d, v27.2d, v20.2d\n\t" "sha512h q27, q21, v22.2d\n\t" "add v23.2d, v25.2d, v27.2d\n\t" "sha512h2 q27, q25, v24.2d\n\t" /* Round 21 */ "sha512su0 v5.2d, v6.2d\n\t" "ext v21.16b, v1.16b, v2.16b, #8\n\t" "sha512su1 v5.2d, v4.2d, v21.2d\n\t" "add v20.2d, v5.2d, v17.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v23.16b, v26.16b, #8\n\t" "ext v22.16b, v24.16b, v23.16b, #8\n\t" "add v26.2d, v26.2d, v20.2d\n\t" "sha512h q26, q21, v22.2d\n\t" "add v25.2d, v24.2d, v26.2d\n\t" "sha512h2 q26, q24, v27.2d\n\t" /* Round 22 */ "sha512su0 v6.2d, v7.2d\n\t" "ext v21.16b, v2.16b, v3.16b, #8\n\t" "sha512su1 v6.2d, v5.2d, v21.2d\n\t" "add v20.2d, v6.2d, v18.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v25.16b, v23.16b, #8\n\t" "ext v22.16b, v27.16b, v25.16b, #8\n\t" "add v23.2d, v23.2d, v20.2d\n\t" "sha512h q23, q21, v22.2d\n\t" "add v24.2d, v27.2d, v23.2d\n\t" "sha512h2 q23, q27, v26.2d\n\t" /* Round 23 */ "sha512su0 v7.2d, v0.2d\n\t" "ext v21.16b, v3.16b, v4.16b, #8\n\t" "sha512su1 v7.2d, v6.2d, v21.2d\n\t" "add v20.2d, v7.2d, v19.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v24.16b, v25.16b, #8\n\t" "ext v22.16b, v26.16b, v24.16b, #8\n\t" "add v25.2d, v25.2d, v20.2d\n\t" "sha512h q25, q21, v22.2d\n\t" "add v27.2d, v26.2d, v25.2d\n\t" "sha512h2 q25, q26, v23.2d\n\t" /* Load next 8 64-bit words of K */ "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x3], #0x40\n\t" /* Round 24 */ "sha512su0 v0.2d, v1.2d\n\t" "ext v21.16b, v4.16b, v5.16b, #8\n\t" "sha512su1 v0.2d, v7.2d, v21.2d\n\t" "add v20.2d, v0.2d, v16.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v27.16b, v24.16b, #8\n\t" "ext v22.16b, v23.16b, v27.16b, #8\n\t" "add v24.2d, v24.2d, v20.2d\n\t" "sha512h q24, q21, v22.2d\n\t" "add v26.2d, v23.2d, v24.2d\n\t" "sha512h2 q24, q23, v25.2d\n\t" /* Round 25 */ "sha512su0 v1.2d, v2.2d\n\t" "ext v21.16b, v5.16b, v6.16b, #8\n\t" "sha512su1 v1.2d, v0.2d, v21.2d\n\t" "add v20.2d, v1.2d, v17.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v26.16b, v27.16b, #8\n\t" "ext v22.16b, v25.16b, v26.16b, #8\n\t" "add v27.2d, v27.2d, v20.2d\n\t" "sha512h q27, q21, v22.2d\n\t" "add v23.2d, v25.2d, v27.2d\n\t" "sha512h2 q27, q25, v24.2d\n\t" /* Round 26 */ "sha512su0 v2.2d, v3.2d\n\t" "ext v21.16b, v6.16b, v7.16b, #8\n\t" "sha512su1 v2.2d, v1.2d, v21.2d\n\t" "add v20.2d, v2.2d, v18.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v23.16b, v26.16b, #8\n\t" "ext v22.16b, v24.16b, v23.16b, #8\n\t" "add v26.2d, v26.2d, v20.2d\n\t" "sha512h q26, q21, v22.2d\n\t" "add v25.2d, v24.2d, v26.2d\n\t" "sha512h2 q26, q24, v27.2d\n\t" /* Round 27 */ "sha512su0 v3.2d, v4.2d\n\t" "ext v21.16b, v7.16b, v0.16b, #8\n\t" "sha512su1 v3.2d, v2.2d, v21.2d\n\t" "add v20.2d, v3.2d, v19.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v25.16b, v23.16b, #8\n\t" "ext v22.16b, v27.16b, v25.16b, #8\n\t" "add v23.2d, v23.2d, v20.2d\n\t" "sha512h q23, q21, v22.2d\n\t" "add v24.2d, v27.2d, v23.2d\n\t" "sha512h2 q23, q27, v26.2d\n\t" /* Load next 8 64-bit words of K */ "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x3], #0x40\n\t" /* Round 28 */ "sha512su0 v4.2d, v5.2d\n\t" "ext v21.16b, v0.16b, v1.16b, #8\n\t" "sha512su1 v4.2d, v3.2d, v21.2d\n\t" "add v20.2d, v4.2d, v16.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v24.16b, v25.16b, #8\n\t" "ext v22.16b, v26.16b, v24.16b, #8\n\t" "add v25.2d, v25.2d, v20.2d\n\t" "sha512h q25, q21, v22.2d\n\t" "add v27.2d, v26.2d, v25.2d\n\t" "sha512h2 q25, q26, v23.2d\n\t" /* Round 29 */ "sha512su0 v5.2d, v6.2d\n\t" "ext v21.16b, v1.16b, v2.16b, #8\n\t" "sha512su1 v5.2d, v4.2d, v21.2d\n\t" "add v20.2d, v5.2d, v17.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v27.16b, v24.16b, #8\n\t" "ext v22.16b, v23.16b, v27.16b, #8\n\t" "add v24.2d, v24.2d, v20.2d\n\t" "sha512h q24, q21, v22.2d\n\t" "add v26.2d, v23.2d, v24.2d\n\t" "sha512h2 q24, q23, v25.2d\n\t" /* Round 30 */ "sha512su0 v6.2d, v7.2d\n\t" "ext v21.16b, v2.16b, v3.16b, #8\n\t" "sha512su1 v6.2d, v5.2d, v21.2d\n\t" "add v20.2d, v6.2d, v18.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v26.16b, v27.16b, #8\n\t" "ext v22.16b, v25.16b, v26.16b, #8\n\t" "add v27.2d, v27.2d, v20.2d\n\t" "sha512h q27, q21, v22.2d\n\t" "add v23.2d, v25.2d, v27.2d\n\t" "sha512h2 q27, q25, v24.2d\n\t" /* Round 31 */ "sha512su0 v7.2d, v0.2d\n\t" "ext v21.16b, v3.16b, v4.16b, #8\n\t" "sha512su1 v7.2d, v6.2d, v21.2d\n\t" "add v20.2d, v7.2d, v19.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v23.16b, v26.16b, #8\n\t" "ext v22.16b, v24.16b, v23.16b, #8\n\t" "add v26.2d, v26.2d, v20.2d\n\t" "sha512h q26, q21, v22.2d\n\t" "add v25.2d, v24.2d, v26.2d\n\t" "sha512h2 q26, q24, v27.2d\n\t" /* Load next 8 64-bit words of K */ "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x3], #0x40\n\t" /* Round 32 */ "sha512su0 v0.2d, v1.2d\n\t" "ext v21.16b, v4.16b, v5.16b, #8\n\t" "sha512su1 v0.2d, v7.2d, v21.2d\n\t" "add v20.2d, v0.2d, v16.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v25.16b, v23.16b, #8\n\t" "ext v22.16b, v27.16b, v25.16b, #8\n\t" "add v23.2d, v23.2d, v20.2d\n\t" "sha512h q23, q21, v22.2d\n\t" "add v24.2d, v27.2d, v23.2d\n\t" "sha512h2 q23, q27, v26.2d\n\t" /* Round 33 */ "sha512su0 v1.2d, v2.2d\n\t" "ext v21.16b, v5.16b, v6.16b, #8\n\t" "sha512su1 v1.2d, v0.2d, v21.2d\n\t" "add v20.2d, v1.2d, v17.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v24.16b, v25.16b, #8\n\t" "ext v22.16b, v26.16b, v24.16b, #8\n\t" "add v25.2d, v25.2d, v20.2d\n\t" "sha512h q25, q21, v22.2d\n\t" "add v27.2d, v26.2d, v25.2d\n\t" "sha512h2 q25, q26, v23.2d\n\t" /* Round 34 */ "sha512su0 v2.2d, v3.2d\n\t" "ext v21.16b, v6.16b, v7.16b, #8\n\t" "sha512su1 v2.2d, v1.2d, v21.2d\n\t" "add v20.2d, v2.2d, v18.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v27.16b, v24.16b, #8\n\t" "ext v22.16b, v23.16b, v27.16b, #8\n\t" "add v24.2d, v24.2d, v20.2d\n\t" "sha512h q24, q21, v22.2d\n\t" "add v26.2d, v23.2d, v24.2d\n\t" "sha512h2 q24, q23, v25.2d\n\t" /* Round 35 */ "sha512su0 v3.2d, v4.2d\n\t" "ext v21.16b, v7.16b, v0.16b, #8\n\t" "sha512su1 v3.2d, v2.2d, v21.2d\n\t" "add v20.2d, v3.2d, v19.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v26.16b, v27.16b, #8\n\t" "ext v22.16b, v25.16b, v26.16b, #8\n\t" "add v27.2d, v27.2d, v20.2d\n\t" "sha512h q27, q21, v22.2d\n\t" "add v23.2d, v25.2d, v27.2d\n\t" "sha512h2 q27, q25, v24.2d\n\t" /* Load next 8 64-bit words of K */ "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x3], #0x40\n\t" /* Round 36 */ "sha512su0 v4.2d, v5.2d\n\t" "ext v21.16b, v0.16b, v1.16b, #8\n\t" "sha512su1 v4.2d, v3.2d, v21.2d\n\t" "add v20.2d, v4.2d, v16.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v23.16b, v26.16b, #8\n\t" "ext v22.16b, v24.16b, v23.16b, #8\n\t" "add v26.2d, v26.2d, v20.2d\n\t" "sha512h q26, q21, v22.2d\n\t" "add v25.2d, v24.2d, v26.2d\n\t" "sha512h2 q26, q24, v27.2d\n\t" /* Round 37 */ "sha512su0 v5.2d, v6.2d\n\t" "ext v21.16b, v1.16b, v2.16b, #8\n\t" "sha512su1 v5.2d, v4.2d, v21.2d\n\t" "add v20.2d, v5.2d, v17.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v25.16b, v23.16b, #8\n\t" "ext v22.16b, v27.16b, v25.16b, #8\n\t" "add v23.2d, v23.2d, v20.2d\n\t" "sha512h q23, q21, v22.2d\n\t" "add v24.2d, v27.2d, v23.2d\n\t" "sha512h2 q23, q27, v26.2d\n\t" /* Round 38 */ "sha512su0 v6.2d, v7.2d\n\t" "ext v21.16b, v2.16b, v3.16b, #8\n\t" "sha512su1 v6.2d, v5.2d, v21.2d\n\t" "add v20.2d, v6.2d, v18.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v24.16b, v25.16b, #8\n\t" "ext v22.16b, v26.16b, v24.16b, #8\n\t" "add v25.2d, v25.2d, v20.2d\n\t" "sha512h q25, q21, v22.2d\n\t" "add v27.2d, v26.2d, v25.2d\n\t" "sha512h2 q25, q26, v23.2d\n\t" /* Round 39 */ "sha512su0 v7.2d, v0.2d\n\t" "ext v21.16b, v3.16b, v4.16b, #8\n\t" "sha512su1 v7.2d, v6.2d, v21.2d\n\t" "add v20.2d, v7.2d, v19.2d\n\t" "ext v20.16b, v20.16b, v20.16b, #8\n\t" "ext v21.16b, v27.16b, v24.16b, #8\n\t" "ext v22.16b, v23.16b, v27.16b, #8\n\t" "add v24.2d, v24.2d, v20.2d\n\t" "sha512h q24, q21, v22.2d\n\t" "add v26.2d, v23.2d, v24.2d\n\t" "sha512h2 q24, q23, v25.2d\n\t" "add v27.2d, v27.2d, v31.2d\n\t" "add v26.2d, v26.2d, v30.2d\n\t" "add v25.2d, v25.2d, v29.2d\n\t" "add v24.2d, v24.2d, v28.2d\n\t" "subs %w[len], %w[len], #0x80\n\t" "b.ne L_sha512_len_crypto_begin_%=\n\t" /* Store digest back */ "st1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[sha512]]\n\t" : [sha512] "+r" (sha512), [len] "+r" (len) : [data] "r" (data), [k] "r" (k) : "memory", "cc", "x3", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ); } #endif /* WOLFSSL_ARMASM_CRYPTO_SHA512 */ #endif /* WOLFSSL_SHA512 || WOLFSSL_SHA384 */ #endif /* __aarch64__ */ #endif /* WOLFSSL_ARMASM */ #endif /* WOLFSSL_ARMASM_INLINE */