/* armv8-sha256-asm * * Copyright (C) 2006-2026 wolfSSL Inc. * * This file is part of wolfSSL. * * wolfSSL is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * wolfSSL is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA */ #include #include /* Generated using (from wolfssl): * cd ../scripts * ruby ./sha2/sha256.rb arm64 \ * ../wolfssl/wolfcrypt/src/port/arm/armv8-sha256-asm.c */ #ifdef WOLFSSL_ARMASM #ifdef __aarch64__ #ifdef WOLFSSL_ARMASM_INLINE #include #if !defined(NO_SHA256) || defined(WOLFSSL_SHA224) XALIGNED(8) static const word32 L_SHA256_transform_neon_len_k[] = { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, }; void Transform_Sha256_Len_neon(wc_Sha256* sha256, const byte* data, word32 len) { const word32* k = L_SHA256_transform_neon_len_k; __asm__ __volatile__ ( /* Load digest into working vars */ "ldr w4, [%x[sha256]]\n\t" "ldr w5, [%x[sha256], #4]\n\t" "ldr w6, [%x[sha256], #8]\n\t" "ldr w7, [%x[sha256], #12]\n\t" "ldr w8, [%x[sha256], #16]\n\t" "ldr w9, [%x[sha256], #20]\n\t" "ldr w10, [%x[sha256], #24]\n\t" "ldr w11, [%x[sha256], #28]\n\t" /* Start of loop processing a block */ "\n" "L_sha256_len_neon_begin_%=:\n\t" /* Load W */ /* Copy digest to add in at end */ "ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [%x[data]], #32\n\t" "mov w15, w4\n\t" "ld1 {v4.8b, v5.8b, v6.8b, v7.8b}, [%x[data]], #32\n\t" "mov w16, w5\n\t" "rev32 v0.8b, v0.8b\n\t" "mov w17, w6\n\t" "rev32 v1.8b, v1.8b\n\t" "mov w19, w7\n\t" "rev32 v2.8b, v2.8b\n\t" "mov w20, w8\n\t" "rev32 v3.8b, v3.8b\n\t" "mov w21, w9\n\t" "rev32 v4.8b, v4.8b\n\t" "mov w22, w10\n\t" "rev32 v5.8b, v5.8b\n\t" "mov w23, w11\n\t" "rev32 v6.8b, v6.8b\n\t" "rev32 v7.8b, v7.8b\n\t" "mov x24, #3\n\t" /* Start of 16 rounds */ "\n" "L_sha256_len_neon_start_%=:\n\t" /* Round 0 */ "mov w14, v0.s[0]\n\t" "ror w12, w8, #6\n\t" "eor w13, w9, w10\n\t" "eor w12, w12, w8, ror 11\n\t" "and w13, w13, w8\n\t" "eor w12, w12, w8, ror 25\n\t" "eor w13, w13, w10\n\t" "add w11, w11, w12\n\t" "add w11, w11, w13\n\t" "ldr w12, [%[k]]\n\t" "add w11, w11, w14\n\t" "add w11, w11, w12\n\t" "add w7, w7, w11\n\t" "ror w12, w4, #2\n\t" "eor w13, w4, w5\n\t" "eor w12, w12, w4, ror 13\n\t" "eor w14, w5, w6\n\t" "and w13, w13, w14\n\t" "eor w12, w12, w4, ror 22\n\t" "eor w13, w13, w5\n\t" "add w11, w11, w12\n\t" "add w11, w11, w13\n\t" /* Round 1 */ "mov w14, v0.s[1]\n\t" /* Calc new W[0]-W[1] */ "ext v10.8b, v0.8b, v1.8b, #4\n\t" "ror w12, w7, #6\n\t" "shl v8.2s, v7.2s, #15\n\t" "eor w13, w8, w9\n\t" "sri v8.2s, v7.2s, #17\n\t" "eor w12, w12, w7, ror 11\n\t" "shl v9.2s, v7.2s, #13\n\t" "and w13, w13, w7\n\t" "sri v9.2s, v7.2s, #19\n\t" "eor w12, w12, w7, ror 25\n\t" "eor v9.8b, v9.8b, v8.8b\n\t" "eor w13, w13, w9\n\t" "ushr v8.2s, v7.2s, #10\n\t" "add w10, w10, w12\n\t" "eor v9.8b, v9.8b, v8.8b\n\t" "add w10, w10, w13\n\t" "add v0.2s, v0.2s, v9.2s\n\t" "ldr w12, [%[k], #4]\n\t" "ext v11.8b, v4.8b, v5.8b, #4\n\t" "add w10, w10, w14\n\t" "add v0.2s, v0.2s, v11.2s\n\t" "add w10, w10, w12\n\t" "shl v8.2s, v10.2s, #25\n\t" "add w6, w6, w10\n\t" "sri v8.2s, v10.2s, #7\n\t" "ror w12, w11, #2\n\t" "shl v9.2s, v10.2s, #14\n\t" "eor w13, w11, w4\n\t" "sri v9.2s, v10.2s, #18\n\t" "eor w12, w12, w11, ror 13\n\t" "eor v9.8b, v9.8b, v8.8b\n\t" "eor w14, w4, w5\n\t" "ushr v10.2s, v10.2s, #3\n\t" "and w13, w13, w14\n\t" "eor v9.8b, v9.8b, v10.8b\n\t" "eor w12, w12, w11, ror 22\n\t" "add v0.2s, v0.2s, v9.2s\n\t" "eor w13, w13, w4\n\t" "add w10, w10, w12\n\t" "add w10, w10, w13\n\t" /* Round 2 */ "mov w14, v1.s[0]\n\t" "ror w12, w6, #6\n\t" "eor w13, w7, w8\n\t" "eor w12, w12, w6, ror 11\n\t" "and w13, w13, w6\n\t" "eor w12, w12, w6, ror 25\n\t" "eor w13, w13, w8\n\t" "add w9, w9, w12\n\t" "add w9, w9, w13\n\t" "ldr w12, [%[k], #8]\n\t" "add w9, w9, w14\n\t" "add w9, w9, w12\n\t" "add w5, w5, w9\n\t" "ror w12, w10, #2\n\t" "eor w13, w10, w11\n\t" "eor w12, w12, w10, ror 13\n\t" "eor w14, w11, w4\n\t" "and w13, w13, w14\n\t" "eor w12, w12, w10, ror 22\n\t" "eor w13, w13, w11\n\t" "add w9, w9, w12\n\t" "add w9, w9, w13\n\t" /* Round 3 */ "mov w14, v1.s[1]\n\t" /* Calc new W[2]-W[3] */ "ext v10.8b, v1.8b, v2.8b, #4\n\t" "ror w12, w5, #6\n\t" "shl v8.2s, v0.2s, #15\n\t" "eor w13, w6, w7\n\t" "sri v8.2s, v0.2s, #17\n\t" "eor w12, w12, w5, ror 11\n\t" "shl v9.2s, v0.2s, #13\n\t" "and w13, w13, w5\n\t" "sri v9.2s, v0.2s, #19\n\t" "eor w12, w12, w5, ror 25\n\t" "eor v9.8b, v9.8b, v8.8b\n\t" "eor w13, w13, w7\n\t" "ushr v8.2s, v0.2s, #10\n\t" "add w8, w8, w12\n\t" "eor v9.8b, v9.8b, v8.8b\n\t" "add w8, w8, w13\n\t" "add v1.2s, v1.2s, v9.2s\n\t" "ldr w12, [%[k], #12]\n\t" "ext v11.8b, v5.8b, v6.8b, #4\n\t" "add w8, w8, w14\n\t" "add v1.2s, v1.2s, v11.2s\n\t" "add w8, w8, w12\n\t" "shl v8.2s, v10.2s, #25\n\t" "add w4, w4, w8\n\t" "sri v8.2s, v10.2s, #7\n\t" "ror w12, w9, #2\n\t" "shl v9.2s, v10.2s, #14\n\t" "eor w13, w9, w10\n\t" "sri v9.2s, v10.2s, #18\n\t" "eor w12, w12, w9, ror 13\n\t" "eor v9.8b, v9.8b, v8.8b\n\t" "eor w14, w10, w11\n\t" "ushr v10.2s, v10.2s, #3\n\t" "and w13, w13, w14\n\t" "eor v9.8b, v9.8b, v10.8b\n\t" "eor w12, w12, w9, ror 22\n\t" "add v1.2s, v1.2s, v9.2s\n\t" "eor w13, w13, w10\n\t" "add w8, w8, w12\n\t" "add w8, w8, w13\n\t" /* Round 4 */ "mov w14, v2.s[0]\n\t" "ror w12, w4, #6\n\t" "eor w13, w5, w6\n\t" "eor w12, w12, w4, ror 11\n\t" "and w13, w13, w4\n\t" "eor w12, w12, w4, ror 25\n\t" "eor w13, w13, w6\n\t" "add w7, w7, w12\n\t" "add w7, w7, w13\n\t" "ldr w12, [%[k], #16]\n\t" "add w7, w7, w14\n\t" "add w7, w7, w12\n\t" "add w11, w11, w7\n\t" "ror w12, w8, #2\n\t" "eor w13, w8, w9\n\t" "eor w12, w12, w8, ror 13\n\t" "eor w14, w9, w10\n\t" "and w13, w13, w14\n\t" "eor w12, w12, w8, ror 22\n\t" "eor w13, w13, w9\n\t" "add w7, w7, w12\n\t" "add w7, w7, w13\n\t" /* Round 5 */ "mov w14, v2.s[1]\n\t" /* Calc new W[4]-W[5] */ "ext v10.8b, v2.8b, v3.8b, #4\n\t" "ror w12, w11, #6\n\t" "shl v8.2s, v1.2s, #15\n\t" "eor w13, w4, w5\n\t" "sri v8.2s, v1.2s, #17\n\t" "eor w12, w12, w11, ror 11\n\t" "shl v9.2s, v1.2s, #13\n\t" "and w13, w13, w11\n\t" "sri v9.2s, v1.2s, #19\n\t" "eor w12, w12, w11, ror 25\n\t" "eor v9.8b, v9.8b, v8.8b\n\t" "eor w13, w13, w5\n\t" "ushr v8.2s, v1.2s, #10\n\t" "add w6, w6, w12\n\t" "eor v9.8b, v9.8b, v8.8b\n\t" "add w6, w6, w13\n\t" "add v2.2s, v2.2s, v9.2s\n\t" "ldr w12, [%[k], #20]\n\t" "ext v11.8b, v6.8b, v7.8b, #4\n\t" "add w6, w6, w14\n\t" "add v2.2s, v2.2s, v11.2s\n\t" "add w6, w6, w12\n\t" "shl v8.2s, v10.2s, #25\n\t" "add w10, w10, w6\n\t" "sri v8.2s, v10.2s, #7\n\t" "ror w12, w7, #2\n\t" "shl v9.2s, v10.2s, #14\n\t" "eor w13, w7, w8\n\t" "sri v9.2s, v10.2s, #18\n\t" "eor w12, w12, w7, ror 13\n\t" "eor v9.8b, v9.8b, v8.8b\n\t" "eor w14, w8, w9\n\t" "ushr v10.2s, v10.2s, #3\n\t" "and w13, w13, w14\n\t" "eor v9.8b, v9.8b, v10.8b\n\t" "eor w12, w12, w7, ror 22\n\t" "add v2.2s, v2.2s, v9.2s\n\t" "eor w13, w13, w8\n\t" "add w6, w6, w12\n\t" "add w6, w6, w13\n\t" /* Round 6 */ "mov w14, v3.s[0]\n\t" "ror w12, w10, #6\n\t" "eor w13, w11, w4\n\t" "eor w12, w12, w10, ror 11\n\t" "and w13, w13, w10\n\t" "eor w12, w12, w10, ror 25\n\t" "eor w13, w13, w4\n\t" "add w5, w5, w12\n\t" "add w5, w5, w13\n\t" "ldr w12, [%[k], #24]\n\t" "add w5, w5, w14\n\t" "add w5, w5, w12\n\t" "add w9, w9, w5\n\t" "ror w12, w6, #2\n\t" "eor w13, w6, w7\n\t" "eor w12, w12, w6, ror 13\n\t" "eor w14, w7, w8\n\t" "and w13, w13, w14\n\t" "eor w12, w12, w6, ror 22\n\t" "eor w13, w13, w7\n\t" "add w5, w5, w12\n\t" "add w5, w5, w13\n\t" /* Round 7 */ "mov w14, v3.s[1]\n\t" /* Calc new W[6]-W[7] */ "ext v10.8b, v3.8b, v4.8b, #4\n\t" "ror w12, w9, #6\n\t" "shl v8.2s, v2.2s, #15\n\t" "eor w13, w10, w11\n\t" "sri v8.2s, v2.2s, #17\n\t" "eor w12, w12, w9, ror 11\n\t" "shl v9.2s, v2.2s, #13\n\t" "and w13, w13, w9\n\t" "sri v9.2s, v2.2s, #19\n\t" "eor w12, w12, w9, ror 25\n\t" "eor v9.8b, v9.8b, v8.8b\n\t" "eor w13, w13, w11\n\t" "ushr v8.2s, v2.2s, #10\n\t" "add w4, w4, w12\n\t" "eor v9.8b, v9.8b, v8.8b\n\t" "add w4, w4, w13\n\t" "add v3.2s, v3.2s, v9.2s\n\t" "ldr w12, [%[k], #28]\n\t" "ext v11.8b, v7.8b, v0.8b, #4\n\t" "add w4, w4, w14\n\t" "add v3.2s, v3.2s, v11.2s\n\t" "add w4, w4, w12\n\t" "shl v8.2s, v10.2s, #25\n\t" "add w8, w8, w4\n\t" "sri v8.2s, v10.2s, #7\n\t" "ror w12, w5, #2\n\t" "shl v9.2s, v10.2s, #14\n\t" "eor w13, w5, w6\n\t" "sri v9.2s, v10.2s, #18\n\t" "eor w12, w12, w5, ror 13\n\t" "eor v9.8b, v9.8b, v8.8b\n\t" "eor w14, w6, w7\n\t" "ushr v10.2s, v10.2s, #3\n\t" "and w13, w13, w14\n\t" "eor v9.8b, v9.8b, v10.8b\n\t" "eor w12, w12, w5, ror 22\n\t" "add v3.2s, v3.2s, v9.2s\n\t" "eor w13, w13, w6\n\t" "add w4, w4, w12\n\t" "add w4, w4, w13\n\t" /* Round 8 */ "mov w14, v4.s[0]\n\t" "ror w12, w8, #6\n\t" "eor w13, w9, w10\n\t" "eor w12, w12, w8, ror 11\n\t" "and w13, w13, w8\n\t" "eor w12, w12, w8, ror 25\n\t" "eor w13, w13, w10\n\t" "add w11, w11, w12\n\t" "add w11, w11, w13\n\t" "ldr w12, [%[k], #32]\n\t" "add w11, w11, w14\n\t" "add w11, w11, w12\n\t" "add w7, w7, w11\n\t" "ror w12, w4, #2\n\t" "eor w13, w4, w5\n\t" "eor w12, w12, w4, ror 13\n\t" "eor w14, w5, w6\n\t" "and w13, w13, w14\n\t" "eor w12, w12, w4, ror 22\n\t" "eor w13, w13, w5\n\t" "add w11, w11, w12\n\t" "add w11, w11, w13\n\t" /* Round 9 */ "mov w14, v4.s[1]\n\t" /* Calc new W[8]-W[9] */ "ext v10.8b, v4.8b, v5.8b, #4\n\t" "ror w12, w7, #6\n\t" "shl v8.2s, v3.2s, #15\n\t" "eor w13, w8, w9\n\t" "sri v8.2s, v3.2s, #17\n\t" "eor w12, w12, w7, ror 11\n\t" "shl v9.2s, v3.2s, #13\n\t" "and w13, w13, w7\n\t" "sri v9.2s, v3.2s, #19\n\t" "eor w12, w12, w7, ror 25\n\t" "eor v9.8b, v9.8b, v8.8b\n\t" "eor w13, w13, w9\n\t" "ushr v8.2s, v3.2s, #10\n\t" "add w10, w10, w12\n\t" "eor v9.8b, v9.8b, v8.8b\n\t" "add w10, w10, w13\n\t" "add v4.2s, v4.2s, v9.2s\n\t" "ldr w12, [%[k], #36]\n\t" "ext v11.8b, v0.8b, v1.8b, #4\n\t" "add w10, w10, w14\n\t" "add v4.2s, v4.2s, v11.2s\n\t" "add w10, w10, w12\n\t" "shl v8.2s, v10.2s, #25\n\t" "add w6, w6, w10\n\t" "sri v8.2s, v10.2s, #7\n\t" "ror w12, w11, #2\n\t" "shl v9.2s, v10.2s, #14\n\t" "eor w13, w11, w4\n\t" "sri v9.2s, v10.2s, #18\n\t" "eor w12, w12, w11, ror 13\n\t" "eor v9.8b, v9.8b, v8.8b\n\t" "eor w14, w4, w5\n\t" "ushr v10.2s, v10.2s, #3\n\t" "and w13, w13, w14\n\t" "eor v9.8b, v9.8b, v10.8b\n\t" "eor w12, w12, w11, ror 22\n\t" "add v4.2s, v4.2s, v9.2s\n\t" "eor w13, w13, w4\n\t" "add w10, w10, w12\n\t" "add w10, w10, w13\n\t" /* Round 10 */ "mov w14, v5.s[0]\n\t" "ror w12, w6, #6\n\t" "eor w13, w7, w8\n\t" "eor w12, w12, w6, ror 11\n\t" "and w13, w13, w6\n\t" "eor w12, w12, w6, ror 25\n\t" "eor w13, w13, w8\n\t" "add w9, w9, w12\n\t" "add w9, w9, w13\n\t" "ldr w12, [%[k], #40]\n\t" "add w9, w9, w14\n\t" "add w9, w9, w12\n\t" "add w5, w5, w9\n\t" "ror w12, w10, #2\n\t" "eor w13, w10, w11\n\t" "eor w12, w12, w10, ror 13\n\t" "eor w14, w11, w4\n\t" "and w13, w13, w14\n\t" "eor w12, w12, w10, ror 22\n\t" "eor w13, w13, w11\n\t" "add w9, w9, w12\n\t" "add w9, w9, w13\n\t" /* Round 11 */ "mov w14, v5.s[1]\n\t" /* Calc new W[10]-W[11] */ "ext v10.8b, v5.8b, v6.8b, #4\n\t" "ror w12, w5, #6\n\t" "shl v8.2s, v4.2s, #15\n\t" "eor w13, w6, w7\n\t" "sri v8.2s, v4.2s, #17\n\t" "eor w12, w12, w5, ror 11\n\t" "shl v9.2s, v4.2s, #13\n\t" "and w13, w13, w5\n\t" "sri v9.2s, v4.2s, #19\n\t" "eor w12, w12, w5, ror 25\n\t" "eor v9.8b, v9.8b, v8.8b\n\t" "eor w13, w13, w7\n\t" "ushr v8.2s, v4.2s, #10\n\t" "add w8, w8, w12\n\t" "eor v9.8b, v9.8b, v8.8b\n\t" "add w8, w8, w13\n\t" "add v5.2s, v5.2s, v9.2s\n\t" "ldr w12, [%[k], #44]\n\t" "ext v11.8b, v1.8b, v2.8b, #4\n\t" "add w8, w8, w14\n\t" "add v5.2s, v5.2s, v11.2s\n\t" "add w8, w8, w12\n\t" "shl v8.2s, v10.2s, #25\n\t" "add w4, w4, w8\n\t" "sri v8.2s, v10.2s, #7\n\t" "ror w12, w9, #2\n\t" "shl v9.2s, v10.2s, #14\n\t" "eor w13, w9, w10\n\t" "sri v9.2s, v10.2s, #18\n\t" "eor w12, w12, w9, ror 13\n\t" "eor v9.8b, v9.8b, v8.8b\n\t" "eor w14, w10, w11\n\t" "ushr v10.2s, v10.2s, #3\n\t" "and w13, w13, w14\n\t" "eor v9.8b, v9.8b, v10.8b\n\t" "eor w12, w12, w9, ror 22\n\t" "add v5.2s, v5.2s, v9.2s\n\t" "eor w13, w13, w10\n\t" "add w8, w8, w12\n\t" "add w8, w8, w13\n\t" /* Round 12 */ "mov w14, v6.s[0]\n\t" "ror w12, w4, #6\n\t" "eor w13, w5, w6\n\t" "eor w12, w12, w4, ror 11\n\t" "and w13, w13, w4\n\t" "eor w12, w12, w4, ror 25\n\t" "eor w13, w13, w6\n\t" "add w7, w7, w12\n\t" "add w7, w7, w13\n\t" "ldr w12, [%[k], #48]\n\t" "add w7, w7, w14\n\t" "add w7, w7, w12\n\t" "add w11, w11, w7\n\t" "ror w12, w8, #2\n\t" "eor w13, w8, w9\n\t" "eor w12, w12, w8, ror 13\n\t" "eor w14, w9, w10\n\t" "and w13, w13, w14\n\t" "eor w12, w12, w8, ror 22\n\t" "eor w13, w13, w9\n\t" "add w7, w7, w12\n\t" "add w7, w7, w13\n\t" /* Round 13 */ "mov w14, v6.s[1]\n\t" /* Calc new W[12]-W[13] */ "ext v10.8b, v6.8b, v7.8b, #4\n\t" "ror w12, w11, #6\n\t" "shl v8.2s, v5.2s, #15\n\t" "eor w13, w4, w5\n\t" "sri v8.2s, v5.2s, #17\n\t" "eor w12, w12, w11, ror 11\n\t" "shl v9.2s, v5.2s, #13\n\t" "and w13, w13, w11\n\t" "sri v9.2s, v5.2s, #19\n\t" "eor w12, w12, w11, ror 25\n\t" "eor v9.8b, v9.8b, v8.8b\n\t" "eor w13, w13, w5\n\t" "ushr v8.2s, v5.2s, #10\n\t" "add w6, w6, w12\n\t" "eor v9.8b, v9.8b, v8.8b\n\t" "add w6, w6, w13\n\t" "add v6.2s, v6.2s, v9.2s\n\t" "ldr w12, [%[k], #52]\n\t" "ext v11.8b, v2.8b, v3.8b, #4\n\t" "add w6, w6, w14\n\t" "add v6.2s, v6.2s, v11.2s\n\t" "add w6, w6, w12\n\t" "shl v8.2s, v10.2s, #25\n\t" "add w10, w10, w6\n\t" "sri v8.2s, v10.2s, #7\n\t" "ror w12, w7, #2\n\t" "shl v9.2s, v10.2s, #14\n\t" "eor w13, w7, w8\n\t" "sri v9.2s, v10.2s, #18\n\t" "eor w12, w12, w7, ror 13\n\t" "eor v9.8b, v9.8b, v8.8b\n\t" "eor w14, w8, w9\n\t" "ushr v10.2s, v10.2s, #3\n\t" "and w13, w13, w14\n\t" "eor v9.8b, v9.8b, v10.8b\n\t" "eor w12, w12, w7, ror 22\n\t" "add v6.2s, v6.2s, v9.2s\n\t" "eor w13, w13, w8\n\t" "add w6, w6, w12\n\t" "add w6, w6, w13\n\t" /* Round 14 */ "mov w14, v7.s[0]\n\t" "ror w12, w10, #6\n\t" "eor w13, w11, w4\n\t" "eor w12, w12, w10, ror 11\n\t" "and w13, w13, w10\n\t" "eor w12, w12, w10, ror 25\n\t" "eor w13, w13, w4\n\t" "add w5, w5, w12\n\t" "add w5, w5, w13\n\t" "ldr w12, [%[k], #56]\n\t" "add w5, w5, w14\n\t" "add w5, w5, w12\n\t" "add w9, w9, w5\n\t" "ror w12, w6, #2\n\t" "eor w13, w6, w7\n\t" "eor w12, w12, w6, ror 13\n\t" "eor w14, w7, w8\n\t" "and w13, w13, w14\n\t" "eor w12, w12, w6, ror 22\n\t" "eor w13, w13, w7\n\t" "add w5, w5, w12\n\t" "add w5, w5, w13\n\t" /* Round 15 */ "mov w14, v7.s[1]\n\t" /* Calc new W[14]-W[15] */ "ext v10.8b, v7.8b, v0.8b, #4\n\t" "ror w12, w9, #6\n\t" "shl v8.2s, v6.2s, #15\n\t" "eor w13, w10, w11\n\t" "sri v8.2s, v6.2s, #17\n\t" "eor w12, w12, w9, ror 11\n\t" "shl v9.2s, v6.2s, #13\n\t" "and w13, w13, w9\n\t" "sri v9.2s, v6.2s, #19\n\t" "eor w12, w12, w9, ror 25\n\t" "eor v9.8b, v9.8b, v8.8b\n\t" "eor w13, w13, w11\n\t" "ushr v8.2s, v6.2s, #10\n\t" "add w4, w4, w12\n\t" "eor v9.8b, v9.8b, v8.8b\n\t" "add w4, w4, w13\n\t" "add v7.2s, v7.2s, v9.2s\n\t" "ldr w12, [%[k], #60]\n\t" "ext v11.8b, v3.8b, v4.8b, #4\n\t" "add w4, w4, w14\n\t" "add v7.2s, v7.2s, v11.2s\n\t" "add w4, w4, w12\n\t" "shl v8.2s, v10.2s, #25\n\t" "add w8, w8, w4\n\t" "sri v8.2s, v10.2s, #7\n\t" "ror w12, w5, #2\n\t" "shl v9.2s, v10.2s, #14\n\t" "eor w13, w5, w6\n\t" "sri v9.2s, v10.2s, #18\n\t" "eor w12, w12, w5, ror 13\n\t" "eor v9.8b, v9.8b, v8.8b\n\t" "eor w14, w6, w7\n\t" "ushr v10.2s, v10.2s, #3\n\t" "and w13, w13, w14\n\t" "eor v9.8b, v9.8b, v10.8b\n\t" "eor w12, w12, w5, ror 22\n\t" "add v7.2s, v7.2s, v9.2s\n\t" "eor w13, w13, w6\n\t" "add w4, w4, w12\n\t" "add w4, w4, w13\n\t" "add %[k], %[k], #0x40\n\t" "subs x24, x24, #1\n\t" "b.ne L_sha256_len_neon_start_%=\n\t" /* Round 0 */ "mov w14, v0.s[0]\n\t" "ror w12, w8, #6\n\t" "eor w13, w9, w10\n\t" "eor w12, w12, w8, ror 11\n\t" "and w13, w13, w8\n\t" "eor w12, w12, w8, ror 25\n\t" "eor w13, w13, w10\n\t" "add w11, w11, w12\n\t" "add w11, w11, w13\n\t" "ldr w12, [%[k]]\n\t" "add w11, w11, w14\n\t" "add w11, w11, w12\n\t" "add w7, w7, w11\n\t" "ror w12, w4, #2\n\t" "eor w13, w4, w5\n\t" "eor w12, w12, w4, ror 13\n\t" "eor w14, w5, w6\n\t" "and w13, w13, w14\n\t" "eor w12, w12, w4, ror 22\n\t" "eor w13, w13, w5\n\t" "add w11, w11, w12\n\t" "add w11, w11, w13\n\t" /* Round 1 */ "mov w14, v0.s[1]\n\t" "ror w12, w7, #6\n\t" "eor w13, w8, w9\n\t" "eor w12, w12, w7, ror 11\n\t" "and w13, w13, w7\n\t" "eor w12, w12, w7, ror 25\n\t" "eor w13, w13, w9\n\t" "add w10, w10, w12\n\t" "add w10, w10, w13\n\t" "ldr w12, [%[k], #4]\n\t" "add w10, w10, w14\n\t" "add w10, w10, w12\n\t" "add w6, w6, w10\n\t" "ror w12, w11, #2\n\t" "eor w13, w11, w4\n\t" "eor w12, w12, w11, ror 13\n\t" "eor w14, w4, w5\n\t" "and w13, w13, w14\n\t" "eor w12, w12, w11, ror 22\n\t" "eor w13, w13, w4\n\t" "add w10, w10, w12\n\t" "add w10, w10, w13\n\t" /* Round 2 */ "mov w14, v1.s[0]\n\t" "ror w12, w6, #6\n\t" "eor w13, w7, w8\n\t" "eor w12, w12, w6, ror 11\n\t" "and w13, w13, w6\n\t" "eor w12, w12, w6, ror 25\n\t" "eor w13, w13, w8\n\t" "add w9, w9, w12\n\t" "add w9, w9, w13\n\t" "ldr w12, [%[k], #8]\n\t" "add w9, w9, w14\n\t" "add w9, w9, w12\n\t" "add w5, w5, w9\n\t" "ror w12, w10, #2\n\t" "eor w13, w10, w11\n\t" "eor w12, w12, w10, ror 13\n\t" "eor w14, w11, w4\n\t" "and w13, w13, w14\n\t" "eor w12, w12, w10, ror 22\n\t" "eor w13, w13, w11\n\t" "add w9, w9, w12\n\t" "add w9, w9, w13\n\t" /* Round 3 */ "mov w14, v1.s[1]\n\t" "ror w12, w5, #6\n\t" "eor w13, w6, w7\n\t" "eor w12, w12, w5, ror 11\n\t" "and w13, w13, w5\n\t" "eor w12, w12, w5, ror 25\n\t" "eor w13, w13, w7\n\t" "add w8, w8, w12\n\t" "add w8, w8, w13\n\t" "ldr w12, [%[k], #12]\n\t" "add w8, w8, w14\n\t" "add w8, w8, w12\n\t" "add w4, w4, w8\n\t" "ror w12, w9, #2\n\t" "eor w13, w9, w10\n\t" "eor w12, w12, w9, ror 13\n\t" "eor w14, w10, w11\n\t" "and w13, w13, w14\n\t" "eor w12, w12, w9, ror 22\n\t" "eor w13, w13, w10\n\t" "add w8, w8, w12\n\t" "add w8, w8, w13\n\t" /* Round 4 */ "mov w14, v2.s[0]\n\t" "ror w12, w4, #6\n\t" "eor w13, w5, w6\n\t" "eor w12, w12, w4, ror 11\n\t" "and w13, w13, w4\n\t" "eor w12, w12, w4, ror 25\n\t" "eor w13, w13, w6\n\t" "add w7, w7, w12\n\t" "add w7, w7, w13\n\t" "ldr w12, [%[k], #16]\n\t" "add w7, w7, w14\n\t" "add w7, w7, w12\n\t" "add w11, w11, w7\n\t" "ror w12, w8, #2\n\t" "eor w13, w8, w9\n\t" "eor w12, w12, w8, ror 13\n\t" "eor w14, w9, w10\n\t" "and w13, w13, w14\n\t" "eor w12, w12, w8, ror 22\n\t" "eor w13, w13, w9\n\t" "add w7, w7, w12\n\t" "add w7, w7, w13\n\t" /* Round 5 */ "mov w14, v2.s[1]\n\t" "ror w12, w11, #6\n\t" "eor w13, w4, w5\n\t" "eor w12, w12, w11, ror 11\n\t" "and w13, w13, w11\n\t" "eor w12, w12, w11, ror 25\n\t" "eor w13, w13, w5\n\t" "add w6, w6, w12\n\t" "add w6, w6, w13\n\t" "ldr w12, [%[k], #20]\n\t" "add w6, w6, w14\n\t" "add w6, w6, w12\n\t" "add w10, w10, w6\n\t" "ror w12, w7, #2\n\t" "eor w13, w7, w8\n\t" "eor w12, w12, w7, ror 13\n\t" "eor w14, w8, w9\n\t" "and w13, w13, w14\n\t" "eor w12, w12, w7, ror 22\n\t" "eor w13, w13, w8\n\t" "add w6, w6, w12\n\t" "add w6, w6, w13\n\t" /* Round 6 */ "mov w14, v3.s[0]\n\t" "ror w12, w10, #6\n\t" "eor w13, w11, w4\n\t" "eor w12, w12, w10, ror 11\n\t" "and w13, w13, w10\n\t" "eor w12, w12, w10, ror 25\n\t" "eor w13, w13, w4\n\t" "add w5, w5, w12\n\t" "add w5, w5, w13\n\t" "ldr w12, [%[k], #24]\n\t" "add w5, w5, w14\n\t" "add w5, w5, w12\n\t" "add w9, w9, w5\n\t" "ror w12, w6, #2\n\t" "eor w13, w6, w7\n\t" "eor w12, w12, w6, ror 13\n\t" "eor w14, w7, w8\n\t" "and w13, w13, w14\n\t" "eor w12, w12, w6, ror 22\n\t" "eor w13, w13, w7\n\t" "add w5, w5, w12\n\t" "add w5, w5, w13\n\t" /* Round 7 */ "mov w14, v3.s[1]\n\t" "ror w12, w9, #6\n\t" "eor w13, w10, w11\n\t" "eor w12, w12, w9, ror 11\n\t" "and w13, w13, w9\n\t" "eor w12, w12, w9, ror 25\n\t" "eor w13, w13, w11\n\t" "add w4, w4, w12\n\t" "add w4, w4, w13\n\t" "ldr w12, [%[k], #28]\n\t" "add w4, w4, w14\n\t" "add w4, w4, w12\n\t" "add w8, w8, w4\n\t" "ror w12, w5, #2\n\t" "eor w13, w5, w6\n\t" "eor w12, w12, w5, ror 13\n\t" "eor w14, w6, w7\n\t" "and w13, w13, w14\n\t" "eor w12, w12, w5, ror 22\n\t" "eor w13, w13, w6\n\t" "add w4, w4, w12\n\t" "add w4, w4, w13\n\t" /* Round 8 */ "mov w14, v4.s[0]\n\t" "ror w12, w8, #6\n\t" "eor w13, w9, w10\n\t" "eor w12, w12, w8, ror 11\n\t" "and w13, w13, w8\n\t" "eor w12, w12, w8, ror 25\n\t" "eor w13, w13, w10\n\t" "add w11, w11, w12\n\t" "add w11, w11, w13\n\t" "ldr w12, [%[k], #32]\n\t" "add w11, w11, w14\n\t" "add w11, w11, w12\n\t" "add w7, w7, w11\n\t" "ror w12, w4, #2\n\t" "eor w13, w4, w5\n\t" "eor w12, w12, w4, ror 13\n\t" "eor w14, w5, w6\n\t" "and w13, w13, w14\n\t" "eor w12, w12, w4, ror 22\n\t" "eor w13, w13, w5\n\t" "add w11, w11, w12\n\t" "add w11, w11, w13\n\t" /* Round 9 */ "mov w14, v4.s[1]\n\t" "ror w12, w7, #6\n\t" "eor w13, w8, w9\n\t" "eor w12, w12, w7, ror 11\n\t" "and w13, w13, w7\n\t" "eor w12, w12, w7, ror 25\n\t" "eor w13, w13, w9\n\t" "add w10, w10, w12\n\t" "add w10, w10, w13\n\t" "ldr w12, [%[k], #36]\n\t" "add w10, w10, w14\n\t" "add w10, w10, w12\n\t" "add w6, w6, w10\n\t" "ror w12, w11, #2\n\t" "eor w13, w11, w4\n\t" "eor w12, w12, w11, ror 13\n\t" "eor w14, w4, w5\n\t" "and w13, w13, w14\n\t" "eor w12, w12, w11, ror 22\n\t" "eor w13, w13, w4\n\t" "add w10, w10, w12\n\t" "add w10, w10, w13\n\t" /* Round 10 */ "mov w14, v5.s[0]\n\t" "ror w12, w6, #6\n\t" "eor w13, w7, w8\n\t" "eor w12, w12, w6, ror 11\n\t" "and w13, w13, w6\n\t" "eor w12, w12, w6, ror 25\n\t" "eor w13, w13, w8\n\t" "add w9, w9, w12\n\t" "add w9, w9, w13\n\t" "ldr w12, [%[k], #40]\n\t" "add w9, w9, w14\n\t" "add w9, w9, w12\n\t" "add w5, w5, w9\n\t" "ror w12, w10, #2\n\t" "eor w13, w10, w11\n\t" "eor w12, w12, w10, ror 13\n\t" "eor w14, w11, w4\n\t" "and w13, w13, w14\n\t" "eor w12, w12, w10, ror 22\n\t" "eor w13, w13, w11\n\t" "add w9, w9, w12\n\t" "add w9, w9, w13\n\t" /* Round 11 */ "mov w14, v5.s[1]\n\t" "ror w12, w5, #6\n\t" "eor w13, w6, w7\n\t" "eor w12, w12, w5, ror 11\n\t" "and w13, w13, w5\n\t" "eor w12, w12, w5, ror 25\n\t" "eor w13, w13, w7\n\t" "add w8, w8, w12\n\t" "add w8, w8, w13\n\t" "ldr w12, [%[k], #44]\n\t" "add w8, w8, w14\n\t" "add w8, w8, w12\n\t" "add w4, w4, w8\n\t" "ror w12, w9, #2\n\t" "eor w13, w9, w10\n\t" "eor w12, w12, w9, ror 13\n\t" "eor w14, w10, w11\n\t" "and w13, w13, w14\n\t" "eor w12, w12, w9, ror 22\n\t" "eor w13, w13, w10\n\t" "add w8, w8, w12\n\t" "add w8, w8, w13\n\t" /* Round 12 */ "mov w14, v6.s[0]\n\t" "ror w12, w4, #6\n\t" "eor w13, w5, w6\n\t" "eor w12, w12, w4, ror 11\n\t" "and w13, w13, w4\n\t" "eor w12, w12, w4, ror 25\n\t" "eor w13, w13, w6\n\t" "add w7, w7, w12\n\t" "add w7, w7, w13\n\t" "ldr w12, [%[k], #48]\n\t" "add w7, w7, w14\n\t" "add w7, w7, w12\n\t" "add w11, w11, w7\n\t" "ror w12, w8, #2\n\t" "eor w13, w8, w9\n\t" "eor w12, w12, w8, ror 13\n\t" "eor w14, w9, w10\n\t" "and w13, w13, w14\n\t" "eor w12, w12, w8, ror 22\n\t" "eor w13, w13, w9\n\t" "add w7, w7, w12\n\t" "add w7, w7, w13\n\t" /* Round 13 */ "mov w14, v6.s[1]\n\t" "ror w12, w11, #6\n\t" "eor w13, w4, w5\n\t" "eor w12, w12, w11, ror 11\n\t" "and w13, w13, w11\n\t" "eor w12, w12, w11, ror 25\n\t" "eor w13, w13, w5\n\t" "add w6, w6, w12\n\t" "add w6, w6, w13\n\t" "ldr w12, [%[k], #52]\n\t" "add w6, w6, w14\n\t" "add w6, w6, w12\n\t" "add w10, w10, w6\n\t" "ror w12, w7, #2\n\t" "eor w13, w7, w8\n\t" "eor w12, w12, w7, ror 13\n\t" "eor w14, w8, w9\n\t" "and w13, w13, w14\n\t" "eor w12, w12, w7, ror 22\n\t" "eor w13, w13, w8\n\t" "add w6, w6, w12\n\t" "add w6, w6, w13\n\t" /* Round 14 */ "mov w14, v7.s[0]\n\t" "ror w12, w10, #6\n\t" "eor w13, w11, w4\n\t" "eor w12, w12, w10, ror 11\n\t" "and w13, w13, w10\n\t" "eor w12, w12, w10, ror 25\n\t" "eor w13, w13, w4\n\t" "add w5, w5, w12\n\t" "add w5, w5, w13\n\t" "ldr w12, [%[k], #56]\n\t" "add w5, w5, w14\n\t" "add w5, w5, w12\n\t" "add w9, w9, w5\n\t" "ror w12, w6, #2\n\t" "eor w13, w6, w7\n\t" "eor w12, w12, w6, ror 13\n\t" "eor w14, w7, w8\n\t" "and w13, w13, w14\n\t" "eor w12, w12, w6, ror 22\n\t" "eor w13, w13, w7\n\t" "add w5, w5, w12\n\t" "add w5, w5, w13\n\t" /* Round 15 */ "mov w14, v7.s[1]\n\t" "ror w12, w9, #6\n\t" "eor w13, w10, w11\n\t" "eor w12, w12, w9, ror 11\n\t" "and w13, w13, w9\n\t" "eor w12, w12, w9, ror 25\n\t" "eor w13, w13, w11\n\t" "add w4, w4, w12\n\t" "add w4, w4, w13\n\t" "ldr w12, [%[k], #60]\n\t" "add w4, w4, w14\n\t" "add w4, w4, w12\n\t" "add w8, w8, w4\n\t" "ror w12, w5, #2\n\t" "eor w13, w5, w6\n\t" "eor w12, w12, w5, ror 13\n\t" "eor w14, w6, w7\n\t" "and w13, w13, w14\n\t" "eor w12, w12, w5, ror 22\n\t" "eor w13, w13, w6\n\t" "add w4, w4, w12\n\t" "add w4, w4, w13\n\t" "add w11, w11, w23\n\t" "add w10, w10, w22\n\t" "add w9, w9, w21\n\t" "add w8, w8, w20\n\t" "add w7, w7, w19\n\t" "add w6, w6, w17\n\t" "add w5, w5, w16\n\t" "add w4, w4, w15\n\t" "subs %w[len], %w[len], #0x40\n\t" "sub %[k], %[k], #0xc0\n\t" "b.ne L_sha256_len_neon_begin_%=\n\t" "str w4, [%x[sha256]]\n\t" "str w5, [%x[sha256], #4]\n\t" "str w6, [%x[sha256], #8]\n\t" "str w7, [%x[sha256], #12]\n\t" "str w8, [%x[sha256], #16]\n\t" "str w9, [%x[sha256], #20]\n\t" "str w10, [%x[sha256], #24]\n\t" "str w11, [%x[sha256], #28]\n\t" : [sha256] "+r" (sha256), [len] "+r" (len) : [data] "r" (data), [k] "r" (k) : "memory", "cc", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11" ); } #ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO XALIGNED(8) static const word32 L_SHA256_trans_crypto_len_k[] = { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, }; void Transform_Sha256_Len_crypto(wc_Sha256* sha256, const byte* data, word32 len); void Transform_Sha256_Len_crypto(wc_Sha256* sha256, const byte* data, word32 len) { const word32* k = L_SHA256_trans_crypto_len_k; __asm__ __volatile__ ( /* Load K into vector registers */ "ld1 {v8.4s, v9.4s, v10.4s, v11.4s}, [%[k]], #0x40\n\t" "ld1 {v12.4s, v13.4s, v14.4s, v15.4s}, [%[k]], #0x40\n\t" "ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%[k]], #0x40\n\t" "ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [%[k]], #0x40\n\t" /* Load digest into working vars */ "ld1 {v0.4s, v1.4s}, [%x[sha256]]\n\t" /* Start of loop processing a block */ "\n" "L_sha256_len_crypto_begin_%=:\n\t" /* Load W */ "ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [%x[data]], #0x40\n\t" "rev32 v4.16b, v4.16b\n\t" "rev32 v5.16b, v5.16b\n\t" "rev32 v6.16b, v6.16b\n\t" "rev32 v7.16b, v7.16b\n\t" /* Copy digest to add in at end */ "mov v2.16b, v0.16b\n\t" "mov v3.16b, v1.16b\n\t" /* Start 16 rounds */ /* Round 1 */ "add v24.4s, v4.4s, v8.4s\n\t" "mov v25.16b, v0.16b\n\t" "sha256h q0, q1, v24.4s\n\t" "sha256h2 q1, q25, v24.4s\n\t" /* Round 2 */ "sha256su0 v4.4s, v5.4s\n\t" "add v24.4s, v5.4s, v9.4s\n\t" "mov v25.16b, v0.16b\n\t" "sha256su1 v4.4s, v6.4s, v7.4s\n\t" "sha256h q0, q1, v24.4s\n\t" "sha256h2 q1, q25, v24.4s\n\t" /* Round 3 */ "sha256su0 v5.4s, v6.4s\n\t" "add v24.4s, v6.4s, v10.4s\n\t" "mov v25.16b, v0.16b\n\t" "sha256su1 v5.4s, v7.4s, v4.4s\n\t" "sha256h q0, q1, v24.4s\n\t" "sha256h2 q1, q25, v24.4s\n\t" /* Round 4 */ "sha256su0 v6.4s, v7.4s\n\t" "add v24.4s, v7.4s, v11.4s\n\t" "mov v25.16b, v0.16b\n\t" "sha256su1 v6.4s, v4.4s, v5.4s\n\t" "sha256h q0, q1, v24.4s\n\t" "sha256h2 q1, q25, v24.4s\n\t" /* Round 5 */ "sha256su0 v7.4s, v4.4s\n\t" "add v24.4s, v4.4s, v12.4s\n\t" "mov v25.16b, v0.16b\n\t" "sha256su1 v7.4s, v5.4s, v6.4s\n\t" "sha256h q0, q1, v24.4s\n\t" "sha256h2 q1, q25, v24.4s\n\t" /* Round 6 */ "sha256su0 v4.4s, v5.4s\n\t" "add v24.4s, v5.4s, v13.4s\n\t" "mov v25.16b, v0.16b\n\t" "sha256su1 v4.4s, v6.4s, v7.4s\n\t" "sha256h q0, q1, v24.4s\n\t" "sha256h2 q1, q25, v24.4s\n\t" /* Round 7 */ "sha256su0 v5.4s, v6.4s\n\t" "add v24.4s, v6.4s, v14.4s\n\t" "mov v25.16b, v0.16b\n\t" "sha256su1 v5.4s, v7.4s, v4.4s\n\t" "sha256h q0, q1, v24.4s\n\t" "sha256h2 q1, q25, v24.4s\n\t" /* Round 8 */ "sha256su0 v6.4s, v7.4s\n\t" "add v24.4s, v7.4s, v15.4s\n\t" "mov v25.16b, v0.16b\n\t" "sha256su1 v6.4s, v4.4s, v5.4s\n\t" "sha256h q0, q1, v24.4s\n\t" "sha256h2 q1, q25, v24.4s\n\t" /* Round 9 */ "sha256su0 v7.4s, v4.4s\n\t" "add v24.4s, v4.4s, v16.4s\n\t" "mov v25.16b, v0.16b\n\t" "sha256su1 v7.4s, v5.4s, v6.4s\n\t" "sha256h q0, q1, v24.4s\n\t" "sha256h2 q1, q25, v24.4s\n\t" /* Round 10 */ "sha256su0 v4.4s, v5.4s\n\t" "add v24.4s, v5.4s, v17.4s\n\t" "mov v25.16b, v0.16b\n\t" "sha256su1 v4.4s, v6.4s, v7.4s\n\t" "sha256h q0, q1, v24.4s\n\t" "sha256h2 q1, q25, v24.4s\n\t" /* Round 11 */ "sha256su0 v5.4s, v6.4s\n\t" "add v24.4s, v6.4s, v18.4s\n\t" "mov v25.16b, v0.16b\n\t" "sha256su1 v5.4s, v7.4s, v4.4s\n\t" "sha256h q0, q1, v24.4s\n\t" "sha256h2 q1, q25, v24.4s\n\t" /* Round 12 */ "sha256su0 v6.4s, v7.4s\n\t" "add v24.4s, v7.4s, v19.4s\n\t" "mov v25.16b, v0.16b\n\t" "sha256su1 v6.4s, v4.4s, v5.4s\n\t" "sha256h q0, q1, v24.4s\n\t" "sha256h2 q1, q25, v24.4s\n\t" /* Round 13 */ "sha256su0 v7.4s, v4.4s\n\t" "add v24.4s, v4.4s, v20.4s\n\t" "mov v25.16b, v0.16b\n\t" "sha256su1 v7.4s, v5.4s, v6.4s\n\t" "sha256h q0, q1, v24.4s\n\t" "sha256h2 q1, q25, v24.4s\n\t" /* Round 14 */ "add v24.4s, v5.4s, v21.4s\n\t" "mov v25.16b, v0.16b\n\t" "sha256h q0, q1, v24.4s\n\t" "sha256h2 q1, q25, v24.4s\n\t" /* Round 15 */ "add v24.4s, v6.4s, v22.4s\n\t" "mov v25.16b, v0.16b\n\t" "sha256h q0, q1, v24.4s\n\t" "sha256h2 q1, q25, v24.4s\n\t" /* Round 16 */ "add v24.4s, v7.4s, v23.4s\n\t" "mov v25.16b, v0.16b\n\t" "sha256h q0, q1, v24.4s\n\t" "sha256h2 q1, q25, v24.4s\n\t" /* Done 16 rounds */ "add v0.4s, v0.4s, v2.4s\n\t" "add v1.4s, v1.4s, v3.4s\n\t" "subs %w[len], %w[len], #0x40\n\t" "b.ne L_sha256_len_crypto_begin_%=\n\t" /* Store digest back */ "st1 {v0.4s, v1.4s}, [%x[sha256]]\n\t" : [sha256] "+r" (sha256), [len] "+r" (len) : [data] "r" (data), [k] "r" (k) : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25" ); } #endif /* !WOLFSSL_ARMASM_NO_HW_CRYPTO */ #endif /* !NO_SHA256 || WOLFSSL_SHA224 */ #endif /* __aarch64__ */ #endif /* WOLFSSL_ARMASM */ #endif /* WOLFSSL_ARMASM_INLINE */